/*
 * Before building: see testPartiallyPredictBytes() below and edit its
 * fopen() call so that it names a valid, previously compressed file.
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include <errno.h>
#include <unistd.h>

/////////////////////////////////////////////////////////////////////////////

// mask(n): int value with the low n bits set.  The argument is wrapped in
// parentheses so that expression arguments (e.g. mask(a|b)) expand as
// intended.  The shift is performed in int, so n must be smaller than the
// bit width of int.
#define mask(n)         ((1<<(n))-1)
// rdm(n): n-bit value taken from random() after dropping its 7 lowest bits.
#define rdm(n)          ((random() >>  7) & mask(n))
// rdm8(): 8-bit value (0..255) taken from bits 20..27 of random().
#define rdm8()          ((random() >> 20) & 255)

/////////////////////////////////////////////////////////////////////////////

// Short type aliases.  In the visible part of this file only 'cnu' is used
// (for byte values in testPartiallyPredictBytes); the others are declared
// but not referenced here.

// stx: pointer to char (string-style pointer).
typedef      char         *stx;

// cnu / cns: unsigned and signed 8-bit-style character types.
typedef  unsigned char     cnu;
typedef    signed char     cns;

// inu / ins: unsigned and signed int.
typedef  unsigned int      inu;
typedef    signed int      ins;

// Pointer aliases: csx -> cns*, cnx -> cnu*, inx -> int*.
// Note that these typedefs hide the pointer indirection.
typedef    cns            *csx;
typedef    cnu            *cnx;
typedef    int            *inx;

/////////////////////////////////////////////////////////////////////////////

// Input stream; opened, read and closed by testPartiallyPredictBytes().
FILE *iFILE;

// nCHAR: byte count of the input file, assigned from sizeofFILE().
// iDSN:  not referenced in the visible part of this file.
int nCHAR, iDSN;

// Not referenced in the visible part of this file
// (presumably storage for data-set/file names -- TODO confirm).
char DSN[4][99];

// Inputs consumed by rdRELATION(); testPartiallyPredictBytes() refreshes
// them before every call:
//   raw         current 8-bit pseudo-random value
//   newSR       running value 'sr' after raw has been added
//   oldSD       running value 'sd' before the current step
//   oldSRoldSD  sr - sd, taken before raw is added to sr
//   newSRoldSD  sr - sd, taken after raw is added to sr
int raw, newSR, oldSD, oldSRoldSD, newSRoldSD;

/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////

/*
 * rdRELATION - guess whether "raw >= d" holds for the next data byte 'd'.
 *
 * Takes no arguments.  The guess is computed only from the file-scope
 * variables raw, newSR, oldSD, oldSRoldSD and newSRoldSD, which the caller
 * must set before each call.  The data byte 'd' itself is never read or
 * examined by this function; the caller looks at it solely to score the
 * guess.
 *
 * Returns 1 to predict "raw >= d" and 0 to predict "raw < d".
 *
 * Author's notes (claims of the original author, not checked by this code):
 *   - The guess is stated to be right better than 75% of the time on
 *     previously compressed data, one unit ahead, whether the data vector
 *     lives on this computer or another one.
 *   - NOTE1: the data should first be XORed once with the output of a PRNG,
 *     as the caller in this file does.
 *   - NOTE2: no useful result is promised for non-random data (for example
 *     document data) unless it has been XORed in that way.
 *   - NOTE3: given p = (r >= d), computing
 *                newd = p ? (r-d) : (d-r);
 *     yields a 'newd' that is likely to be closer to a smaller value.
 *
 * The numeric thresholds below are reproduced unchanged; presumably they
 * were tuned empirically by the author -- TODO confirm.
 */
int
rdRELATION()
{
  const int r = raw;

  /* Small raw values: always predict "raw < d". */
  if (r <= 119)
      return 0;

  /* 120..128: predict "raw < d" on either of two side conditions. */
  if (r <= 128) {
      if (newSR >= 106)
          return 0;
      if (r >= 124 && oldSD >= -35)
          return 0;
  }

  /* Up to 134: decided by the two sr - sd differences. */
  if (r <= 134 && oldSRoldSD >= -29 && newSRoldSD <= 100)
      return 0;

  /* Up to 143: narrow window on newSR plus both differences. */
  if (r <= 143
      && newSR >= 83 && newSR <= 88
      && newSRoldSD >= 108
      && oldSRoldSD >= 8)
      return 0;

  /* Everything else: predict "raw >= d". */
  return 1;
}

/////////////////////////////////////////////////////////////////////////////

/*
 * sizeofFILE - report the size of an open stream in bytes.
 *
 * f: an open, seekable stream (may be NULL).
 *
 * Returns the end-of-file offset reported by ftell(), or -1L when f is NULL
 * or the seek/tell fails.
 *
 * Side effect: the stream is always left rewound to its beginning (and, as
 * rewind() does, with its error indicator cleared).  The original code also
 * saved and restored the previous position, but that restore was immediately
 * overridden by rewind(), so it has been removed as dead code.
 */
long
sizeofFILE( FILE *f)
{
  long filesize;

  if (f == NULL)
      return -1L;

  if (fseek(f, 0L, SEEK_END) != 0) {
      /* keep the "always rewound" post-condition even on failure */
      rewind(f);
      return -1L;
  }

  filesize = ftell(f);   /* -1L on failure; passed through to the caller */

  rewind(f);
  return filesize;
}

/////////////////////////////////////////////////////////////////////////////

// This shows a single-pass process, without
// forward look-ahead, also without file
// characterization of any kind, partially
// predicting the next byte.

// This code is written to show verification
// of the process, but it's easy to convert
// this program to characterize multiple
// pass data to produce correct values.

/*
 * testPartiallyPredictBytes - score rdRELATION() against every byte of an
 * input file and print the hit rate.
 *
 * YOU HAVE TO EDIT THE fopen() CALL BELOW TO POINT THE PROGRAM TO SOME
 * COMPRESSED FILE (for example a .tar.bz2 archive).  As shipped it opens a
 * file named "in" in the current directory.
 *
 * For each input byte the loop:
 *   1. XORs the byte with one pseudo-random byte (jaz) to form 'dat';
 *   2. draws a second pseudo-random byte into the global 'raw' and records
 *      the true relation actFLAG = (raw >= dat);
 *   3. loads the globals read by rdRELATION() from the running values
 *      'sr' and 'sd';
 *   4. calls rdRELATION() and counts the guess when it equals actFLAG;
 *   5. adds a third pseudo-random byte to 'sd', then divides both 'sr' and
 *      'sd' by -2.
 *
 * Output: one line on stdout, "<percent>%  <bytes processed>".  If the file
 * cannot be opened, is empty, or cannot be measured, a message is written
 * to stderr and nothing is printed on stdout.
 *
 * NOTE(review): nCHAR is an int, so an input larger than INT_MAX bytes
 * would not be represented correctly.
 */
void
testPartiallyPredictBytes()
{
  int i, count = 0;

  int sr = 0;
  int sd = 0;

  iFILE = fopen("in", "rb");
  if (iFILE == NULL) {
      perror("in");
      return;
  }

  nCHAR = sizeofFILE(iFILE);
  if (nCHAR <= 0) {
      fprintf(stderr, "in: empty or unmeasurable input file\n");
      fclose(iFILE);
      return;
  }

  for (i = 0; i < nCHAR; i++)
       {
         cnu jaz = rdm8();
         int byte = fgetc(iFILE);
         cnu dat;
         int actFLAG, prdFLAG;

         if (byte == EOF) {
             /* short read: stop instead of folding EOF into the data */
             fprintf(stderr, "in: unexpected end of input after %d bytes\n", i);
             break;
         }
         dat = jaz ^ byte;

         raw = rdm8();

         actFLAG = raw >= dat;

         /* publish the state rdRELATION() reads; 'dat' is NOT part of it */
         oldSD = sd;
         oldSRoldSD = sr - sd;
         sr += raw;
         newSRoldSD = sr - sd;
         newSR = sr;

         prdFLAG = rdRELATION();

         if (actFLAG == prdFLAG) ++count;

         /* advance the running values: halve and flip sign each step */
         sd += rdm8();
         sr /= -2;
         sd /= -2;
       }

  fclose(iFILE);

  if (i == 0) {
      fprintf(stderr, "in: no bytes could be read\n");
      return;
  }

  // show single-pass merit, typically about 75%.
  printf("%6.3f%%  %d\n" , (100. * count) / i , i);
}

/////////////////////////////////////////////////////////////////////////////

/*
 * Program entry point: runs the prediction test once and reports success to
 * the host environment.  (Wrap the call in for(;;) to repeat it endlessly.)
 */
int main(void) { testPartiallyPredictBytes(); return 0; }
