/*

  t-rand.c

  Author: Mika Kojo <mkojo@ssh.fi>

  Copyright (c) 1999 SSH Communications Security, Finland
  All rights reserved.

  Created Wed Jul 19 19:32:22 2000.

  */

/* This program analyzes the SSH's random number generator using
   several simple tests.
   */

#include <math.h>

#include "sshincludes.h"
#include "sshcrypt.h"
#include "sshtimemeasure.h"
#include "sshrand.h"
#include "sshmp.h"
#include "sshmp-kernel.h"

/* FFT is included just to get one FFT implementation into SSH's
   source tree. */
#include "fft.h"

#define MAX_ALLOC_SIZE ((size_t)1024*1024)

/* Acceptance probability for the distribution. This gives the probability
   by which we will accept good instance of the distribution. */
#define ACCEPT_HI_PROB    0.999
#define ACCEPT_LO_PROB    0.00001

/* The block sizes for block test. */
#define MIN_BLOCK_SIZE 64
#define MAX_BLOCK_SIZE 65536

/*** Generic routines for analyzing distributions. */

/* Routines for handling some distributions. */

static double chi_square_sum(double x, unsigned int n)
{
  double z, lambda, l;
  int i;

  /* If n is odd, then we cannot use the integral formula (although
     we will use it anyway!). */
  if (n & 1)
    n++;
  
  lambda = x/2;
  n = n/2;
  
  l = exp(-lambda);
  z = 1.0;
  
  for (i = 0; i < n; i++)
    {
      z -= l;
      l *= lambda;
      l /= (i+1);
    }

  return z;
}

#if 0
/* Compute Chi-square table for testing.

   Compiled out.  For df = 1..10 this prints chi_square_sum() at the
   tabulated point x[df-1], at half of it and at twice it, so the
   output can be compared by hand against a printed chi-square table.
   NOTE(review): the x[] values look like the upper 1% critical values
   for 1..10 degrees of freedom -- confirm against a table. */
static void chi_square_init(void)
{
  unsigned int df;
  double x[10] =
  { 6.63, 9.21, 11.3, 13.3, 15.1, 16.8, 18.5, 20.1, 21.7, 23.2 };

  for (df = 1; df < 11; df++)
    {
      printf(" %2u, %g : %g\n",
             df, x[df-1], chi_square_sum(x[df-1], df));
      printf(" %2u, %g : %g\n",
             df, x[df-1]/2.0, chi_square_sum(x[df-1]/2.0, df));
      printf(" %2u, %g : %g\n",
             df, x[df-1]*2.0, chi_square_sum(x[df-1]*2.0, df));
    }
}
#endif


/* Chi-square test of observed counters against the uniform
   distribution.

   c                  observed count for each of the c_len cells
   c_len              number of cells
   number_of_samples  total number of observations

   Returns chi_square_sum() of the statistic with c_len-1 degrees of
   freedom; callers in this file treat a value above ACCEPT_HI_PROB as
   an unacceptable distribution. */
static double chi_square_uniform(unsigned int *c, size_t c_len,
                                 unsigned int  number_of_samples)
{
  double expected = ((double)number_of_samples / (double)c_len);
  double sum_sq = 0.0;
  double statistic, prob;
  size_t cell;

  /* Sum of squared deviations from the expected cell count. */
  for (cell = 0; cell < c_len; cell++)
    {
      double diff = c[cell] - expected;
      sum_sq += diff*diff;
    }

  /* Dividing by the (common) expected count finishes the statistic. */
  statistic = ((double)c_len/(double)number_of_samples)*sum_sq;

  prob = chi_square_sum(statistic, c_len-1);

#if 0
  printf("Value c = [%u, %u]; n = %u; v = %g with df = %u with p = %g\n",
         c[0], c[1], number_of_samples,
         statistic, c_len-1, prob);
#endif

  return prob;
}


/* Chi-square test of observed counters against an arbitrary expected
   distribution.

   c                  observed count for each of the c_len cells
   p                  expected probability of each cell
   c_len              number of cells
   number_of_samples  total number of observations

   Returns chi_square_sum() of the statistic with c_len-1 degrees of
   freedom. */
static double chi_square_statistic(unsigned int *c,
                                   double       *p,
                                   size_t c_len,
                                   unsigned int number_of_samples)
{
  double statistic = 0.0;
  size_t cell;

  for (cell = 0; cell < c_len; cell++)
    {
      /* Expected count of this cell and the observed deviation. */
      double expected = p[cell]*number_of_samples;
      double diff = c[cell] - expected;

      statistic += diff*diff/expected;
    }

  return chi_square_sum(statistic, c_len-1);
}

/*** Linearity tests. */

/* Returns the XOR of the low 32 bits of x: 1 when an odd number of
   them are set, 0 otherwise.  The word is folded onto itself with
   shifts of 16, 8, 4, 2 and 1, leaving the result in bit 0. */
static int parity(unsigned int x)
{
  unsigned int shift;

  for (shift = 16; shift != 0; shift >>= 1)
    x ^= (x >> shift);

  return x & 1;
}

/* Tests the sample against one linear mask.

   The sample s (s_len octets) is cut into consecutive, non-overlapping
   blocks of b_len octets.  For every block the parity of (block AND
   buf) is computed, and the numbers of 0 and 1 parities are compared
   with the uniform distribution using the chi-square test.

   Returns 1 if the distribution is rejected (a warning is printed),
   0 otherwise.  A zero length mask or a sample shorter than one block
   gives nothing to test and returns 0. */
static int test_linearity_mask(unsigned char *s, size_t s_len,
                               unsigned char *buf, size_t b_len)
{
  size_t i, j;
  double v;
  unsigned int c[2], ns;

  /* A zero length mask would never advance the block loop below. */
  if (b_len == 0)
    return 0;

  /* Clear counters. */
  c[0] = c[1] = 0;
  ns = 0;

  /* Walk over every complete block; "<=" so that the final block is
     included when s_len is an exact multiple of b_len. */
  for (i = 0; i + b_len <= s_len; i += b_len)
    {
      int p = 0;
      for (j = 0; j < b_len; j++)
        p ^= parity(buf[j] & s[i+j]);
      c[p]++;
      ns++;
    }

  /* Nothing was counted, so there is no distribution to judge. */
  if (ns == 0)
    return 0;

  /* Now determine whether the distribution between 0 and 1 is uniform. */
  v = chi_square_uniform(c, 2, ns);
  if (v > ACCEPT_HI_PROB)
    {
      printf("Warning: unacceptable distribution (p = %g).\n", 1.0-v);
      printf("         c = [%u, %u], n = %u.\n",
             c[0], c[1], ns);

      return 1;
    }
  return 0;
}

/* Runs test_linearity_mask() over a family of masks.

   Mask lengths 1 .. MAX_MASK_SIZE-1 are tried.  All 255 non-zero one
   octet masks are tested; for each longer length 50 generated masks
   are tested.

   Returns 0 if no mask was rejected, otherwise the non-zero value
   returned by test_linearity_mask() for the first rejected mask. */
static int test_linearity(unsigned char *s, size_t s_len)
{
#define MAX_MASK_SIZE 5
  unsigned char buf[MAX_MASK_SIZE];
  size_t i, l, m;
  int rv;

  for (l = 1; l < MAX_MASK_SIZE; l++)
    {
      if (l == 1)
        {
          /* One byte masks are handled exhaustively. */
          for (m = 1; m < 256; m++)
            {
              for (i = 0; i < l; i++)
                buf[i] = (unsigned char)m;

              rv = test_linearity_mask(s, s_len, buf, l);
              if (rv != 0)
                {
                  /* m is a size_t; convert it to match the %x
                     conversion. */
                  printf("Warning: Mask 0x%02x detected linearity.\n",
                         (unsigned int)m);
                  return rv;
                }
            }
        }
      else
        {
          /* We work with 50 instances of each > 1 byte mask, using
             simple affine thing to generate the masks.

             NOTE(review): k is advanced only when a zero octet would
             be produced, so all l octets of a mask come out equal.
             Confirm whether k was meant to advance for every octet. */
          for (m = 0; m < 50; m++)
            {
              size_t k = m + 1;

              for (i = 0; i < l; i++)
                {
                  /* Mask octets must be non-zero; skip such k. */
                  while (1)
                    {
                      buf[i] = (unsigned char)((89*k + m + 1) & 0xff);
                      if (buf[i] != 0)
                        break;
                      k++;
                    }
                }

              rv = test_linearity_mask(s, s_len, buf, l);
              if (rv != 0)
                {
                  /* l is a size_t; convert it to match the %u
                     conversion. */
                  printf("Warning: Long mask (l = %u) detected linearity.\n",
                         (unsigned int)l);
                  return rv;
                }
            }
        }
    }
  return 0;
}


/*** Hash test. */

/* Remark. This hash function is pretty linear, and thus not good. We
   should try other hash functions too. Observe that if the hash function
   is poor, and loses information (significantly) then the distribution
   will be biased even if the input stream is random.

   So we should make sure that the hash function is foremost surjective,
   and for given input has almost even distribution. Of course, this
   is not easy to prove (and this given hash function certainly does
   not satisfy it), but as we allow quite a lot of variance very rough
   estimates are ok. 
   */


#define NUM_OF_HASHES 3
/* Ad hoc shift-and-xor hash of the first `bytes' octets of s, reduced
   modulo out_size.  The state starts from a fixed constant; for every
   octet the state is mixed with shifted copies of itself and then
   xored with the octet and with its position. */
static unsigned int test_hash_adhoc(unsigned char *s, size_t bytes,
                                    unsigned int out_size)
{
  unsigned int state = 0xabcd0123;
  unsigned int pos = 0;

  while (pos < bytes)
    {
      unsigned int mix;

      mix = (state << 11) ^ (state >> 23) ^ (state << 1) ^ (state << 5);
      state ^= mix;
      state ^= ((unsigned int)s[pos] ^ pos);
      pos++;
    }

  return state % out_size;
}

#if 0
/* Remark. This hash function is not very good for our tests, as it
   works less than optimally with small "out_size".

   Compiled out.  For every input octet the 32 bit state is mixed as
   (h << 19) ^ (h >> 13) and the octet is added; the final state is
   reduced modulo out_size. */
static unsigned long test_hash_huima(unsigned char *s, size_t bytes,
                                     unsigned int out_size)
{
  int i;
  int size = bytes;
  SshUInt32 h = 0;
  for (i = 0; i < size; i++)
    {
      h = ((h << 19) ^ (h >> 13)) + ((unsigned char *)s)[i];
    }
  return h % out_size;
}
#endif

/* "Copying" hash: the first `bytes' octets of s are shifted into an
   unsigned int, most significant octet first (octets shifted past the
   top of the word are lost), and the result is reduced modulo
   out_size. */
static unsigned int test_hash_copy(unsigned char *s, size_t bytes,
                                   unsigned int out_size)
{
  unsigned int acc = 0;
  unsigned int pos = 0;

  while (pos < bytes)
    {
      acc <<= 8;
      acc |= (unsigned int)s[pos];
      pos++;
    }

  return acc % out_size;
}

/* Variant of the copying hash: each octet is shifted in as in
   test_hash_copy(), and the previous state shifted right by 7 bits is
   xored on top.  The result is reduced modulo out_size. */
static unsigned int test_hash_copy_rot7(unsigned char *s, size_t bytes,
                                         unsigned int out_size)
{
  unsigned int acc = 0;
  unsigned int pos = 0;

  while (pos < bytes)
    {
      unsigned int shifted_in = (acc << 8) | ((unsigned int)s[pos]);

      acc = shifted_in ^ (acc >> 7);
      pos++;
    }

  return acc % out_size;
}


/* Returns the smallest k with 2^k >= x (that is, the base 2 logarithm
   of x rounded up); 0 and 1 both give 0. */
static unsigned int test_block_log2(unsigned int x)
{
  unsigned int power = 1;
  unsigned int exponent = 0;

  while (power < x)
    {
      power <<= 1;
      exponent++;
    }
  return exponent;
}

/* Block (occupancy) test.

   For every hash function, every table size block_size = MIN_BLOCK_SIZE,
   2*MIN_BLOCK_SIZE, ... (below MAX_BLOCK_SIZE) and every window length
   `bytes' (from the number of octets needed to address the table, up
   to 9), each window of `bytes' consecutive octets of the sample is
   hashed to a bit position and that bit is set in a table of
   block_size bits.  The numbers of clear and set bits are then
   compared with their expected values using the chi-square test.

   Returns 0 if every combination is accepted, 1 if a distribution is
   rejected (a warning is printed), and 2 if an unknown hash function
   index is met.  The bit table is released on every path. */
static int test_block(unsigned char *s, size_t s_len)
{
  unsigned char *block;
  size_t block_size, block_bytes, i;
  int hf;
  unsigned int c[2];
  double       p[2], v;

  for (hf = 0; hf < NUM_OF_HASHES; hf++)
    for (block_size = MIN_BLOCK_SIZE;  block_size < MAX_BLOCK_SIZE;
         block_size *= 2)
      {
        unsigned int bytes, nv, n;

        block_bytes = (block_size + 7)/8;
        block = ssh_xcalloc(1, block_bytes);

        for (bytes = (test_block_log2(block_size) + 7)/8;
             bytes < 10; bytes++)
          {
            memset(block, 0, block_bytes);

            /* Hash every window of `bytes' octets; "<=" so that the
               window ending at the last octet is included. */
            for (i = 0, nv = 0; i + bytes <= s_len; i++)
              {
                unsigned int hash;
                switch (hf)
                  {
                  case 0:
                    hash = test_hash_copy(s + i, bytes, block_size);
                    break;
                  case 1:
                    hash = test_hash_copy_rot7(s + i, bytes, block_size);
                    break;
                  case 2:
                    hash = test_hash_adhoc(s + i, bytes, block_size);
                    break;
                  default:
                    printf("ERROR: hash function not available (%u).\n",
                           (unsigned int)hf);
                    ssh_xfree(block);
                    return 2;
                  }
                block[hash/8] |= (1 << (hash % 8));
                nv++;
              }

            /* Compute the hypothetical probabilities.

               This is based on a function

               f_n(m) = m + (n - m)/n,

               which gives the expected number of ones in the table (by
               recursive evaluation).

               We write

               phi(m) = f_n(m) / n = (m + (n-m)/n)/n = m/n + (n - m)/(n^2),

               and compute phi^n(0).

               */
            p[1] = 0;
            for (n = 0; n < nv; n++)
              p[1] = p[1] + (block_size - p[1])/block_size;
            p[1] *= ((double)1.0/block_size);
            p[0] = 1.0 - p[1];

            /* Do the counting. */
            c[0] = 0;
            c[1] = 0;
            for (i = 0; i < block_size; i++)
              {
                if (block[i/8] & (1 << (i%8)))
                  c[1]++;
                else
                  c[0]++;
              }

            /* Run the Chi-square. */
            v = chi_square_statistic(c, p, 2, (unsigned int)block_size);
            if (v > ACCEPT_HI_PROB)
              {
                printf("Warning: unacceptable distribution (p = %g).\n", 1.0-v);
                /* The sizes are size_t; print them through unsigned
                   long so that conversion and argument agree. */
                printf("         c = [%u, %u], p = [%g, %g], n = %lu, s = %lu.\n",
                       c[0], c[1], p[0], p[1],
                       (unsigned long)block_size, (unsigned long)s_len);
                printf("         hash function = %s.\n",
                       hf == 0 ? "copying hash" :
                       hf == 1 ? "copying hash with rotation" :
                       hf == 2 ? "ad hoc hash" :
                       "unnamed hash");

                ssh_xfree(block);
                return 1;
              }
          }

        /* Free the block after use. */
        ssh_xfree(block);
      }
  /* Ok. */
  return 0;
}

/*** A test utilizing FFT as a preprocessor! */


/* This test is rather ad hoc, and I must confess that I have not
   proven that the code actually works (in the sense that it would
   detect "bad" octet sequences).

   However, here is how it apparently works:

   Use FFT to transform the input sequence into complex. Map the
   things back to integers (by computing the moduli).

   The trick is to multiply by a suitable constant (256.0) such that
   the result mapped to octets is uniformly distributed iff the
   moduli have uniformly distributed decimal digits. This should
   hold for good generators; the strange thing is that e.g. ANSI C's
   rand doesn't pass this test.
   
*/


/* FFT based test.

   A block of len = 2^l octets of the sample is transformed with
   fft(); the moduli of the first len/2 outputs are scaled by 256,
   reduced to an octet, and the low four bits of those octets are
   tested against the uniform distribution with the chi-square test.

   Only the first block of the sample is analysed (the block loop is
   left after one round).  With the length selection below, len is
   2^15 when s_len >= 2^15; for shorter samples len comes out larger
   than s_len, so nothing is analysed and 0 is returned.

   Returns 1 if the distribution is rejected (a warning is printed),
   0 otherwise. */
static int test_fft(unsigned char *s, size_t s_len)
{
  double *re, *im;
  unsigned char *data;
  unsigned int i, t, l, len;
  size_t pos;
  int rv = 0;
  unsigned int c[16];
  double v;

  /* Use only 2^l of the input stream. */
  for (l = 1, t = 1; l < 16 && t <= s_len; t <<= 1, l++)
    ;
  l--;
  len = ((unsigned int)1 << l);

  /* We do a very simple transform octet at a time, more choices
     could be implemented. */
  re = ssh_xmalloc(sizeof(double)*len);
  im = ssh_xmalloc(sizeof(double)*len);
  data = ssh_xmalloc(sizeof(unsigned char)*len/2);

  for (pos = 0; (pos + len) <= s_len; pos += len)
    {
      for (i = 0; i < len; i++)
        {
          /* Just throw the input stream in (the block at pos). */
          re[i] = s[pos + i];
          im[i] = 0.0;
        }

      /* Do the FFT. */
      fft(re, im, l, 1);

      /* Do a trick here: keep the low octet of the scaled modulus.
         The value is reduced with fmod() first, because converting a
         double that does not fit into the target integer type is
         undefined. */
      for (i = 0; i < len/2; i++)
        {
          double scaled = 256.0*sqrt(re[i]*re[i] + im[i]*im[i]);
          data[i] = (unsigned char)fmod(scaled, 256.0);
        }

      /* Now let us compute a distribution of this. */
      for (i = 0; i < 16; i++)
        c[i] = 0;
      for (i = 0; i < len/2; i++)
        c[data[i] & 15]++;

      v = chi_square_uniform(c, 16, len/2);
      if (v > ACCEPT_HI_PROB)
        {
          printf("Warning: unacceptable distribution (p = %g).\n", 1.0-v);
          rv = 1;
        }

      /* Only the first block is examined. */
      break;
    }
  ssh_xfree(re);
  ssh_xfree(im);
  ssh_xfree(data);

  return rv;
}


/*** Some random number generators. */

/* ANSI rand.

   Linear congruential generator y = (1103515245*y + 12345) mod 2^31,
   started from y = 12345.  Returns the lowest octet of the new state.

   The modulus mask is written as an unsigned long constant: the
   expression `1 << 31' overflows a 32 bit int, and when converted to a
   64 bit unsigned long it does not give 2^31 - 1.
 */

unsigned char ansi_rand(void)
{
  static unsigned long y0 = 12345;

  y0 = (y0 * 1103515245UL + 12345UL) & 0x7fffffffUL;
  return (unsigned char)(y0 & 0xff);
}

/* Inversive generator. This is way too slow for practical use, but
   probably can be speeded up with some tricks?

   Each call runs an extended Euclidean loop on the previous state y0
   and a modulus, multiplies the resulting coefficient by 1288490188,
   reduces the double word product, adds 121 and stores the result as
   the new state.  The lowest octet of the new state is returned.

   NOTE(review): the modulus is written as (unsigned long)(1 << 31) - 1.
   `1 << 31' overflows a 32 bit int, and with a 64 bit unsigned long
   the converted value is not 2^31 - 1.  Presumably 2^31 - 1 is meant
   and the code assumes 32 bit words -- confirm before changing, as the
   normalisation below depends on the word size. */

unsigned char icg_rand(void)
{
  static unsigned long y0 = 1;
  SshWord u0, u1, v0, v1, x, d0, d1;

  /* Compute with extended gcd. */
  u0 = 1; u1 = y0; v0 = 0; v1 = (unsigned long)(1 << 31) - 1;
  while (v1 != 0)
    {
      unsigned long t1, t2, t3;
      /* One Euclid step: t2 is the quotient, t3 the remainder and t1
         the updated coefficient (computed in unsigned arithmetic). */
      t2 = u1 / v1;
      t3 = u1 % v1;
      t1 = t2 * v0;
      t1 = u0 - t1;
      u0 = v0;
      u1 = v1;
      v0 = t1;
      v1 = t3;
    }
  /* u0 now holds the coefficient of y0; u1 is reused for the
     multiplier. */
  u1 = 1288490188;
  /* NOTE(review): presumably forms the double word product u0*u1 in
     (d0, d1) -- see sshmp-kernel.h for the operand order. */
  SSH_MPK_LONG_MUL(d0, d1, u0, u1);
  /* Divide by normalized. */
  x = ((unsigned long)(1 << 31) - 1) << 1;
  SSH_MPK_LONG_DIV(v0, v1, d0, d1, x);
  /* Undo the one bit shift applied to the divisor above. */
  v1 >>= 1;
  /* Now we know the remainder. */
  /* NOTE(review): the reduction here is by the shifted value x, not by
     the unshifted modulus -- confirm that this is intended. */
  y0 = (v1 + 121) % x;
  return (unsigned char)(y0 & 0xff);
}

/*** Test engine. */

/* The test engine uses first the default sample sizes, and then
   raises the sample size by multiple of 2 if 1 is returned by the
   test method.

   This approach gives means for the test method to see whether the
   bad distribution holds as sample size grows. This should remove
   most false alarms. 
   */

/* Description of one test method, as used by tester(). */
typedef struct
{
  /* Name of the method; compared against the `-m' argument and
     printed when the method is run. */
  char *name;
  /* Length in octets of the first sample given to the method. */
  size_t sample_size;
  /* The test itself.  tester() interprets the return value as:
     0 = success, 1 = retry with a sample twice as long, anything
     else = failure. */
  int (*method)(unsigned char *s, size_t s_len);
} TestMethod;


/* Table of available test methods, in the order tester() runs them.
   Each entry gives the method name, the initial sample size in octets
   and the test function.  The table ends at the entry whose name is
   NULL. */
static TestMethod test_method[] =
{
  { "linearityTest",
    1024*64,
    test_linearity },

  { "blockTest",
    1024*64,
    test_block },

  { "fftLinearityTest",
    65536,
    test_fft },

  /* Terminating method. */
  { NULL, 0, NULL }
};

/* Fills the sample buffer from a generator expression.

   gen     expression evaluated once for every group of `octets'
           sample octets
   v       lvalue that receives each generated value
   octets  number of octets taken from every value, least significant
           octet first

   The macro uses the variables `s' (the sample buffer) and `s_len'
   (its length in octets) of the expanding scope.  The last group is
   truncated at s_len.  The arguments are parenthesised and the
   internal counters carry a prefix so that they cannot collide with
   identifiers used in the arguments. */
#define LINEARIZE(gen, v, octets) \
do { \
  size_t linearize_i_, linearize_j_; \
  for (linearize_i_ = 0; linearize_i_ < s_len; linearize_i_ += (octets)) \
    { \
      (v) = (gen); \
      for (linearize_j_ = 0; \
           linearize_j_ < (octets) && \
             (linearize_i_ + linearize_j_) < s_len; \
           linearize_j_++) \
        s[linearize_i_ + linearize_j_] = \
          (unsigned char)(((v) >> (linearize_j_*8)) & 0xff); \
    } \
} while(0)

/* Runs the test methods against one generator.

   method     name of the single method to run, or NULL to run all
   generator  name of the generator ("rand", "ansi", "sshrand", "icg"
              or "random"), or NULL for ssh_random_get_byte()

   For every selected method a sample of the method's initial size is
   generated and tested.  When the method returns 1 the sample size is
   doubled and the test repeated; exceeding MAX_ALLOC_SIZE counts as a
   failure.  An unknown generator name also counts as a failure, and
   the method is not run on the unfilled buffer.

   Returns 1 if any test failed, 0 otherwise. */
static int tester(char *method, char *generator)
{
  unsigned char *s;
  size_t s_len;
  unsigned int failed = 0, success = 0;
  size_t i;
  int rv;

  for (i = 0; test_method[i].name; i++)
    {
      if (method && strcmp(method, test_method[i].name) != 0)
        continue;

      printf("RUNNING: method=%s\n",
             test_method[i].name);

      s_len = test_method[i].sample_size;

      while (1)
        {
          SshUInt32 t;
          int sample_ready = 1;

          if (s_len > MAX_ALLOC_SIZE)
            {
              printf(" => FAILED: Test requested too big sample.\n");
              failed++;
              break;
            }

          /* Creating the sample. */
          s = ssh_xmalloc(s_len);
          if (generator == NULL)
            {
              /* SSH's random number generator. */
              LINEARIZE(ssh_random_get_byte(), t, 1);
            }
          else if (strcmp(generator, "rand") == 0)
            {
              /* NOTE(review): rand() returns at most RAND_MAX; where
                 that is below 2^32 - 1 the upper octets taken here are
                 not uniformly distributed. */
              LINEARIZE(rand(), t, 4);
            }
          else if (strcmp(generator, "ansi") == 0)
            {
              /* ansi_rand() returns a single octet, so only one octet
                 is taken from every value. */
              LINEARIZE(ansi_rand(), t, 1);
            }
          else if (strcmp(generator, "sshrand") == 0)
            {
              ssh_rand_seed(1111);
              LINEARIZE(ssh_rand(), t, 4);
            }
          else if (strcmp(generator, "icg") == 0)
            {
              LINEARIZE(icg_rand(), t, 1);
            }
          else if (strcmp(generator, "random") == 0)
            {
              srandom(1111);
              LINEARIZE(random(), t, 3);
            }
          else
            {
              printf("error: random number generator '%s' not implemented.\n", generator);
              sample_ready = 0;
            }

          if (!sample_ready)
            {
              /* Do not test an unfilled buffer. */
              ssh_xfree(s);
              printf(" => FAILED.\n");
              failed++;
              break;
            }

          /* Try the test. */
          rv = (*test_method[i].method)(s, s_len);
          switch (rv)
            {
            case 0:
              printf(" => SUCCESS.\n");
              success++;
              break;
            case 1:
              printf(" => Trying again with larger sample (to see if observed problem goes away).\n");
              s_len *= 2;
              break;
            default:
              printf(" => FAILED.\n");
              failed++;
              /* Leave the retry loop. */
              rv = 0;
              break;
            }

          ssh_xfree(s);
          if (rv == 0)
            break;
        }
    }
  printf("RESULT: %u successful tests, %u failed tests.\n", success, failed);
  if (failed)
    return 1;
  return 0;
}

/* Program entry.  Recognised options:

     -g <generator>   generator to analyse
     -m <method>      single test method to run

   Any other argument, or an option without its value, prints an error
   and exits with status 1.  The exit status is otherwise the value
   returned by tester(). */
int main(int ac, char *av[])
{
  char *method = NULL;
  char *generator = NULL;
  int pos = 1;

  while (pos < ac)
    {
      char *opt = av[pos];
      int is_generator = (strcmp(opt, "-g") == 0);
      int is_method = !is_generator && (strcmp(opt, "-m") == 0);

      if (!is_generator && !is_method)
        {
          printf("error: unknown option '%s'.\n", opt);
          exit(1);
        }

      /* Both options take exactly one value. */
      if (pos + 1 >= ac)
        {
          if (is_generator)
            printf("error: you did not specify the generator.\n");
          else
            printf("error: you did not specify the method.\n");
          exit(1);
        }

      if (is_generator)
        generator = av[pos + 1];
      else
        method = av[pos + 1];

      /* Skip the option and its value. */
      pos += 2;
    }

  return tester(method, generator);
}
