#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include <assert.h>
#include <stdint.h>

#include "../../src/util/memory.h"
#include "../../src/util/prng.h"
#include "statistics.h"


/*
 * Number of failed checks detected so far. Every failing check in this file
 * increments it, and main() returns a non-zero status when it is not zero.
 */
static int err;

/*
 * How much the statistical figures may deviate from the expected theoretical
 * values. Much depends on the specific PRNG algorithm chosen in the prng module
 * and on the number of samples submitted for analysis.
 * 3% turned out to be the minimum usable value with either the Windows 7 or
 * the built-in generator in the prng module and 10000 samples for all the
 * cases.
 */
#define FAIL_THRESHOLD 0.03

/*
 * Set to non-zero to turn on detailed statistics reporting and histograms.
 */
static int display_histograms = 0;


/**
 * Test int PRNG uniform distribution.
 * Draws number_of_samples values with prng_getIntInRange(min, max) and compares
 * mean and variance of the samples, and of the differences between successive
 * samples, against the figures expected for a discrete uniform distribution.
 * Every figure deviating more than FAIL_THRESHOLD increments err and prints a
 * message.
 * @param line Source line number of the caller, reported in failure messages.
 * @param min Lower bound (inclusive) of the requested range.
 * @param max Upper bound (inclusive) of the requested range.
 * @param number_of_samples How many random numbers to draw.
 */
static void int_uniform_test(int line, int min, int max, int number_of_samples)
{
	// The generated random numbers should have a uniform distribution.
	statistics_Type *uniform = statistics_new();
	if( display_histograms )
		statistics_histogramEnable(uniform, min - 0.5, max + 0.5, 15);
	
	// The difference between two successive generated random numbers should
	// have the same distribution of the difference between two arbitrary
	// numbers in the range.
	statistics_Type *difference = statistics_new();
	if( display_histograms )
		statistics_histogramEnable(difference, (double) min - max - 0.5, (double) max - min + 0.5, 15);
	
	// Collect data from our PRNG.
	int i, r_prev = 0;
	for(i = 0; i < number_of_samples; i++){
		int r = prng_getIntInRange(min, max);
		assert(min <= r && r <= max);
		statistics_put(uniform, r);
		// The first sample has no predecessor to subtract. The subtraction
		// is done in double because r - r_prev may not fit an int.
		if( i > 0 )
			statistics_put(difference, (double) r - r_prev);
		r_prev = r;
	}
	
	// Retrieve statistics from the statistics module:
	double got_average = statistics_mean(uniform);
	double got_variance = statistics_variance(uniform);
	double got_difference_average = statistics_mean(difference);
	double got_difference_variance = statistics_variance(difference);
	if( display_histograms ){
		printf("\n\n%d int random numbers in the range [%d,%d]:\n",
			number_of_samples, min, max);
		printf("Samples distribution: %s\n", statistics_toString(uniform));
		statistics_histogramPrint(uniform);
		printf("Difference distribution: %s\n", statistics_toString(difference));
		statistics_histogramPrint(difference);
	}
	memory_dispose(uniform);
	memory_dispose(difference);
	
	/*
	 * Expected statistics figures (using Mathematica or Mathics notation):
	 * n = max - min + 1
	 * exp_average = min + Sum[i, {i, 0, n-1}] / n = min + (n-1)/2
	 * exp_variance = Sum[(i - exp_average)^2, {i, 0, n-1}] / n = (n^2 - 1)/12
	 * exp_difference_average = Sum[ Sum[i-j, {j, 0, n-1}], {i, 0, n-1}] / n^2 = 0
	 * exp_difference_variance = Sum[ Sum[(i-j)^2, {j, 0, n-1}], {i, 0, n-1}] / n^2
	 *                         = (n^2 - 1) / 6
	 */
	double n = (double) max - min + 1;
	double exp_average = min + (n-1) / 2;
	double exp_variance = (n*n - 1) / 12;
	double exp_difference_average = 0;
	double exp_difference_variance = (n * n - 1) / 6;
	
	// Compare expected and got values:
	
	// The deviation of the average is measured against half the width of the
	// range rather than against exp_average itself: exp_average is zero or
	// negative for ranges reaching below zero (it is -0.5 for INT_MIN..INT_MAX),
	// which would make the ratio meaningless. For ranges starting at zero the
	// half range equals exp_average, so the result is the same.
	double half_range = (n - 1) / 2;
	if( fabs(got_average - exp_average) / half_range > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got average %g, exp average %g\n",
			line, got_average, exp_average);
	}
	
	if( fabs(got_variance - exp_variance) / exp_variance > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got variance %g, exp variance %g\n",
			line, got_variance, exp_variance);
	}
	
	// The expected value is zero, so the deviation is measured against the
	// width of the range.
	if( fabs(got_difference_average) / n > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got average difference %g, exp average difference %g\n",
			line, got_difference_average, exp_difference_average);
	}
	
	if( fabs(got_difference_variance - exp_difference_variance) / exp_difference_variance > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got difference variance %g, exp difference variance %g\n",
			line, got_difference_variance, exp_difference_variance);
	}
}

/**
 * Test the uniform distribution of the double precision floating point
 * numbers returned by prng_getDouble(), each expected in the range [0,1[.
 * Mean and variance of the samples, and of the differences between successive
 * samples, are compared with the expected figures; every figure off by more
 * than FAIL_THRESHOLD increments err and prints a message.
 * @param line Source line number of the caller, reported in failure messages.
 * @param number_of_samples How many random numbers to draw.
 */
static void double_uniform_test(int line, int number_of_samples)
{
	// Accumulates the samples themselves.
	statistics_Type *samples = statistics_new();
	if( display_histograms )
		statistics_histogramEnable(samples, 0, 1, 15);

	// Accumulates the difference between each sample and the previous one.
	statistics_Type *deltas = statistics_new();
	if( display_histograms )
		statistics_histogramEnable(deltas, -1, 1, 15);

	// Feed both accumulators; the very first sample has no predecessor.
	double previous = 0;
	for(int k = 0; k < number_of_samples; k++){
		double r = prng_getDouble();
		assert(0.0 <= r && r < 1.0);
		statistics_put(samples, r);
		if( k != 0 )
			statistics_put(deltas, r - previous);
		previous = r;
	}

	// Read the figures before the accumulators are released.
	const double mean = statistics_mean(samples);
	const double variance = statistics_variance(samples);
	const double delta_mean = statistics_mean(deltas);
	const double delta_variance = statistics_variance(deltas);

	if( display_histograms ){
		printf("\n\n%d double random numbers in the range [0,1[:\n", number_of_samples);
		printf("Samples distribution: %s\n", statistics_toString(samples));
		statistics_histogramPrint(samples);
		printf("Difference distribution: %s\n", statistics_toString(deltas));
		statistics_histogramPrint(deltas);
	}
	memory_dispose(samples);
	memory_dispose(deltas);

	// Expected figures.
	const double exp_mean = 0.5;
	const double exp_var = 1.0 / 12;
	const double exp_delta_mean = 0.0;
	const double exp_delta_var = 1.0 / 6;

	// Relative deviations; the difference average is expected to be zero,
	// so its absolute value is used as is.
	const double mean_dev = fabs(mean - exp_mean) / exp_mean;
	const double var_dev = fabs(variance - exp_var) / exp_var;
	const double delta_mean_dev = fabs(delta_mean);
	const double delta_var_dev = fabs(delta_variance - exp_delta_var) / exp_delta_var;

	if( mean_dev > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got average %g, exp average %g\n",
			line, mean, exp_mean);
	}

	if( var_dev > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got variance %g, exp variance %g\n",
			line, variance, exp_var);
	}

	if( delta_mean_dev > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got average difference %g, exp average difference %g\n",
			line, delta_mean, exp_delta_mean);
	}

	if( delta_var_dev > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got difference variance %g, exp difference variance %g\n",
			line, delta_variance, exp_delta_var);
	}
}


/**
 * Modulo bias check proposed by Joshua Bloch, "Effective Java", second
 * edition, p. 215. One million random numbers are requested over a range
 * covering about two thirds of the non-negative int values; about half of them
 * should fall below the middle of that range. The naive implementation
 * rand() % n fails this check, yielding something like 666666 rather than the
 * expected 500000. A count farther than 10000 from 500000 increments err and
 * prints a message.
 * @param line Source line number of the caller, reported in the failure message.
 */
static void modulo_bias_test(int line)
{
	const int upper = 2 * (INT_MAX / 3);
	const int middle = upper / 2;
	int below = 0;
	int remaining = 1000000;

	while( remaining-- > 0 ){
		if( prng_getIntInRange(0, upper) < middle )
			below++;
	}

	// Accept anything within 10000 of the ideal 500000.
	if( below < 490000 || below > 510000 ){
		err++;
		printf("in line %d got low %d exp 500000\n", line, below);
	}
}


/**
 * Test prng_fill() by filling-in an array of int numbers, then compare mean
 * and variance of these numbers with the figures expected for values uniformly
 * distributed over the whole int range. Every figure deviating more than
 * FAIL_THRESHOLD increments err and prints a message.
 * @param line Source line number of the caller, reported in failure messages.
 * @param number_of_samples Number of int values to generate; must be positive
 * because it sizes a variable length array.
 */
static void fill_test(int line, int number_of_samples)
{
	int buffer[number_of_samples];
	
	/* Generate array of random int: */
	prng_fill(buffer, number_of_samples*sizeof(int));
	
	/*
	 * Basic statistics on this array. Every int value is acceptable here, so
	 * there is no range check on the single entries. Valid indices are
	 * 0 .. number_of_samples-1.
	 */
	int min = INT_MIN;
	int max = INT_MAX;
	int i;
	statistics_Type *uniform = statistics_new();
	for(i = 0; i < number_of_samples; i++)
		statistics_put(uniform, buffer[i]);
	double got_average = statistics_mean(uniform);
	double got_variance = statistics_variance(uniform);
	memory_dispose(uniform);
	
	/*
	 * Expected figures for a discrete uniform distribution over n = max-min+1
	 * values: average min + (n-1)/2, variance (n^2 - 1)/12.
	 */
	double n = (double) max - min + 1;
	double exp_average = min + (n-1) / 2;
	double exp_variance = (n*n - 1) / 12;
	
	/*
	 * exp_average is -0.5 for the whole int range, so it cannot be used as
	 * the reference for a relative deviation (the ratio would always be
	 * negative and the check could never fail). The deviation is measured
	 * against half the width of the range instead.
	 */
	double half_range = (n - 1) / 2;
	if( fabs(got_average - exp_average) / half_range > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got average %g, exp average %g\n",
			line, got_average, exp_average);
	}
	
	if( fabs(got_variance - exp_variance) / exp_variance > FAIL_THRESHOLD ){
		err++;
		printf("in line %d got variance %g, exp variance %g\n",
			line, got_variance, exp_variance);
	}
}


/*
 * From here on are the U.S. FIPS 140-1 statistical tests as explained in
 * Handbook of Applied Cryptography, note 5.32.
 * FIXME: update to FIPS 140-2.
 */


/**
 * Counts the bits set to one in the sample.
 * @param sample Sample of random bits.
 * @param sample_len Number of 32-bits words in the sample, for a total of
 * 32*sample_len bits.
 * @return Total number of bits set.
 */
static int monobit_test(uint32_t sample[], int sample_len)
{
	int total = 0;
	for(int k = 0; k < sample_len; k++){
		// rest & (rest - 1) clears the lowest bit still set, so the inner
		// loop runs exactly once per bit set in the word.
		for(uint32_t rest = sample[k]; rest != 0; rest &= rest - 1)
			total++;
	}
	return total;
}


/**
 * Performs the poker test. The sample is split into k = 32*sample_len/m
 * consecutive, non-overlapping subsamples of m bits each (bits are scanned
 * starting from the least significant bit of each word; trailing bits that do
 * not complete a subsample are ignored) and the occurrences n[i] of each of
 * the 2^m possible subsamples are counted. The returned figure is
 * (2^m / k) * Sum[n[i]^2] - k.
 * @param sample Sample of random bits.
 * @param sample_len Number of 32-bits words in the sample, for a total of
 * 32*sample_len bits.
 * @param m Length in bits of each subsample. Must be positive and small enough
 * for a table of 2^m counters to fit on the stack.
 * @return Poker test statistic figure.
 */
static double poker_test(uint32_t sample[], int sample_len, int m)
{
	// There are 2^m = 1<<m possible m-bits long subsamples:
	int number_of_subsamples = 1 << m;
	
	// Reset the table of occurrences of each possible m-bits subsample.
	int n[number_of_subsamples];
	int i;
	for(i = 0; i < number_of_subsamples; i++)
		n[i] = 0;
	
	// Build table n[] of occurrences of each subsample. Scans the words of the
	// sample and the bits of each word one by one while building each subsample;
	// when a subsample of m bits is complete, increment its entry n[subsample].
	// An incomplete subsample is carried over from one word to the next.
	int subsample = 0; // here build m-bits subsample
	int subsample_len = 0; // current no. of bits in the subsample
	for(i = 0; i < sample_len; i++){
		uint32_t word = sample[i];
		int j;
		for(j = 0; j < 32; j++){
			int bit = word & 1;
			word = word >> 1;
			subsample = (subsample << 1) | bit;
			subsample_len++;
			if( subsample_len == m ){
				n[subsample]++;
				subsample = 0;
				subsample_len = 0;
			}
		}
	}
	
	// Sum of the squared counts. The square is computed in floating point:
	// as an int product it would overflow as soon as a count exceeds 46340.
	double sum = 0.0;
	for(i = 0; i < number_of_subsamples; i++){
		sum += (double) n[i] * n[i];
	}
	
	// Number of complete subsamples in the sample.
	int k = 32 * sample_len / m;
	return sum * number_of_subsamples / k - k;
}


/**
 * Counts the runs found in the sample, grouped by length and by bit value.
 * A run is a maximal sequence of consecutive equal bits; runs continue across
 * word boundaries.
 * @param sample Sample of random bits. Bits are scanned starting from the least
 * significant bit of each word.
 * @param sample_len Number of 32-bits words in the sample, for a total of
 * 32*sample_len bits.
 * @param runs Receives the counts of the runs found in the sample:
 * runs[i][0] is the number of runs of zeros (gaps) of length "i";
 * runs[i][1] is the number of runs of ones (blocks) of length "i".
 * runs[0][*] is always zero.
 * The last entry runs[runs_len-1][*] accounts for the gaps and blocks that are
 * (runs_len-1) or more bits long.
 * @param runs_len Number of entries in the runs array.
 */
static void runs_test(uint32_t sample[], int sample_len, int runs[][2], int runs_len)
{
	// Start from a clean table.
	for(int len = 0; len < runs_len; len++){
		runs[len][0] = 0;
		runs[len][1] = 0;
	}

	// Entry collecting every run too long to have an entry of its own.
	const int overflow_slot = runs_len - 1;

	int run_bit = 0;
	int run_len = 0; // zero only before the very first bit is seen
	for(int w = 0; w < sample_len; w++){
		uint32_t word = sample[w];
		for(int k = 0; k < 32; k++, word >>= 1){
			int bit = word & 1;

			// Same bit again: the current run just gets longer.
			if( run_len > 0 && bit == run_bit ){
				run_len++;
				continue;
			}

			// The bit flipped: account for the run just ended, if any,
			// then start a new run with this bit.
			if( run_len > 0 )
				runs[run_len < runs_len ? run_len : overflow_slot][run_bit]++;
			run_bit = bit;
			run_len = 1;
		}
	}

	// The run still open at the end of the sample counts too.
	if( run_len > 0 )
		runs[run_len < runs_len ? run_len : overflow_slot][run_bit]++;
}


/**
 * Performs the FIPS 140-1 statistical tests as explained in Handbook of Applied
 * Cryptography, note 5.32. The monobit, poker and runs routines of this file
 * are first checked against the figures of example 5.31 of the same book, then
 * applied to 20000 bits generated with prng_fill(). Every failed check
 * increments err and prints a message.
 */
static void FIPS_140_1_tests(void)
{
	// First check if our routines do really work comparing with example 5.31.
	// -----------------------------------------------------------------------
	
	// The book uses this sample that must be repeated four times. Note that the
	// first char of the string here becomes the bit zero of the first word;
	// the other routines here assume bits are ordered in this same way.
	const char *bits = "11100 01100 01000 10100 11101 11100 10010 01001";
	
	// Build the sample array one 32-bits word at a time, cycling over the
	// bits string to get each bit. The first bit of the sample becomes the
	// bit 0 of the first word.
	uint32_t sample[5];
	int i;
	const char *next_bit = bits;
	for(i = 0; i < 5; i++){
		// Build the next 32-bits word reading the bits string.
		uint32_t word = 0;
		int j;
		for(j = 0; j < 32; j++){
			// Skip anything that is not a binary digit, restarting from the
			// beginning of the string once its end is reached.
			while( *next_bit != '0' && *next_bit != '1' ){
				if( *next_bit == 0 )
					next_bit = bits;
				else
					next_bit++;
			}
			// The shift is done on an unsigned 32-bits value so that setting
			// bit 31 is well defined; a signed int mask would overflow.
			if( *next_bit == '1' )
				word = word | ((uint32_t) 1 << j);
			next_bit++;
		}
		sample[i] = word;
	}
	
	int ones = monobit_test(sample, 5);
	if( ones != 76 ){
		err++;
		printf("in line %d monobit test (internal check, NOT the PRNG): found %d ones\n", __LINE__, ones);
	}
	
	double X3 = poker_test(sample, 5, 3);
	if( fabs(X3 - 9.6415) > 0.001 ){
		err++;
		printf("in line %d poker test (internal check, NOT the PRNG): X3=%g\n", __LINE__, X3);
	}
	
	int got_runs[6][2];
	runs_test(sample, 5, got_runs, 6);
	// Example 5.31-iv; here we also add the counts for gaps (0) and blocks (7) of
	// length 4; extra entry for blocks of length >=5 (zero gaps, zero blocks).
	int exp_runs[][2] = { {0,0}, {8,25}, {20,4}, {12,5}, {0,7}, {0,0}};
	int bit;
	for(bit = 0; bit <= 1; bit++){
		for(i = 0; i < 6; i++){
			if( got_runs[i][bit] != exp_runs[i][bit] ){
				err++;
				printf("in line %d long run test (internal check, NOT the PRNG):"
					" run len %d bit %d got %d, exp %d\n",
					__LINE__, i, bit, got_runs[i][bit], exp_runs[i][bit]);
			}
		}
	}
	
	// Now the real FIPS 140-1 tests applied to our PRNG module.
	// ---------------------------------------------------------
	
	// Create sample of 20000 random bits (625 words of 32 bits each).
	uint32_t fips_sample[625];
	prng_fill(fips_sample, sizeof(fips_sample));
	
	// Monobit test.
	ones = monobit_test(fips_sample, 625);
	if( !(9654 < ones && ones < 10346) ){
		err++;
		printf("in line %d monobit test: found %d ones\n", __LINE__, ones);
	}
	
	// Poker test, on subsamples of 4 bits.
	X3 = poker_test(fips_sample, 625, 4);
	if( !(1.03 < X3 && X3 < 57.4) ){
		err++;
		printf("in line %d poker test: X3=%g\n", __LINE__, X3);
	}
	
	// Runs test. Entry 6 collects the runs of length 6 or more; entry 0 is
	// always zero and so are its bounds.
	int got_fips_runs[7][2];
	runs_test(fips_sample, 625, got_fips_runs, 7);
	int exp_fips_runs_min[] = {0, 2267, 1079, 502, 223, 90, 90}; // 5.32-iii
	int exp_fips_runs_max[] = {0, 2733, 1421, 748, 402, 223, 223}; // 5.32-iii
	for(bit = 0; bit <= 1; bit++){
		for(i = 0; i < 7; i++){
			if( !(exp_fips_runs_min[i] <= got_fips_runs[i][bit]
			&& got_fips_runs[i][bit] <= exp_fips_runs_max[i]) ){
				err++;
				printf("in line %d runs test: run len %d bit %d count got %d, exp [%d,%d]\n",
					__LINE__, i, bit, got_fips_runs[i][bit], exp_fips_runs_min[i], exp_fips_runs_max[i]);
			}
		}
	}
	
	// Long run test: no run of 34 or more bits is allowed. The longer table
	// is needed only if there are runs of 6 or more bits at all.
	if( got_fips_runs[6][0] > 0 || got_fips_runs[6][1] > 0 ){
		int got_fips_long_run[35][2];
		runs_test(fips_sample, 625, got_fips_long_run, 35);
		for(bit = 0; bit <= 1; bit++){
			if( got_fips_long_run[34][bit] != 0 ){
				err++;
				printf("in line %d long run bit %d test 34-bits or more: got %d subsamples, exp zero\n", __LINE__, bit, got_fips_long_run[34][bit]);
			}
		}
	}
}


/**
 * Runs all the tests on the prng module starting from a fixed seed.
 * Calls to the PRNG are never made inside assert(): compiling with NDEBUG
 * would otherwise remove them, changing the sequence of random numbers seen
 * by the tests that follow.
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 if no error was detected, 1 otherwise.
 */
int main(int argc, char** argv)
{
	int a, b, r;
	
	(void) argc;
	(void) argv;
	
	prng_setSeed(123456789);
	
	/* Range 1: */
	r = prng_getIntInRange(0, 0);
	assert( r == 0 );
	r = prng_getIntInRange(1, 1);
	assert( r == 1 );
	r = prng_getIntInRange(-1, -1);
	assert( r == -1 );
	r = prng_getIntInRange(10000000, 10000000);
	assert( r == 10000000 );
	r = prng_getIntInRange(-10000000, -10000000);
	assert( r == -10000000 );
	
	/* Range 2: */
	a = 0;  b = 1;
	r = prng_getIntInRange(a, b);
	assert( a <= r && r <= b );
	
	a = -1;  b = 0;
	r = prng_getIntInRange(a, b);
	assert( a <= r && r <= b );
	
	/* Wider range using statistics: */
	int_uniform_test(__LINE__, 0, 1, 10000);
	int_uniform_test(__LINE__, 0, 99, 10000);
	int_uniform_test(__LINE__, -9, 99, 10000);
	int_uniform_test(__LINE__, 0, INT_MAX/4, 10000);
	int_uniform_test(__LINE__, 0, INT_MAX/3, 10000);
	int_uniform_test(__LINE__, 0, INT_MAX/2, 10000);
	int_uniform_test(__LINE__, 0, INT_MAX - INT_MAX/4, 10000);
	int_uniform_test(__LINE__, INT_MIN, INT_MAX, 10000);
	
	/* Bit generator test: among 100000 bits, half 0, half 1: */
	r = 0;
	for(a = 0; a < 100000; a++)
		if( prng_getIntInRange(0, 1) )
			r++;
	assert( 50000 - 1000 < r && r < 50000 + 1000 );
	
	double_uniform_test(__LINE__, 10000);
	
	modulo_bias_test(__LINE__);
	
	fill_test(__LINE__, 10000);
	
	FIPS_140_1_tests();
	
/*
	{
		// Generate 1 MB file of random bytes.
		// Compression algorithm, for example zip, gzip, should generate
		// a file even larger.
		char sample[1024*1024];
		prng_fill(sample, sizeof(sample));
		char *fn = "prng-test-sample.bin";
		FILE *f = fopen(fn, "wb");
		assert(f != NULL);
		assert( fwrite(sample, 1024, 1024, f) == 1024 );
		fclose(f);
		printf("Generated file %s\n", fn);
	}
*/
	
	/* Whatever memory_report() returns is counted as errors too. */
	err += memory_report();

	return err == 0? 0 : 1;
}

