2008-06-25 03:33:36 -04:00
|
|
|
/* stat.c -- statistical tests of random number sequences. */
|
|
|
|
|
|
|
|
/*
|
|
|
|
Copyright 1999, 2000 Free Software Foundation, Inc.
|
|
|
|
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or (at your
|
|
|
|
option) any later version.
|
|
|
|
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
|
|
License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
|
|
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
|
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Examples:
|
|
|
|
|
|
|
|
$ gen 1000 | stat
|
|
|
|
Test 1000 real numbers.
|
|
|
|
|
|
|
|
$ gen 30000 | stat -2 1000
|
|
|
|
Test 1000 real numbers 30 times and then test the 30 results in a
|
|
|
|
``second level''.
|
|
|
|
|
|
|
|
$ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
|
|
|
|
Test 1000 integers 0 <= X <= 2^32-1.
|
|
|
|
|
|
|
|
$ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
|
|
|
|
Test 1000 integers 0 <= X <= 2^34-1.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <math.h>
|
2009-02-12 05:24:24 -05:00
|
|
|
#include "mpir.h"
|
2008-06-25 03:33:36 -04:00
|
|
|
#include "gmpstat.h"
|
|
|
|
|
|
|
|
#if !HAVE_DECL_OPTARG
|
|
|
|
extern char *optarg;
|
|
|
|
extern int optind, opterr;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define FVECSIZ (100000L)
|
|
|
|
|
|
|
|
int g_debug = 0;
|
|
|
|
|
|
|
|
static void
|
|
|
|
print_ks_results (mpf_t f_p, mpf_t f_p_prob,
|
|
|
|
mpf_t f_m, mpf_t f_m_prob,
|
|
|
|
FILE *fp)
|
|
|
|
{
|
|
|
|
double p, pp, m, mp;
|
|
|
|
|
|
|
|
p = mpf_get_d (f_p);
|
|
|
|
m = mpf_get_d (f_m);
|
|
|
|
pp = mpf_get_d (f_p_prob);
|
|
|
|
mp = mpf_get_d (f_m_prob);
|
|
|
|
|
|
|
|
fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
|
|
|
|
fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
print_x2_table (unsigned int v, FILE *fp)
|
|
|
|
{
|
|
|
|
double t[7];
|
|
|
|
int f;
|
|
|
|
|
|
|
|
|
|
|
|
fprintf (fp, "Chi-square table for v=%u\n", v);
|
|
|
|
fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
|
|
|
|
x2_table (t, v);
|
|
|
|
for (f = 0; f < 7; f++)
|
|
|
|
fprintf (fp, "%.2f\t", t[f]);
|
|
|
|
fputs ("\n", fp);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Pks () -- Distribution function for KS results with a big n (like 1000
|
|
|
|
or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
|
|
|
|
/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */
|
|
|
|
|
|
|
|
static void
|
|
|
|
Pks (mpf_t p, mpf_t x)
|
|
|
|
{
|
|
|
|
double dt; /* temp double */
|
|
|
|
|
|
|
|
mpf_set (p, x);
|
|
|
|
mpf_mul (p, p, p); /* p = x^2 */
|
|
|
|
mpf_mul_ui (p, p, 2); /* p = 2*x^2 */
|
|
|
|
mpf_neg (p, p); /* p = -2*x^2 */
|
|
|
|
/* No pow() in gmp. Use doubles. */
|
|
|
|
/* FIXME: Use exp()? */
|
|
|
|
dt = pow (M_E, mpf_get_d (p));
|
|
|
|
mpf_set_d (p, dt);
|
|
|
|
mpf_ui_sub (p, 1, p);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* f_freq() -- frequency test on real numbers 0<=f<1*/
|
|
|
|
static void
|
|
|
|
f_freq (const unsigned l1runs, const unsigned l2runs,
|
|
|
|
mpf_t fvec[], const unsigned long n)
|
|
|
|
{
|
|
|
|
unsigned f;
|
|
|
|
mpf_t f_p, f_p_prob;
|
|
|
|
mpf_t f_m, f_m_prob;
|
|
|
|
mpf_t *l1res; /* level 1 result array */
|
|
|
|
|
|
|
|
mpf_init (f_p); mpf_init (f_m);
|
|
|
|
mpf_init (f_p_prob); mpf_init (f_m_prob);
|
|
|
|
|
|
|
|
|
|
|
|
/* Allocate space for 1st level results. */
|
|
|
|
l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
|
|
|
|
if (NULL == l1res)
|
|
|
|
{
|
|
|
|
fprintf (stderr, "stat: malloc failure\n");
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
|
|
|
|
printf ("\tKp\t\tKm\n");
|
|
|
|
|
|
|
|
for (f = 0; f < l2runs; f++)
|
|
|
|
{
|
|
|
|
/* f_printvec (fvec, n); */
|
|
|
|
mpf_freqt (f_p, f_m, fvec + f * n, n);
|
|
|
|
|
|
|
|
/* what's the probability of getting these results? */
|
|
|
|
ks_table (f_p_prob, f_p, n);
|
|
|
|
ks_table (f_m_prob, f_m, n);
|
|
|
|
|
|
|
|
if (l1runs == 0)
|
|
|
|
{
|
|
|
|
/*printf ("%u:\t", f + 1);*/
|
|
|
|
print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* save result */
|
|
|
|
mpf_init_set (l1res[f], f_p);
|
|
|
|
mpf_init_set (l1res[f + l2runs], f_m);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now, apply the KS test on the results from the 1st level rounds
|
|
|
|
with the distribution
|
|
|
|
F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */
|
|
|
|
|
|
|
|
if (l1runs != 0)
|
|
|
|
{
|
|
|
|
/*printf ("-------------------------------------\n");*/
|
|
|
|
|
|
|
|
/* The Kp's. */
|
|
|
|
ks (f_p, f_m, l1res, Pks, l2runs);
|
|
|
|
ks_table (f_p_prob, f_p, l2runs);
|
|
|
|
ks_table (f_m_prob, f_m, l2runs);
|
|
|
|
printf ("Kp:\t");
|
|
|
|
print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
|
|
|
|
|
|
|
|
/* The Km's. */
|
|
|
|
ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
|
|
|
|
ks_table (f_p_prob, f_p, l2runs);
|
|
|
|
ks_table (f_m_prob, f_m, l2runs);
|
|
|
|
printf ("Km:\t");
|
|
|
|
print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
|
|
|
|
}
|
|
|
|
|
|
|
|
mpf_clear (f_p); mpf_clear (f_m);
|
|
|
|
mpf_clear (f_p_prob); mpf_clear (f_m_prob);
|
|
|
|
free (l1res);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
|
|
|
|
0<=z<=MAX */
|
|
|
|
static void
|
|
|
|
z_freq (const unsigned l1runs,
|
|
|
|
const unsigned l2runs,
|
|
|
|
mpz_t zvec[],
|
|
|
|
const unsigned long n,
|
|
|
|
unsigned int max)
|
|
|
|
{
|
|
|
|
mpf_t V; /* result */
|
|
|
|
double d_V; /* result as a double */
|
|
|
|
|
|
|
|
mpf_init (V);
|
|
|
|
|
|
|
|
|
|
|
|
printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
|
|
|
|
print_x2_table (max, stdout);
|
|
|
|
|
|
|
|
mpz_freqt (V, zvec, max, n);
|
|
|
|
|
|
|
|
d_V = mpf_get_d (V);
|
|
|
|
printf ("V = %.2f (n = %lu)\n", d_V, n);
|
|
|
|
|
|
|
|
mpf_clear (V);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned int stat_debug = 0;
|
|
|
|
|
|
|
|
int
|
|
|
|
main (argc, argv)
|
|
|
|
int argc;
|
|
|
|
char *argv[];
|
|
|
|
{
|
|
|
|
const char usage[] =
|
|
|
|
"usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
|
|
|
|
" file filename\n" \
|
|
|
|
" -2 runs perform 2-level test with RUNS runs on 1st level\n" \
|
|
|
|
" -d increase debugging level\n" \
|
|
|
|
" -i max input is integers 0 <= Z <= MAX\n" \
|
|
|
|
" -r max input is real numbers 0 <= R < 1 and use MAX as\n" \
|
|
|
|
" maximum value when converting real numbers to integers\n" \
|
|
|
|
"";
|
|
|
|
|
|
|
|
mpf_t fvec[FVECSIZ];
|
|
|
|
mpz_t zvec[FVECSIZ];
|
|
|
|
unsigned long int f, n, vecentries;
|
|
|
|
char *filen;
|
|
|
|
FILE *fp;
|
|
|
|
int c;
|
|
|
|
int omitoutput = 0;
|
|
|
|
int realinput = -1; /* 1: input is real numbers 0<=R<1;
|
|
|
|
0: input is integers 0 <= Z <= MAX. */
|
|
|
|
long l1runs = 0, /* 1st level runs */
|
|
|
|
l2runs = 1; /* 2nd level runs */
|
|
|
|
mpf_t f_temp;
|
|
|
|
mpz_t z_imax; /* max value when converting between
|
|
|
|
real number and integer. */
|
|
|
|
mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for
|
|
|
|
convenience */
|
|
|
|
mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for
|
|
|
|
convenience */
|
|
|
|
|
|
|
|
|
|
|
|
mpf_init (f_temp);
|
|
|
|
mpz_init_set_ui (z_imax, 0x7fffffff);
|
|
|
|
mpf_init (f_imax_plus1);
|
|
|
|
mpf_init (f_imax_minus1);
|
|
|
|
|
|
|
|
while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
|
|
|
|
switch (c)
|
|
|
|
{
|
|
|
|
case '2':
|
|
|
|
l1runs = atol (optarg);
|
|
|
|
l2runs = -1; /* set later on */
|
|
|
|
break;
|
|
|
|
case 'd': /* increase debug level */
|
|
|
|
stat_debug++;
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
if (1 == realinput)
|
|
|
|
{
|
|
|
|
fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
if (mpz_set_str (z_imax, optarg, 0))
|
|
|
|
{
|
|
|
|
fprintf (stderr, "stat: bad max value %s\n", optarg);
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
realinput = 0;
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
if (0 == realinput)
|
|
|
|
{
|
|
|
|
fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
if (mpz_set_str (z_imax, optarg, 0))
|
|
|
|
{
|
|
|
|
fprintf (stderr, "stat: bad max value %s\n", optarg);
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
realinput = 1;
|
|
|
|
break;
|
|
|
|
case 'o':
|
|
|
|
omitoutput = atoi (optarg);
|
|
|
|
break;
|
|
|
|
case '?':
|
|
|
|
default:
|
|
|
|
fputs (usage, stderr);
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
argc -= optind;
|
|
|
|
argv += optind;
|
|
|
|
|
|
|
|
if (argc < 1)
|
|
|
|
fp = stdin;
|
|
|
|
else
|
|
|
|
filen = argv[0];
|
|
|
|
|
|
|
|
if (fp != stdin)
|
|
|
|
if (NULL == (fp = fopen (filen, "r")))
|
|
|
|
{
|
|
|
|
perror (filen);
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (-1 == realinput)
|
|
|
|
realinput = 1; /* default is real numbers */
|
|
|
|
|
|
|
|
/* read file and fill appropriate vec */
|
|
|
|
if (1 == realinput) /* real input */
|
|
|
|
{
|
|
|
|
for (f = 0; f < FVECSIZ ; f++)
|
|
|
|
{
|
|
|
|
mpf_init (fvec[f]);
|
|
|
|
if (!mpf_inp_str (fvec[f], fp, 10))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /* integer input */
|
|
|
|
{
|
|
|
|
for (f = 0; f < FVECSIZ ; f++)
|
|
|
|
{
|
|
|
|
mpz_init (zvec[f]);
|
|
|
|
if (!mpz_inp_str (zvec[f], fp, 10))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
vecentries = n = f; /* number of entries read */
|
|
|
|
fclose (fp);
|
|
|
|
|
|
|
|
if (FVECSIZ == f)
|
|
|
|
fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
|
|
|
|
"of only %ld entries. sorry.\n", FVECSIZ);
|
|
|
|
|
|
|
|
printf ("Got %lu numbers.\n", n);
|
|
|
|
|
|
|
|
/* convert and fill the other vec */
|
|
|
|
/* since fvec[] contains 0<=f<1 and we want ivec[] to contain
|
|
|
|
0<=z<=imax and we are truncating all fractions when
|
|
|
|
converting float to int, we have to add 1 to imax.*/
|
|
|
|
mpf_set_z (f_imax_plus1, z_imax);
|
|
|
|
mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
|
|
|
|
if (1 == realinput) /* fill zvec[] */
|
|
|
|
{
|
|
|
|
for (f = 0; f < n; f++)
|
|
|
|
{
|
|
|
|
mpf_mul (f_temp, fvec[f], f_imax_plus1);
|
|
|
|
mpz_init (zvec[f]);
|
|
|
|
mpz_set_f (zvec[f], f_temp); /* truncating fraction */
|
|
|
|
if (stat_debug > 1)
|
|
|
|
{
|
|
|
|
mpz_out_str (stderr, 10, zvec[f]);
|
|
|
|
fputs ("\n", stderr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /* integer input; fill fvec[] */
|
|
|
|
{
|
|
|
|
/* mpf_set_z (f_imax_minus1, z_imax);
|
|
|
|
mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
|
|
|
|
for (f = 0; f < n; f++)
|
|
|
|
{
|
|
|
|
mpf_init (fvec[f]);
|
|
|
|
mpf_set_z (fvec[f], zvec[f]);
|
|
|
|
mpf_div (fvec[f], fvec[f], f_imax_plus1);
|
|
|
|
if (stat_debug > 1)
|
|
|
|
{
|
|
|
|
mpf_out_str (stderr, 10, 0, fvec[f]);
|
|
|
|
fputs ("\n", stderr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 2 levels? */
|
|
|
|
if (1 != l2runs)
|
|
|
|
{
|
|
|
|
l2runs = n / l1runs;
|
|
|
|
printf ("Doing %ld second level rounds "\
|
|
|
|
"with %ld entries in each round", l2runs, l1runs);
|
|
|
|
if (n % l1runs)
|
|
|
|
printf (" (discarding %ld entr%s)", n % l1runs,
|
|
|
|
n % l1runs == 1 ? "y" : "ies");
|
|
|
|
puts (".");
|
|
|
|
n = l1runs;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef DONT_FFREQ
|
|
|
|
f_freq (l1runs, l2runs, fvec, n);
|
|
|
|
#endif
|
|
|
|
#ifdef DO_ZFREQ
|
|
|
|
z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
mpf_clear (f_temp); mpz_clear (z_imax);
|
|
|
|
mpf_clear (f_imax_plus1);
|
|
|
|
mpf_clear (f_imax_minus1);
|
|
|
|
for (f = 0; f < vecentries; f++)
|
|
|
|
{
|
|
|
|
mpf_clear (fvec[f]);
|
|
|
|
mpz_clear (zvec[f]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|