/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* program: sddsdistest.c * purpose: test data against various distributions * M. Borland, August 1995. $Log: sddsdistest.c,v $ Revision 1.8 2005/11/04 22:46:13 soliday Updated code to be compiled by a 64 bit processor. Revision 1.7 2005/10/18 14:14:01 jiaox Corrected PoissonCDF function. Fixed a bug of @parameterName option that did not work. Revision 1.6 2002/08/14 17:12:44 soliday Added Open License Revision 1.5 2001/01/10 19:35:35 soliday Standardized usage message. Revision 1.4 1999/07/12 21:11:56 soliday Added link to fdlibm library for lgamma function on WIN32 Revision 1.3 1999/05/25 19:08:31 soliday Removed compiler warning on linux. Revision 1.2 1999/01/06 19:54:42 borland Fixed the version number in the usage message. Revision 1.1 1997/01/15 18:39:22 borland Found this old program in an old SDDS directory. Think it is ok, but was accidentally left out of repository. */ #include "mdb.h" #include "SDDS.h" #include "SDDSutils.h" #include "scan.h" #if defined(_WIN32) #include "fdlibm.h" #endif void compareToFileDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, long columnNames, char *distFile, char *distFileIndep, char *distFileDepen); void compareToDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, char **sigmaName, long columnNames, long distCode, long degreesFree, char *dofParameter); void ksTestWithFunction(double *data, long rows, double (*CDF)(double x), double *statReturn, double *sigLevelReturn); void chiSquaredTestWithFunction(double *data, long rows, double (*PDF)(double x), double *statReturn, double *sigLevelReturn); static char *USAGE = "sddsdistest [] [] \n\ [-pipe=[in][,out]] \n\ -column=[,sigma=]... -exclude=[,...]\n\ [-degreesOfFreedom={|@}]\n\ [-test={ks|chisquared}] \n\ [{-fileDistribution=,, | \n\ -gaussian | -poisson | -student | -chisquared}]\n\n\ Program by M. Borland. (This is version 2, January 1997.)\n"; #define CLO_PIPE 0 #define CLO_COLUMN 1 #define CLO_TEST 2 #define CLO_FILEDIST 3 #define CLO_GAUSSIAN 4 #define CLO_POISSON 5 #define CLO_STUDENT 6 #define CLO_CHISQUARED 7 #define CLO_DOF 8 #define CLO_EXCLUDE 9 #define N_CL_OPTIONS 10 static char *option[N_CL_OPTIONS] = { "pipe", "column", "test", "filedistribution", "gaussian", "poisson", "student", "chisquared", "degreesoffreedom", "exclude", }; #define KS_TEST 0 #define CHI_TEST 1 #define N_TESTS 2 static char *testChoice[N_TESTS] = { "ks", "chisquared", } ; int main(int argc, char **argv) { int iArg; unsigned long dummyFlags, pipeFlags; SCANNED_ARG *scanned; SDDS_DATASET SDDSin; char *input, *output, *distFile, **columnName, **sigmaName, **excludeName, *distFileIndep, *distFileDepen; long testCode, distCode, code, degreesFree, columnNames, excludeNames; char *dofParameter; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&scanned, argc, argv); if (argc<3) bomb(NULL, USAGE); output = input = distFile = distFileIndep = distFileDepen = NULL; columnName = sigmaName = excludeName = NULL; excludeNames = columnNames = 0; pipeFlags = 0; testCode = 0; distCode = degreesFree = -1; dofParameter = NULL; for (iArg=1; iArg0) return 1- betaInc(DOF/2.0, 0.5, DOF/(DOF+t*t))/2; else return betaInc(DOF/2.0, 0.5, DOF/(DOF+t*t))/2; } #define LOG2 0.693147180559945 double chiSquaredPDF(double x) { double chiSqr, DOFover2; if (x<0) return 0; chiSqr = x*DOF/sampleMean; DOFover2 = DOF/2.0; return exp( (DOFover2-1.0)*log(chiSqr) - chiSqr/2 - DOFover2*LOG2 - lgamma(DOFover2) ); } double chiSquaredCDF(double x) { double chiSqr; if (x<0) x = 0; chiSqr = x*DOF/sampleMean; return 1 - gammaQ(DOF/2.0, chiSqr/2.0); } void compareToDistribution(char *output, long testCode, SDDS_DATASET *SDDSin, char **columnName, char **sigmaName, long columnNames, long distCode, long degreesFree, char *dofParameter) { SDDS_DATASET SDDSout; double *data, *data1, stat, sigLevel; long iStat, iSigLevel, iCount, iColumnName; long rows, icol, row; iStat = iSigLevel = iCount = iColumnName = 0; if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, "sddsdistest output", output) || 0>SDDS_DefineParameter(&SDDSout, "distestDistribution", NULL, NULL, "sddsdistest distribution name", NULL, SDDS_STRING, option[distCode]) || 0>SDDS_DefineParameter(&SDDSout, "distestTest", NULL, NULL, "sddsdistest test name", NULL, SDDS_STRING, testChoice[testCode]) || 0>(iCount=SDDS_DefineParameter(&SDDSout, "Count", NULL, NULL, "Number of data points", NULL, SDDS_LONG, 0))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); switch (testCode) { case KS_TEST: if (0>(iColumnName=SDDS_DefineColumn(&SDDSout, "ColumnName", NULL, NULL, "Column analysed by sddsdistest", NULL, SDDS_STRING, 0)) || 0>(iStat=SDDS_DefineColumn(&SDDSout, "D", NULL, NULL, "Kolmogorov-Smirnov D statistic", NULL, SDDS_DOUBLE, 0)) || 0>(iSigLevel=SDDS_DefineColumn(&SDDSout, "distestSigLevel", "P(D$ba$n>D)", NULL, "Probability of exceeding D", NULL, SDDS_DOUBLE, 0))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); break; case CHI_TEST: if (0>(iColumnName=SDDS_DefineColumn(&SDDSout, "ColumnName", NULL, NULL, "Column analysed by sddsdistest", NULL, SDDS_STRING, 0)) || 0>(iStat=SDDS_DefineParameter(&SDDSout, "ChiSquared", "$gh$r$a2$n", NULL, "Chi-squared statistic", NULL, SDDS_DOUBLE, 0)) || 0>(iSigLevel=SDDS_DefineParameter(&SDDSout, "distestSigLevel", "P($gh$r$a2$n$ba$n>$gh$r$a2$n)", NULL, "Probability of exceeding $gh$r$a2$n", NULL, SDDS_DOUBLE, 0)) ) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); break; default: SDDS_Bomb("Invalid testCode seen in compareToDistribution--this shouldn't happen."); break; } if (!SDDS_WriteLayout(&SDDSout)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); while (SDDS_ReadPage(SDDSin)>0) { if (!SDDS_StartPage(&SDDSout, columnNames)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); rows = SDDS_CountRowsOfInterest(SDDSin); for (icol=0; icol=2) { if (!(data=SDDS_GetColumnInDoubles(SDDSin, columnName[icol]))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (dofParameter) SDDS_GetParameterAsLong(SDDSin,dofParameter,&DOF); else DOF = degreesFree; switch (distCode) { case CLO_GAUSSIAN: computeMoments(&sampleMean, NULL, &sampleStDev, NULL, data, rows); if (testCode==KS_TEST) ksTestWithFunction(data, rows, gaussianCDF, &stat, &sigLevel); else chiSquaredTestWithFunction(data, rows, gaussianPDF, &stat, &sigLevel); break; case CLO_POISSON: computeMoments(&sampleMean, NULL, NULL, NULL, data, rows); if (testCode==KS_TEST) ksTestWithFunction(data, rows, poissonCDF, &stat, &sigLevel); else chiSquaredTestWithFunction(data, rows, poissonPDF, &stat, &sigLevel); break; case CLO_STUDENT: if (DOF<1) SDDS_Bomb("must have at least one degree of freedom for Student distribution tests"); computeMoments(&sampleMean, NULL, NULL, NULL, data, rows); if (sigmaName && sigmaName[icol]) { if (!(data1=SDDS_GetColumnInDoubles(SDDSin, sigmaName[icol]))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); for (row=0; rowdCDFmaximum) dCDFmaximum = dCDF1; if (dCDF2>dCDFmaximum) dCDFmaximum = dCDF2; } *statReturn = dCDFmaximum; *sigLevelReturn = KS_Qfunction(sqrt(1.0*rows)*dCDFmaximum); }