/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* $Log: sddssampledist.c,v $ Revision 1.16 2010/01/14 22:46:39 shang added -optimalHalton option Revision 1.15 2007/06/21 17:38:12 borland Restored ability to read from a file and write to a pipe. Fixed usage messages for -gaussian, -uniform, and -poisson. Revision 1.14 2007/02/12 23:03:59 soliday Updated to work with WIN32 Revision 1.13 2007/01/03 20:15:36 shang added gaussian, uniform, and poisson distribution Revision 1.12 2006/12/14 22:22:00 soliday Updated a bunch of programs because SDDS_SaveLayout is now called by SDDS_WriteLayout and it is no longer required to be called directly. Also the AutoCheckMode is turned off by default now so I removed calls to SDDS_SetAutoCheckMode that would attempt to turn it off. It is now up to the programmer to turn it on in new programs until debugging is completed and then remove the call to SDDS_SetAutoCheckMode. Revision 1.11 2006/10/19 17:55:40 soliday Updated to work with linux-x86_64. Revision 1.10 2005/11/07 21:48:11 soliday Updated to remove Linux compiler warnings. Revision 1.9 2005/11/04 22:46:17 soliday Updated code to be compiled by a 64 bit processor. Revision 1.8 2005/03/02 01:40:27 borland Fixed bug in halton sequences (uninitialized values) that resulted in very long run times. Added verbose option. Revision 1.7 2003/05/08 18:27:40 borland Added the haltonOffset feature. Revision 1.6 2002/08/14 17:12:53 soliday Added Open License Revision 1.5 2002/02/10 07:38:01 borland Fixed bug that occurred when there was a single input file. Revision 1.4 2002/01/21 01:36:04 borland Fixed bug in randomization when no group was given. Revision 1.3 2002/01/21 00:39:09 borland Added units, factor, and offset qualifiers to -columns option. Revision 1.2 2002/01/20 23:15:11 borland Added option for individual input files for each -column option. Can use Halton sequences instead of random numbers, with grouping and randomization of order. Revision 1.1 2002/01/05 05:17:59 borland First version. */ #include "mdb.h" #include "scan.h" #include "SDDS.h" #include #define CLO_PIPE 0 #define CLO_COLUMNS 1 #define CLO_SAMPLES 2 #define CLO_SEED 3 #define CLO_VERBOSE 4 #define CLO_GAUSSIAN 5 #define CLO_UNIFORM 6 #define CLO_POISSON 7 #define CLO_OPTIMAL_HALTON 8 #define CLO_OPTIONS 9 static char *option[CLO_OPTIONS] = { "pipe", "columns", "samples", "seed", "verbose", "gaussian", "uniform", "poisson", "optimalHalton", } ; char *USAGE1 = "sddssampledist [] [] [-pipe=[in][,out]]\n\ -columns=independentVariable=,{cdf= | df=}[,output=][,units=][,factor=][,offset=][,datafile=][,haltonRadix=[,haltonOffset=][,randomize[,group=]]]\n\ [-columns=...] [-samples=] [-seed=] [-verbose] \n\ [-gaussian=columnName=[,meanValue=|@][,sigmaValue=|@][,units=]] \n\ [-uniform=columnName=[,minimumValue=|@][,maximumValue=|@][,units=]] \n\ [-poisson=columnName=[,meanValue=|@][,units=] [-optimalHalton] \n"; char *USAGE2 = "-columns Gives the name of the independent variable for the distribution\n\ and the name of the cumulative distribution function (CDF) or the\n\ distribution function (DF). The CDF is a nondecreasing function normalized\n\ to 1. The DF need not be normalized (a histogram is acceptable, for\n\ example). The output samples will have the same name as the independent\n\ variable unless the output qualifier is given. By default, the data is\n\ taken from . If the datafile qualifier is given, the data is \n\ taken from the named file. If haltonRadix is given, then the samples\n\ are generated using a Halton sequence, rather than random numbers.\n\ If Halton sequences are used, one can randomize the values in groups\n\ identified by an integer group ID. This prevents strange banding from\n\ arising in configuration space. Also, the haltonOffset can be used to\n\ skip the first values in the halton sequence, which generates a\n\ different but not independent sequence.\n\ If factor or offset are given, the output values are transformed \n\ according to x -> factor*x+offset.\n\ -gaussian Sample gaussian distribution with specified mean and width, \n\ gives the output column. \n\ If mean value or sigma is specified by meanValue=@ or \n\ sigmaValue=@, the mean or sigma value will be obtained from \n\ input file parameter named by corresponding .\n\ Defaut mean is 0 and default sigma is 1. \n"; char *USAGE3 = "-uniform Sample with uniform distribution with specified minimum and maximum value.\n\ gives the output column. \n\ If mimimum or maximum value is specified by minimumValue=@ or \n\ maximumValue=@, the minimum or maximum value will be obtained from \n\ input file parameter named by corresponding .\n\ The default minimum/maximum value is 0/1. \n\ -poisson Sample with poisson distribution with specified mean. gives the \n\ output column. If the mean value is specified by meanValue=@, \n\ the mean value will be obtained from input file parameter named by .\n\ The default mean is 1.\n\ -samples Specifies the number of samples to generate.\n\ -seed Specifies the seed for the random number generator. If not given,\n\ the generator is seeded from the system clock.\n\ -optimalHalton if provided, the improved halton sequence will be used for generating random numbers.\n\ -verbose Prints information to stderr while working.\n\n\ Program by Michael Borland. (This is version 3, March 2005.)\n"; #define SEQ_DATAFILE 0x0001UL #define SEQ_INDEPNAME 0x0002UL #define SEQ_CDFNAME 0x0004UL #define SEQ_DFNAME 0x0008UL #define SEQ_OUTPUTNAME 0x0010UL #define SEQ_HALTONRADIX 0x0020UL #define SEQ_RANDOMIZE 0x0040UL #define SEQ_RANDOMGROUP 0x0080UL #define SEQ_UNITSGIVEN 0x0100UL #define SEQ_HALTONOFFSET 0x0200UL #define SEQ_DIRECT_GAUSSIAN 0x0400UL #define SEQ_DIRECT_UNIFORM 0x0800UL #define SEQ_DIRECT_POISSON 0x1000UL typedef struct { unsigned long flags; char *dataFileName, *indepName, *CDFName, *DFName, *outputName, *units, *meanPar, *sigmaPar, *minPar, *maxPar; SDDS_DATASET SDDSin; int32_t haltonRadix, randomizationGroup, haltonOffset; double factor, offset, mean, min, max, sigma; } SEQ_REQUEST ; typedef struct { long group; long *order; } RANDOMIZED_ORDER; long CreatePoissonDistributionTable(double **x, double **pos_CDF, double mean); int main(int argc, char **argv) { int iArg; char *input, *output,*meanPar, *sigmaPar, *maxPar, *minPar; long i, j, mainInputOpened, haltonID=0, requireInput=0; unsigned long pipeFlags; SCANNED_ARG *scanned; SDDS_DATASET SDDSin, SDDSout, *SDDSptr; long randomNumberSeed = 0; SEQ_REQUEST *seqRequest; long samples, values, seqRequests, randomizationGroups=0; double *sample, *IVValue, *CDFValue; char msgBuffer[1000]; RANDOMIZED_ORDER *randomizationData = NULL; long verbose, optimalHalton=0; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&scanned, argc, argv); if (argc<2) { fprintf(stderr, "%s%s%s\n", USAGE1, USAGE2, USAGE3); return(1); } seqRequest = NULL; seqRequests = 0; output = input = NULL; pipeFlags = 0; samples = values = 0; sample = IVValue = CDFValue = NULL; verbose = 0; maxPar = minPar = meanPar = sigmaPar = NULL; for (iArg=1; iArgIVValue[j]) { sprintf(msgBuffer, "random variate values not monotonically increasing for %s", seqRequest[i].flags&SEQ_DATAFILE? seqRequest[i].dataFileName:input); SDDS_Bomb(msgBuffer); } if (seqRequest[i].flags&SEQ_DFNAME) /* convert DF to CDF */ CDFValue[j] += CDFValue[j-1]; if (CDFValue[j] < CDFValue[j-1]) { sprintf(msgBuffer, "CDF values decreasing for %s", seqRequest[i].flags&SEQ_DATAFILE? seqRequest[i].dataFileName:input); SDDS_Bomb(msgBuffer); } } if (verbose) fprintf(stderr, "Normalizing CDF\n"); /* normalize the CDF */ if (CDFValue[values-1]<=0) { sprintf(msgBuffer, "CDF not valid for %s\n", seqRequest[i].dataFileName); SDDS_Bomb(msgBuffer); } for (j=0; j0) { if (!optimalHalton) nextHaltonSequencePoint(haltonID); else nextModHaltonSequencePoint(haltonID); } } if (verbose) fprintf(stderr, "Generating samples\n"); for (j=0; j=CDFValue[0]) break; } sample[j] = seqRequest[i].factor*interp(IVValue, CDFValue, values, CDF, 0, 1, &code) + seqRequest[i].offset; } if (seqRequest[i].flags&SEQ_RANDOMIZE) { long k, l; double *sample1; if (verbose) fprintf(stderr, "Randomizing order of values\n"); if (!(sample1 = malloc(sizeof(*sample1)*samples))) SDDS_Bomb("memory allocation failure"); for (l=0; l=npoints) { npoints += 20; *x = SDDS_Realloc(*x, sizeof(**x)*npoints); *pos_CDF = SDDS_Realloc(*pos_CDF, sizeof(**pos_CDF)*npoints); pos = SDDS_Realloc(pos, sizeof(*pos)*npoints); } (*x)[count] = i; if (!i) { pos[i] = exp(-mean); (*pos_CDF)[count] = pos[i]; count ++; } else { pos[i] = pos[i-1]*mean/i; (*pos_CDF)[count] = (*pos_CDF)[count-1]; (*pos_CDF)[count+1] = (*pos_CDF)[count-1] + pos[i]; (*x)[count+1] = i; if (1.0-(*pos_CDF)[count+1]<=1.0e-15) break; count +=2; } i++; } /* fprintf(stderr,"lamda=%f\n", mean); if (!SDDS_InitializeOutput(&pos_out, SDDS_BINARY, 0, NULL, NULL, "pos_dist.sdds")) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!SDDS_DefineSimpleColumn(&pos_out, "Count", NULL, SDDS_DOUBLE) || !SDDS_DefineSimpleColumn(&pos_out, "P", NULL, SDDS_DOUBLE)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!SDDS_SaveLayout(&pos_out) || !SDDS_WriteLayout(&pos_out)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!SDDS_StartPage(&pos_out, count) || !SDDS_SetColumnFromDoubles(&pos_out, SDDS_SET_BY_NAME, *x, count, "Count") || !SDDS_SetColumnFromDoubles(&pos_out, SDDS_SET_BY_NAME, *pos_CDF, count, "P")) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!SDDS_WritePage(&pos_out) || !SDDS_Terminate(&pos_out)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); */ free(pos); return count; }