/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* program: sddshist * purpose: SDDS-format histogram command * Based on the mpl program hist. * Michael Borland, 1995 $Log: sddshist.c,v $ Revision 1.24 2006/12/14 22:21:59 soliday Updated a bunch of programs because SDDS_SaveLayout is now called by SDDS_WriteLayout and it is no longer required to be called directly. Also the AutoCheckMode is turned off by default now so I removed calls to SDDS_SetAutoCheckMode that would attempt to turn it off. It is now up to the programmer to turn it on in new programs until debugging is completed and then remove the call to SDDS_SetAutoCheckMode. Revision 1.23 2005/11/14 17:58:49 soliday Fixed a bug where the lower and upper filter could be used when their values were uninitialized. Revision 1.22 2005/11/04 22:46:14 soliday Updated code to be compiled by a 64 bit processor. Revision 1.21 2003/09/02 19:16:04 soliday Cleaned up code for Linux. Revision 1.20 2002/10/05 19:25:25 borland Fixed problems that occurred when the first page was empty. Revision 1.19 2002/08/14 21:01:49 soliday Fixed bug Revision 1.18 2002/08/14 17:12:47 soliday Added Open License Revision 1.17 2002/08/09 13:57:18 borland Added -expand option to allow increasing the range of the histogram by a factor. Revision 1.16 2001/11/30 16:12:06 borland Fixed UMR. Revision 1.15 2001/07/18 18:54:02 borland Output of CDF added by H. Shang. Revision 1.14 2001/01/10 19:35:39 soliday Standardized usage message. 
Revision 1.13 2000/08/08 20:31:21 borland The -sides option now accepts a number giving how many bins to add at each end. Revision 1.12 1999/05/25 19:10:41 soliday Removed compiler warning on Linux. Revision 1.11 1999/01/06 19:54:45 borland Fixed the version number in the usage message. Revision 1.10 1998/12/16 21:26:03 borland Brought up to date with new version of SDDS_TransferAllParameters. Now correctly transfers through parameters, but overwrites them if it needs to do so. Revision 1.9 1998/11/13 22:39:53 borland Now copies through parameters from the input to the output. Revision 1.8 1997/12/19 17:00:51 borland Now uses scanargsg instead of scanargs. Revision 1.7 1997/08/25 19:23:38 borland Apparently just reformatted the code. Revision 1.6 1997/02/05 15:58:32 borland Changed some function prototypes to static to keep the names to myself. * Revision 1.5 1995/11/22 00:39:32 borland * Fixed bug having to do with transfer of a column and changing of its * type without changing the format_string field to match. * * Revision 1.4 1995/11/20 16:24:00 borland * Added code to prevent aborts when there are no rows in the input page. 
* * Revision 1.3 1995/09/06 14:56:36 saunders * First test release of SDDS1.5 * */ #include "mdb.h" #include "scan.h" #include "SDDS.h" #define SET_BINS 0 #define SET_LOWERLIMIT 1 #define SET_UPPERLIMIT 2 #define SET_DATACOLUMN 3 #define SET_FILTER 4 #define SET_BINSIZE 5 #define SET_WEIGHTCOLUMN 6 #define SET_NORMALIZE 7 #define SET_STATISTICS 8 #define SET_SIDES 9 #define SET_VERBOSE 10 #define SET_PIPE 11 #define SET_CDF 12 #define SET_EXPAND 13 #define N_OPTIONS 14 char *option[N_OPTIONS] = { "bins", "lowerlimit", "upperlimit", "datacolumn", "filter", "sizeofbins", "weightcolumn", "normalize", "statistics", "sides", "verbose", "pipe","cdf", "expand", }; char *USAGE="sddshist [] [] [-pipe=[input][,output]]\n\ -dataColumn= [{-bins= | -sizeOfBins=}] \n\ [-lowerLimit=] [-upperLimit=] [-expand=]\n\ [-filter=,,] \n\ [-weightColumn=] [-sides[=]] \n\ [-normalize[={sum|area|peak}]] \n\ [-cdf[=only]] \n\ [-statistics] [-verbose]\n"; static char *additional_help = "\n\ bins number of bins for histogram.\n\ sizeOfBins size of bins for histogram.\n\ lowerLimit lower limit of histogram.\n\ upperLimit upper limit of histogram.\n\ expand expand the range of the histogram by the given factor.\n\ dataColumn give name of column to histogram.\n\ filter histogram only those points for which the named data lies between the given values.\n\ weightColumn weight the histogram with the named data.\n\ normalize normalize the histogram, as specified.\n\ statistics put statistical information in output file.\n\ sides puts sides on the histogram, down to zero level.\n\ cdf If -cdf is given, CDF is included in the file in addition to the histogram.\n\ If -cdf=only is given, the histogram is not included. \n\ verbose activates informational printouts during processing.\n\n\ Program by Michael Borland. (This is version 5, August 2002, M. 
Borland)\n"; #define NORMALIZE_PEAK 0 #define NORMALIZE_AREA 1 #define NORMALIZE_SUM 2 #define NORMALIZE_NO 3 #define N_NORMALIZE_OPTIONS 4 char *normalize_option[N_NORMALIZE_OPTIONS] = { "peak", "area", "sum", "no" } ; static long filter(double *x, double *y, double *filterData, long npts, double lower_filter, double upper_filter); static long setupOutputFile(SDDS_DATASET *outTable, char *outputfile, SDDS_DATASET *inTable, char *inputfile, char *dataColumn,char *weightColumn, char *filterColumn, double lowerFilter, double upperFilter, long doStats, long bins, double binSize, long normalizeMode); /* column and parameter indices for output file */ static long iIndep, iFreq, iBins, iBinSize, iLoFilter, iUpFilter, iMean, iRMS, iStDev, iPoints,iCdf; static short cdfOnly,freOnly; int main(int argc, char **argv) { /* next line defines flags to keep track of what is set in command line: */ long binsGiven, lowerLimitGiven, upperLimitGiven; SDDS_DATASET inTable, outTable; double *data; /* pointer to the array to histogram */ double *filterData; /* pointer to the filter data */ double *weightData; /* pointer to the weight data */ double *hist, *hist1; /* to store the histogram */ double *CDF, *CDF1; /* to store the CDF which is defined as CDF(i)=Sum(hist(j), all j such that j<=i)/Sum(hist(j),all j) */ double sum; /* store the total of hist */ double *indep; /* to store values of bin centers */ double lowerLimit, upperLimit; /* lower and upper limits in histogram */ double givenLowerLimit, givenUpperLimit; /* lower and upper limits in histogram */ double range, binSize; long bins; /* number of bins in the histogram */ long doStats; /* if true, include statistics in output file */ double mean, rms, standDev, mad; char *filterColumn, *dataColumn, *weightColumn; double lowerFilter=0, upperFilter=0; /* filter range */ long points; /* number of data points after filtering */ SCANNED_ARG *scanned; /* scanned argument structure */ char *inputfile, *outputfile; /* array of input 
files, output file */ double dx; /* spacing of bins in histogram */ long i; /* guess what */ long pointsBinned; /* number of points that are in histogram */ long normalizeMode, doSides, verbose, rows, readCode; unsigned long pipeFlags; char *cdf; double expansionFactor = 0; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&scanned, argc, argv); if (argc<3) { fprintf(stderr, "usage: %s\n", USAGE); fputs(additional_help, stderr); exit(1); } binsGiven = lowerLimitGiven = upperLimitGiven = 0; binSize = doSides = 0; inputfile = outputfile = NULL; dataColumn = filterColumn = weightColumn = NULL; doStats = verbose = 0; normalizeMode = NORMALIZE_NO; pipeFlags = 0; dx = 0; cdfOnly=0; freOnly=1; for (i=1; iupperFilter) SDDS_Bomb("invalid -filter syntax/values"); filterColumn = scanned[i].list[1]; break; case SET_WEIGHTCOLUMN: if (weightColumn) SDDS_Bomb("multiple weighting columns not allowed"); if (scanned[i].n_items!=2) SDDS_Bomb("-weightColumn requires a column name"); weightColumn = scanned[i].list[1]; break; case SET_NORMALIZE: if (scanned[i].n_items==1) normalizeMode = NORMALIZE_SUM; else if (scanned[i].n_items!=2 || (normalizeMode=match_string(scanned[i].list[1], normalize_option, N_NORMALIZE_OPTIONS, 0))<0) SDDS_Bomb("invalid -normalize syntax"); break; case SET_STATISTICS: doStats = 1; break; case SET_SIDES: if (scanned[i].n_items==1) doSides = 1; else if (scanned[i].n_items>2 || (sscanf(scanned[i].list[1], "%ld", &doSides)!=1 || doSides<=0)) SDDS_Bomb("invalid -sides syntax"); break; case SET_VERBOSE: verbose = 1; break; case SET_BINSIZE: if (sscanf(scanned[i].list[1], "%le", &binSize)!=1 || binSize<=0) SDDS_Bomb("invalid value for bin size"); break; case SET_PIPE: if (!processPipeOption(scanned[i].list+1, scanned[i].n_items-1, &pipeFlags)) SDDS_Bomb("invalid -pipe syntax"); break; case SET_CDF: if (scanned[i].n_items ==1) cdfOnly=0; else { if (scanned[i].n_items!=2) SDDS_Bomb("invalid -cdf syntax"); cdf = scanned[i].list[1]; if (strcmp(cdf,"only")!=0) 
SDDS_Bomb("invalid -cdf value, it should be -cdf or -cdf=only"); cdfOnly=1; } freOnly=0; break; default: fprintf(stderr, "error: option %s not recognized\n", scanned[i].list[0]); exit(0); break; } } else { /* argument is filename */ if (!inputfile) inputfile = scanned[i].list[0]; else if (!outputfile) outputfile = scanned[i].list[0]; else SDDS_Bomb("too many filenames seen"); } } processFilenames("sddshist", &inputfile, &outputfile, pipeFlags, 0, NULL); if (binSize && binsGiven) SDDS_Bomb("give either -binSize or -bins, not both"); if (!binsGiven) bins = 20; if (!dataColumn) SDDS_Bomb("-dataColumn must be given"); hist = tmalloc(sizeof(*hist)*(bins+2*doSides)); CDF=tmalloc(sizeof(*hist)*(bins+2*doSides)); indep = tmalloc(sizeof(*indep)*(bins+2*doSides)); pointsBinned = 0; if (!SDDS_InitializeInput(&inTable, inputfile) || SDDS_GetColumnIndex(&inTable, dataColumn)<0 || (weightColumn && SDDS_GetColumnIndex(&inTable, weightColumn)<0) || (filterColumn && SDDS_GetColumnIndex(&inTable, filterColumn)<0)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!setupOutputFile(&outTable, outputfile, &inTable, inputfile, dataColumn, weightColumn, filterColumn, lowerFilter, upperFilter, doStats, bins, binSize, normalizeMode)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); data = weightData = filterData = NULL; while ((readCode=SDDS_ReadPage(&inTable))>0) { if ((rows = SDDS_CountRowsOfInterest(&inTable))<0) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (rows && (!(data = SDDS_GetColumnInDoubles(&inTable, dataColumn)) || (weightColumn && !(weightData= SDDS_GetColumnInDoubles(&inTable, weightColumn))) || (filterColumn && !(filterData= SDDS_GetColumnInDoubles(&inTable, filterColumn))))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (rows && filterColumn) points = filter(data, weightData, filterData, rows, lowerFilter, upperFilter); else points = rows; pointsBinned = 0; 
if (points) { if (doStats) { if (!weightColumn) computeMoments(&mean, &rms, &standDev, &mad, data, points); else computeWeightedMoments(&mean, &rms, &standDev, &mad, data, weightData, points); } if (!lowerLimitGiven) { lowerLimit = 0; if (points) lowerLimit = data[0]; for (i=0; idata[i]) lowerLimit = data[i]; } else lowerLimit = givenLowerLimit; if (!upperLimitGiven) { upperLimit = 0; if (points) upperLimit = data[0]; for (i=0; i0) { double center; center = (upperLimit+lowerLimit)/2; range = expansionFactor*(upperLimit-lowerLimit); lowerLimit = center - range/2; upperLimit = center + range/2; } dx = (upperLimit-lowerLimit)/bins; if (binSize) { double middle; range = ((range/binSize)+1)*binSize; middle = (lowerLimit+upperLimit)/2; lowerLimit = middle-range/2; upperLimit = middle+range/2; dx = binSize; bins = range/binSize+0.5; if (bins<1 && !doSides) bins = 2*doSides; indep = trealloc(indep, sizeof(*indep)*(bins+2*doSides)); hist = trealloc(hist, sizeof(*hist)*(bins+2*doSides)); CDF = trealloc(CDF, sizeof(*hist)*(bins+2*doSides)); } for (i=-doSides; iupperFilter) keep[i] = 0; else keep[i] = 1; } for (i=j=0; i