/*************************************************************************\
* Copyright (c) 2002 The University of Chicago, as Operator of Argonne
* National Laboratory.
* Copyright (c) 2002 The Regents of the University of California, as
* Operator of Los Alamos National Laboratory.
* This file is distributed subject to a Software License Agreement found
* in the file LICENSE that is included with this distribution. 
\*************************************************************************/

/* program: sddsenvelope
 * purpose: combine data from SDDS pages to make a new file containing
 *          minimum, maximum, etc. values for specified columns
 *
 * Michael Borland, 1995
 $Log: sddsenvelope.c,v $
 Revision 1.25  2011/02/25 17:43:27  borland
 Fixed bug in computation of slope and intercept.

 Revision 1.24  2010/11/05 04:05:49  borland
 For PMAXIMUM and PMINIMUM, the functionOf name is now part of the output
 column name.

 Revision 1.23  2010/11/05 04:05:10  borland
 Fixed problem with putting functionOf column name into symbol name.

 Revision 1.22  2010/10/23 22:01:23  borland
 Added pminimum and pmaximum processing.

 Revision 1.21  2006/12/14 22:21:58  soliday
 Updated a bunch of programs because SDDS_SaveLayout is now called by
 SDDS_WriteLayout and it is no longer required to be called directly.
 Also the AutoCheckMode is turned off by default now so I removed calls to
 SDDS_SetAutoCheckMode that would attempt to turn it off. It is now up to
 the programmer to turn it on in new programs until debugging is completed
 and then remove the call to SDDS_SetAutoCheckMode.

 Revision 1.20  2005/11/04 22:46:13  soliday
 Updated code to be compiled by a 64 bit processor.

 Revision 1.19  2004/02/14 06:58:06  borland
 Added -signedLargest option.

 Revision 1.18  2003/07/03 20:48:50  borland
 Fixed bug in last revision: didn't work for percentiles with wildcards.

 Revision 1.17  2003/06/13 22:58:44  borland
 Percentages can now be floating-point numbers for -percentile option.
 Revision 1.16  2002/11/12 02:23:37  borland
 Added -percentile processing.

 Revision 1.15  2002/11/09 00:18:13  borland
 Added -largest processing mode.

 Revision 1.14  2002/08/14 17:12:44  soliday
 Added Open License

 Revision 1.13  2001/11/14 22:19:32  shang
 remove printing warning messages when pages<2

 Revision 1.12  2001/11/14 21:28:56  shang
 added weighted statistical analysis features

 Revision 1.11  2001/01/10 19:35:36  soliday
 Standardized usage message.

 Revision 1.10  2000/04/06 14:27:11  borland
 Fixed uninitialized memory read that showed up with median/decile range
 addition.

 Revision 1.9  2000/04/06 09:32:31  emery
 Added median and decile range processing.

 Revision 1.8  1999/05/25 19:08:55  soliday
 Removed compiler warning on linux.

 Revision 1.7  1999/01/06 19:54:43  borland
 Fixed the version number in the usage message.

 Revision 1.6  1996/09/06 15:47:11  borland
 Fixed problem with naming of columns produced by -sum with power greater
 than 1. Names were not allowable after SDDS1.5. Also improved error
 message handling.

 * Revision 1.5  1996/01/21 00:13:37  borland
 * Put in initializations for some variables, including value2, value3, and
 * value4 arrays in structures.
 *
 * Revision 1.4  1995/11/10 21:59:22  borland
 * Fixed bug for slope/intercept computation; was passing SDDS_DATASET structure when I
 * should have passed a pointer to same.
* * Revision 1.3 1995/09/06 14:56:24 saunders * First test release of SDDS1.5 * */
#include "mdb.h"
#include "scan.h"
#include "SDDS.h"
/* NOTE(review): the header name of the following #include is absent in this
 * copy of the file (apparently lost in extraction) -- restore it before
 * compiling. */
#include

/* Option codes.  Each value is the index of the corresponding keyword in
 * option[] and of the corresponding entry in optionSuffix[]; main() switches
 * on these codes and prints option[code] in its syntax-error messages. */
#define SET_COPY 0
#define SET_MAXIMA 1
#define SET_MINIMA 2
#define SET_MEANS 3
#define SET_SDS 4
#define SET_RMSS 5
#define SET_SUMS 6
#define SET_SLOPE 7
#define SET_INTERCEPT 8
#define SET_PIPE 9
#define SET_SIGMAS 10
#define SET_MEDIAN 11
#define SET_DRANGE 12
#define SET_WMEANS 13
#define SET_WSDS 14
#define SET_WRMSS 15
#define SET_WSIGMAS 16
#define SET_NOWARNINGS 17
#define SET_LARGEST 18
#define SET_PERCENTILE 19
#define SET_SIGNEDLARGEST 20
#define SET_PMAXIMA 21
#define SET_PMINIMA 22
/* Number of entries in option[] and optionSuffix[]. */
#define N_OPTIONS 23

/* Command-line option keywords, ordered by option code (SET_*). */
char *option[N_OPTIONS] = {
  "copy", "maximum", "minimum", "mean", "standarddeviations", "rms", "sum",
  "slope", "intercept", "pipe", "sigmas", "median", "decilerange","wmean",
  "wstandarddeviations", "wrms","wsigma","nowarnings", "largest", "percentile",
  "signedlargest", "pmaximum", "pminimum"
} ;

/* Per-option suffix strings, ordered by option code (SET_*).  The entries for
 * copy, pipe and nowarnings are empty.
 * NOTE(review): presumably appended to source column names to form result
 * column names; the code that reads this table is not visible here -- confirm. */
char *optionSuffix[N_OPTIONS] = {
  "", "Max", "Min", "Mean", "StDev", "Rms", "Sum", "Slope", "Intercept", "",
  "Sigma", "Median", "DRange","WMean","WStDev","WRms","WSigma","", "Largest",
  "Percentile", "SignedLargest", "PMaximum", "PMinimum",
} ;

/* this structure stores a command-line request for statistics computation */
/* columnName may contain wildcards */
typedef struct {
  /* Column name or wildcard pattern; compileStatDefinitions() either looks it
   * up directly or expands it against the input file's columns. */
  char *columnName;
  /* NOTE(review): presumably the weight column for the weighted (w*) options,
   * unset otherwise -- the code that fills this in is not visible here. */
  char *weightColumnName;
  /* optionCode: presumably one of the SET_* codes; sumPower: presumably the
   * power given with -sum -- both are set outside the visible code. */
  long optionCode, sumPower;
  /* Percentage for -percentile requests (main() passes 0 for other options). */
  double percentile;
  /* For -percentile requests main() passes the percentage as typed on the
   * command line; NULL is passed for the slope/intercept/pmin/pmax options. */
  char *percentileString;
  /* Name of an input-file parameter; when non-NULL, compileStatDefinitions()
   * reports an error if the parameter does not exist. */
  char *functionOf;
} STAT_REQUEST;

/* this structure stores data necessary for accessing/creating SDDS columns and
 * for computing a statistic */
typedef struct {
  /* sourceColumn, weightColumn and functionOf are copied from the matching
   * STAT_REQUEST by compileStatDefinitions(); resultColumn is assigned
   * elsewhere (not visible here). */
  char *sourceColumn, *weightColumn, *resultColumn, *functionOf;
  /* optionCode and sumPower are copied from the request; resultIndex is
   * assigned elsewhere (not visible here). */
  long optionCode, resultIndex, sumPower;
  double percentile;
  char *percentileString;
  /* these store intermediate values during processing */
  /* All of the pointers below are set to NULL by compileStatDefinitions(). */
  void *copy;
  double *value1, *value2, *value3, *value4;
  double **array;
  double *sumWeight;
} STAT_DEFINITION;

/* (this declaration continues on the next source line) */
long addStatRequests(STAT_REQUEST **statRequest, long requests, char 
**item, long items, long code, double percentile, long power, char *functionOf, long weighted, char *percentileString); /*weighted=0, no weighted column; else, weighted statistic, the weight factor is given by weightedColumn*/ STAT_DEFINITION *compileStatDefinitions(SDDS_DATASET *inTable, long *stats, STAT_REQUEST *request, long requests); long setupOutputFile(SDDS_DATASET *outTable, char *output, SDDS_DATASET *inTable, STAT_DEFINITION *stat, long stats, long rows); static char *USAGE="sddsenvelope [] [] [-copy=] \n\ [-pipe=[input][,output]] [-nowarnings]\n\ [-maximum=] [-minimum=] \n\ [-pmaximum=,] [-pminimum=,] \n\ [-largest=] [-signedLargest=] \n\ [-mean=] [-sum=,] \n\ [-median=] [-decilerange=] \n\ [-percentile=,] \n\ [-standarddeviation=] [-rms=] \n\ [-sigma=] \n\ [-slope=,]\n\ [-intercept=,] \n\ [-wmean=,] \n\ [-wstandarddeviation=,] \n\ [-wrms=,] \n\ [-wsigma=,] \n\n\ Processes pages from to produce with\n\ one page containing the specified quantities across pages\n\ for each row of the specified columns.\n\ Program by Michael Borland. (This is version 9, October 2010, M. 
Borland.)"; int main(int argc, char **argv) { STAT_DEFINITION *stat; long stats; STAT_REQUEST *request; long requests; SCANNED_ARG *scanned; /* structure for scanned arguments */ SDDS_DATASET inTable, outTable; long i_arg, code, power, iStat, rows, firstRows, i, pages, nowarnings=0; char *input, *output; double *inputData, indepData, *weight; unsigned long pipeFlags; double decilePoint[2] = {10.0, 90.0 }, decileResult[2]; double percentilePoint, percentileResult; double percentile; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&scanned, argc, argv); if (argc<2) { bomb("too few arguments", USAGE); } weight=NULL; input = output = NULL; stat = NULL; request = NULL; stats = requests = pipeFlags = 0; rows = firstRows = i = 0; for (i_arg=1; i_arg100) { fprintf(stderr, "error: invalid -%s syntax--bad percentage in field %s\n", option[code], scanned[i_arg].list[1]); exit(1); } requests = addStatRequests(&request, requests, scanned[i_arg].list+2, scanned[i_arg].n_items-2, code, percentile, 0, NULL, 0, scanned[i_arg].list[1]); break; case SET_SLOPE: case SET_INTERCEPT: case SET_PMINIMA: case SET_PMAXIMA: if (scanned[i_arg].n_items<3) { fprintf(stderr, "error: invalid -%s syntax\n", option[code]); exit(1); } requests = addStatRequests(&request, requests, scanned[i_arg].list+2, scanned[i_arg].n_items-2, code, 0, 0, scanned[i_arg].list[1], 0, NULL); break; case SET_PIPE: if (!processPipeOption(scanned[i_arg].list+1, scanned[i_arg].n_items-1, &pipeFlags)) SDDS_Bomb("invalid -pipe syntax"); break; case SET_NOWARNINGS: nowarnings=1; break; default: fprintf(stderr, "error: unknown option '%s' given\n", scanned[i_arg].list[0]); exit(1); break; } } else { /* argument is filename */ if (!input) input = scanned[i_arg].list[0]; else if (!output) output = scanned[i_arg].list[0]; else SDDS_Bomb("too many filenames seen"); } } processFilenames("sddsenvelope", &input, &output, pipeFlags, 0, NULL); if (!requests) SDDS_Bomb("no statistics requested"); if (!SDDS_InitializeInput(&inTable, 
input)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); pages = 0; while ((code=SDDS_ReadPage(&inTable))>0) { pages++; if (!(rows = SDDS_CountRowsOfInterest(&inTable))) SDDS_Bomb("empty data page in input file"); if (code==1) { firstRows = rows; if (!(stat=compileStatDefinitions(&inTable, &stats, request, requests))) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors); exit(1); } if (!setupOutputFile(&outTable, output, &inTable, stat, stats, rows)) { if (SDDS_NumberOfErrors()) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors); else fprintf(stderr, "Error setting up output file.\n"); exit(1); } } else if (firstRows!=rows) SDDS_Bomb("inconsistent number of rows in input file"); for (iStat=0; iStatinputData[i]) stat[iStat].value1[i] = inputData[i]; break; case SET_MAXIMA: if (code==1) for (i=0; iinputData[i]) { stat[iStat].value2[i] = inputData[i]; stat[iStat].value1[i] = indepData; } } break; case SET_PMAXIMA: if (!SDDS_GetParameterAsDouble(&inTable, stat[iStat].functionOf, &indepData)) { fprintf(stderr, "error: unable to get value of parameter %s\n", stat[iStat].functionOf); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } if (code==1) for (i=0; i=*stats) stat = SDDS_Realloc(stat, sizeof(*stat)*(*stats+=10)); if (!has_wildcards(request[iReq].columnName)) { if (SDDS_GetColumnIndex(inTable, request[iReq].columnName)<0) { sprintf(s, "error: column %s not found input file", request[iReq].columnName); SDDS_SetError(s); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } stat[iStat].weightColumn=request[iReq].weightColumnName; stat[iStat].sourceColumn = request[iReq].columnName; stat[iStat].optionCode = request[iReq].optionCode; stat[iStat].percentile = request[iReq].percentile; stat[iStat].percentileString = request[iReq].percentileString; stat[iStat].sumPower = request[iReq].sumPower; stat[iStat].value1 = stat[iStat].value2 = stat[iStat].value3 = stat[iStat].value4 = NULL; stat[iStat].array = 
NULL; stat[iStat].copy = NULL; stat[iStat].sumWeight=NULL; if ((stat[iStat].functionOf = request[iReq].functionOf) && SDDS_GetParameterIndex(inTable, request[iReq].functionOf)<0) { sprintf(s, "error: parameter %s not found input file", request[iReq].functionOf); SDDS_SetError(s); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } iStat++; } else { SDDS_SetColumnFlags(inTable, 0); if (!SDDS_SetColumnsOfInterest(inTable, SDDS_MATCH_STRING, request[iReq].columnName, SDDS_OR)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!(columnName = SDDS_GetColumnNames(inTable, &columnNames))) { sprintf(s, "no columns selected for wildcard sequence %s", request[iReq].columnName); SDDS_SetError(s); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } if (iStat+columnNames>*stats) stat = SDDS_Realloc(stat, sizeof(*stat)*(*stats=iStat+columnNames+10)); for (iName=0; iName