/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* program: sddsgenericfit.c * purpose: perform fitting using a generic form supplied by the user * M. Borland, 2001 $Log: sddsgenericfit.c,v $ Revision 1.24 2012/01/13 22:37:28 shang added -logFile option to log the intermediate optimization result from simplex. Revision 1.23 2012/01/11 22:39:21 shang made -columns option obselete, added -ycolumn option to replace -columns option, and added -copycolumns option to copy provided list of columns to the output file. Revision 1.22 2009/07/30 15:34:50 shang made the lower, upper limit, step size and starting value of the variables to also be able to read from input file parameters. Revision 1.21 2007/12/04 21:03:57 borland Added -expression argument. Revision 1.20 2006/12/14 22:21:58 soliday Updated a bunch of programs because SDDS_SaveLayout is now called by SDDS_WriteLayout and it is no longer required to be called directly. Also the AutoCheckMode is turned off by default now so I removed calls to SDDS_SetAutoCheckMode that would attempt to turn it off. It is now up to the programmer to turn it on in new programs until debugging is completed and then remove the call to SDDS_SetAutoCheckMode. Revision 1.19 2006/10/19 17:55:39 soliday Updated to work with linux-x86_64. Revision 1.18 2005/11/04 22:46:14 soliday Updated code to be compiled by a 64 bit processor. Revision 1.17 2005/01/13 17:08:07 shang made the rpn function calls consistent with the updated rpn libary. Revision 1.16 2003/09/02 19:16:04 soliday Cleaned up code for Linux. Revision 1.15 2003/06/02 21:39:13 soliday Added the function to add parentheses around a string before calling if2pf Revision 1.14 2003/01/21 18:57:47 borland Added yet another argument for simplexMin(). Revision 1.13 2003/01/16 18:45:31 soliday Changed because simplexMin arguments changed. Revision 1.12 2002/08/14 17:12:46 soliday Added Open License Revision 1.11 2001/08/08 19:21:19 borland Made consistent with changes to simplexMin() (addition of n_divisions argument). Revision 1.10 2001/05/15 16:41:26 borland sddsgenericfit now supports the "no1dscans" option of simplexMin() from the commandline. Others were modified to accomodate the new option, but don't allow commandline specification. Revision 1.9 2001/05/07 18:54:08 borland Added 'heat' parameter to -variable option to allow injecting noise into values between restarts. Revision 1.8 2001/05/03 21:29:19 soliday Split usage message because of limits on WIN32. Revision 1.7 2001/05/01 15:49:05 borland Uses original step sizes for restarts. Uses values in input parameters (in input file) for starting values, if input parameters are present. Revision 1.6 2001/04/10 20:42:29 soliday Added the ability to convert algebraic equations to rpn equations. Revision 1.5 2001/04/06 15:41:15 borland Fixed problems with parsing -variable arguments. Made input parameter values accessible in equations. Revision 1.4 2001/04/03 18:03:16 borland Added -startFromPrevious option. Revision 1.3 2001/03/23 23:29:22 emery Added newline to usage message. Louis Revision 1.2 2001/03/20 15:34:18 borland Fixed a bug (use of an uninitialized variable). Revision 1.1 2001/03/20 15:16:32 borland First version. * */ #include "mdb.h" #include "SDDS.h" #include "SDDSaps.h" #include "scan.h" #include "rpn.h" #include #define SET_VARIABLE 0 #define SET_PIPE 1 #define SET_EQUATION 2 #define SET_COLUMNS 3 #define SET_TARGET 4 #define SET_TOLERANCE 5 #define SET_SIMPLEX 6 #define SET_VERBOSITY 7 #define SET_STARTFROMPREVIOUS 8 #define SET_EXPRESSION 9 #define SET_COPYCOLUMNS 10 #define SET_YCOLUMN 11 #define SET_LOGFILE 12 #define N_OPTIONS 13 char *option[N_OPTIONS] = { "variable", "pipe", "equation", "columns", "target", "tolerance", "simplex", "verbosity", "startfromprevious", "expression", "copycolumns", "ycolumn", "logFile", }; static char *USAGE1="sddsgenericfit [-pipe=[input][,output]] [] [] \n\ -equation=[,algebraic] [-expression= [-expression=...]]\n\ [-target=] [-tolerance=]\n\ [-simplex=[restarts=][,cycles=,][evaluations=][,no1DScans]\n\ -variable=name=,lowerLimit=,upperLimit=,stepsize=,startingValue=[,units=][,heat=]\n\ [-variable=...] [-verbosity=] [-startFromPrevious]\n\ [-copy=] -ycolumn=ycolumn_name[,ySigma=] \n\ Uses Simplex method to finds a fit to as a function of by varying the given\n\ variables, which are assumed to appear in the .\n\n\ -ycolumn give the name of dependant data and optional to weight the fit.\n\ This option replaces the old -columns option.\n\ -copycolumns provide list of columns to copy from the input file to the output file.\n\ -logFile if provide, the intermiate fitting result will be written to the logFile.\n\ -equation Give an rpn expression for the equation to use in fitting.\n\ This equation can use the names of any of the columns or parameters in the file,\n\ just as in sddsprocess. It is expected to return a value that will be compared to\n\ the data in column .\n\ -expression Give an rpn expression to evaluate before the equation is evaluated. Can be used \n\ to prepare quantities on the stack or in variables when the equation is complicated.\n\ -target Give the value of the (weighted) rms residual that is acceptably small to consider the\n\ fit \"good\".\n"; static char *USAGE2="-tolerance Give the minimum change in the (weighted) rms residual that is considered significant\n\ enough to justify continuing optimization.\n\ -simplex Give parameters of the simplex optimization. Each start or restart allows cycles\n\ with up to evaluations of the function. Defaults are 10 restarts, 10 cycles, and\n\ 5000 evaluations.\n\ -variable Give the name, lower limit, upper limit, step size, and starting value for a parameter\n\ of the fitting equation. This name must not match the name of an existing column or\n\ parameter in the input file. The value will appear as a parameter in the output file.\n\ If given, the 'heat' value specifies the gaussian parameter of random numbers to be\n\ added to the best results of each restart. This can help avoid getting stuck in\n\ a local minimum.\n\ -verbosity A higher value results in more output during optimization.\n\ -startFromPrevious\n\ When starting fits for a new page, the fit parameters are normally set equal to the\n\ starting values supplied with the -variable option. However, if this option is given,\n\ then the final values from the previous page's fit are used as the starting values for\n\ fitting.\n\ Program by Michael Borland. ("__DATE__")\n"; void report(double res, double *a, long pass, long n_eval, long n_dimen); void setupOutputFile(SDDS_DATASET *OutputTable, long *fitIndex, long *residualIndex, char *output, SDDS_DATASET *InputTable, char *xName, char *yName, char *syName, char **variableName, char **variableUnits, long variables, char **colMatch, long colMatches, SDDS_DATASET *logData, char *logFile); double fitFunction(double *a, long *invalid); static SDDS_DATASET InputTable; static double *xData=NULL, *yData=NULL, *syData=NULL, *yFit=NULL, *yResidual=NULL; static long nData = 0; static char *equation; static long *variableMem, nVariables, verbosity; static char **expression = NULL; static long nExpressions = 0; static char **variableName=NULL; static int32_t step=0; static char *logFile=NULL; static SDDS_DATASET logData; static int32_t maxLogRows=500; #define VARNAME_GIVEN 0x0001U #define LOWER_GIVEN 0x0002U #define UPPER_GIVEN 0x0004U #define STEP_GIVEN 0x0008U #define START_GIVEN 0x0010U #define VARUNITS_GIVEN 0x0020U int main(int argc, char **argv) { long i; SDDS_DATASET OutputTable; SCANNED_ARG *s_arg; long i_arg; char *input, *output, *xName, *yName, *syName, **colMatch; long fitIndex, residualIndex, retval, colMatches; double rmsResidual, chiSqr, sigLevel; unsigned long pipeFlags, dummyFlags; int32_t nEvalMax=5000, nPassMax=10, nRestartMax=10; long nEval, iRestart; double tolerance, target; char **variableUnits; double *lowerLimit, *upperLimit, *stepSize, *startingValue, *paramValue, *paramDelta=NULL, *paramDelta0=NULL; char **startingPar=NULL, **lowerLimitPar=NULL, **upperLimitPar=NULL, **heatPar=NULL, **stepPar=NULL; double *heat, *bestParamValue, bestResult=0; long iVariable, startFromPrevious=0; double result, lastResult=0; char pfix[IFPF_BUF_SIZE]; unsigned long simplexFlags = 0; char *ptr; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&s_arg, argc, argv); colMatches = 0; colMatch = NULL; if (argc<=1) { fprintf(stderr, "%s%s", USAGE1, USAGE2); exit(1); /* bomb(USAGE, NULL);*/ } logFile = NULL; input = output = equation = NULL; variableName = variableUnits = NULL; lowerLimit = upperLimit = stepSize = startingValue = heat = bestParamValue = NULL; nVariables = 0; pipeFlags = 0; verbosity = startFromPrevious = 0; xName = yName = syName = NULL; tolerance = target = 1e-14; for (i_arg=1; i_arg=upperLimit[nVariables]) SDDS_Bomb("invalid limits value for variable"); if (!lowerLimitPar[nVariables] && !upperLimitPar[nVariables] && !startingPar[nVariables] && (startingValue[nVariables]<=lowerLimit[nVariables] || startingValue[nVariables]>=upperLimit[nVariables])) SDDS_Bomb("invalid limits or starting value for variable"); if (!stepPar[nVariables] && stepSize[nVariables]<=0) SDDS_Bomb("invalid step size for variable"); nVariables ++; break; case SET_SIMPLEX: s_arg[i_arg].n_items -= 1; if (!scanItemList(&simplexFlags, s_arg[i_arg].list+1, &s_arg[i_arg].n_items, 0, "restarts", SDDS_LONG, &nRestartMax, 1, 0, "cycles", SDDS_LONG, &nPassMax, 1, 0, "evaluations", SDDS_LONG, &nEvalMax, 1, 0, "no1dscans", -1, NULL, 0, SIMPLEX_NO_1D_SCANS, NULL) || nRestartMax<0 || nPassMax<=0 || nEvalMax<=0) SDDS_Bomb("invalid -simplex syntax/values"); break; case SET_EQUATION: /* if (s_arg[i_arg].n_items!=2 || !strlen(equation=s_arg[i_arg].list[1])) SDDS_Bomb("invalid -equation syntax"); */ if ((s_arg[i_arg].n_items<2) || (s_arg[i_arg].n_items>3)) SDDS_Bomb("invalid -equation syntax"); if (s_arg[i_arg].n_items==2) { if (!strlen(equation=s_arg[i_arg].list[1])) { SDDS_Bomb("invalid -equation syntax"); } } else if (s_arg[i_arg].n_items==3) { if (strncmp(s_arg[i_arg].list[2], "algebraic", strlen(s_arg[i_arg].list[2]))==0) { ptr = addOuterParentheses(s_arg[i_arg].list[1]); if2pf(pfix, ptr, sizeof pfix); free(ptr); if (!SDDS_CopyString(&equation, pfix)) { fprintf(stderr, "error: problem copying argument string\n"); return(1); } } else { SDDS_Bomb("invalid -equation syntax"); } } break; case SET_EXPRESSION: if (s_arg[i_arg].n_items!=2) SDDS_Bomb("invalid -expression syntax"); expression = trealloc(expression, sizeof(*expression)*(nExpressions+1)); expression[nExpressions++] = s_arg[i_arg].list[1]; break; case SET_STARTFROMPREVIOUS: startFromPrevious = 1; break; default: fprintf(stderr, "error: unknown/ambiguous option: %s\n", s_arg[i_arg].list[0]); exit(1); break; } } else { if (input==NULL) input = s_arg[i_arg].list[0]; else if (output==NULL) output = s_arg[i_arg].list[0]; else SDDS_Bomb("too many filenames"); } } processFilenames("sddsgenericfit", &input, &output, pipeFlags, 0, NULL); if (!yName) SDDS_Bomb("-ycolumn option must be given"); if (nVariables==0) SDDS_Bomb("you must give at least one -variables option"); if (equation==NULL) SDDS_Bomb("you must give an equation string"); rpn(getenv("RPN_DEFNS")); if (rpn_check_error()) exit(1); if (!SDDS_InitializeInput(&InputTable, input)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if ((xName && !SDDS_FindColumn(&InputTable, FIND_NUMERIC_TYPE, xName, NULL)) || !SDDS_FindColumn(&InputTable, FIND_NUMERIC_TYPE, yName, NULL) || (syName && !SDDS_FindColumn(&InputTable, FIND_NUMERIC_TYPE, syName, NULL))) SDDS_Bomb("one or more of the given data columns is nonexistent or nonnumeric"); setupOutputFile(&OutputTable, &fitIndex, &residualIndex, output, &InputTable, xName, yName, syName, variableName, variableUnits, nVariables, colMatch, colMatches, &logData, logFile); if (!(paramValue = SDDS_Malloc(sizeof(*paramValue)*nVariables)) || !(paramDelta = SDDS_Malloc(sizeof(*paramDelta)*nVariables)) || !(paramDelta0 = SDDS_Malloc(sizeof(*paramDelta0)*nVariables)) || !(variableMem = SDDS_Malloc(sizeof(*variableMem)*nVariables))) SDDS_Bomb("memory allocation failure"); for (iVariable=0; iVariable0) { if ((xName && !(xData = SDDS_GetColumnInDoubles(&InputTable, xName))) || !(yData = SDDS_GetColumnInDoubles(&InputTable, yName))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (syName && !(syData = SDDS_GetColumnInDoubles(&InputTable, syName))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if ((nData = SDDS_CountRowsOfInterest(&InputTable))<=nVariables) continue; for (iVariable=0; iVariable=0) { if (!SDDS_GetParameterAsDouble(&InputTable, variableName[iVariable], ¶mValue[iVariable])) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } else if (retval==1 || !startFromPrevious) paramValue[iVariable] = startingValue[iVariable]; paramDelta[iVariable] = stepSize[iVariable]; } if (verbosity>2) { /* show starting values */ fprintf(stderr, "Starting values and step sizes:\n"); for (iVariable=0; iVariableupperLimit[iVariable]) paramValue[iVariable] = upperLimit[iVariable]-paramDelta[iVariable]; } } nEval += simplexMin(&result, paramValue, paramDelta, lowerLimit, upperLimit, NULL, nVariables, target, tolerance, fitFunction, (verbosity>0?report:NULL), nEvalMax, nPassMax, 12, 3, 1.0, simplexFlags); if (iRestart!=0 && result>bestResult) { result = bestResult; for (iVariable=0; iVariable0) fprintf(stderr, "Result of simplex minimization: %le\n", result); if (result0) fprintf(stderr, "Performing restart %ld\n", iRestart+1); } for (iVariable=0; iVariable3) fprintf(stderr, "%ld evaluations of fit function required, giving result %e\n", nEval, result); for (i=result=0; i1) { if (syData) fprintf(stderr, "Significance level: %.5e\n", sigLevel); fprintf(stderr, "RMS deviation: %.15e\n", rmsResidual); } if (!SDDS_StartPage(&OutputTable, nData) || !SDDS_CopyPage(&OutputTable, &InputTable) || !SDDS_SetColumn(&OutputTable, SDDS_SET_BY_INDEX, yFit, nData, fitIndex) || !SDDS_SetColumn(&OutputTable, SDDS_SET_BY_INDEX, yResidual, nData, residualIndex)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); for (iVariable=0; iVariable10) fprintf(stderr, "Running fit function:\n"); if (!syData) { for (i=sum=0; i10) { fprintf(stderr, "%le %le %le\n", xData[i], yData[i], yFit[i]); } sum += sqr(tmp); } } else { for (i=sum=0; imaxLogRows) { if (!SDDS_LengthenTable(&logData, maxLogRows + 500)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); maxLogRows += 500; } if (!SDDS_SetRowValues(&logData, SDDS_SET_BY_NAME|SDDS_PASS_BY_VALUE, step-1, "Step", step, "Chi", sum/nData, NULL)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); for (i=0;i