/*************************************************************************\
* Copyright (c) 2002 The University of Chicago, as Operator of Argonne
* National Laboratory.
* Copyright (c) 2002 The Regents of the University of California, as
* Operator of Los Alamos National Laboratory.
* This file is distributed subject to a Software License Agreement found
* in the file LICENSE that is included with this distribution. 
\*************************************************************************/

/* program: sddscorrelate
 * purpose: compute and evaluate correlations among columns of data
 *
 * Michael Borland, 1994, 1995
 $Log: sddscorrelate.c,v $
 Revision 1.13 2005/11/04 22:46:12 soliday
 Updated code to be compiled by a 64 bit processor.

 Revision 1.12 2004/10/13 15:25:11 borland
 Use a more reliable algorithm for computing the standard deviation in doing
 outlier elimination.

 Revision 1.11 2002/08/14 17:12:42 soliday
 Added Open License

 Revision 1.10 2000/11/21 17:17:35 soliday
 Added missing SDDS_Terminate procedures.

 Revision 1.9 2000/04/13 18:01:44 soliday
 Removed invalid prototypes.

 Revision 1.8 2000/04/13 17:09:32 soliday
 Added missing include statement.

 Revision 1.7 1999/05/25 19:07:38 soliday
 Removed compiler warning on linux.

 Revision 1.6 1999/01/06 19:54:40 borland
 Fixed the version number in the usage message.

 * Revision 1.5 1996/02/14 01:05:14 borland
 * Changed over from scan_item_list() to scanItemList().
 *
 * Revision 1.4 1995/12/10 02:33:20 borland
 * Added outlier elimination.
* * Revision 1.3 1995/09/06 14:56:18 saunders * First test release of SDDS1.5 * */ #include "mdb.h" #include "SDDS.h" #include "scan.h" #include "SDDSutils.h" #include #define SET_COLUMNS 0 #define SET_EXCLUDE 1 #define SET_WITHONLY 2 #define SET_PIPE 3 #define SET_RANKORDER 4 #define SET_STDEVOUTLIER 5 #define N_OPTIONS 6 char *option[N_OPTIONS] = { "columns", "excludecolumns", "withonly", "pipe", "rankorder", "stdevoutlier", }; #define USAGE "sddscorrelate [-pipe=[input][,output]] [] [] \n\ [-columns=] [-excludeColumns=] [-withOnly=]\n\ [-rankOrder] [-stDevOutlier[=limit=][,passes=]]\n\n\ Program by Michael Borland. (This is version 5, February 1996.)\n" void replaceWithRank(double *data, long n); double *findRank(double *data, long n); void markStDevOutliers(double *data, double limit, long passes, short *keep, long n); int main(int argc, char **argv) { int iArg; char **column, **excludeColumn, *withOnly; long columns, excludeColumns; char *input, *output; SCANNED_ARG *scanned; SDDS_DATASET SDDSin, SDDSout; long i, j, row, rows, count, readCode, rankOrder, iName1, iName2; int32_t outlierStDevPasses; double **data, correlation, significance, outlierStDevLimit; double **rank; short **accept; char s[SDDS_MAXLINE]; unsigned long pipeFlags, dummyFlags; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&scanned, argc, argv); if (argc<2) bomb(NULL, USAGE); output = input = withOnly = NULL; columns = excludeColumns = 0; column = excludeColumn = NULL; pipeFlags = 0; rankOrder = 0; outlierStDevPasses = 0; outlierStDevLimit = 1.; rank = NULL; accept = NULL; for (iArg=1; iArg0) { if ((rows = SDDS_CountRowsOfInterest(&SDDSin))<3) continue; if (!SDDS_StartPage(&SDDSout, columns*(columns-1)/2) || !SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME|SDDS_PASS_BY_VALUE, "CorrelatedRows", rows, NULL)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); for (i=0; i0) { absLimit = limit*sqrt(variance); for (i=0; iabsLimit) { keep[i] = 0; kept--; } } } } typedef 
struct { double data; long originalIndex; } DATAnINDEX; int compareData(const void *d1, const void *d2) { double diff; diff = ((DATAnINDEX*)d1)->data - ((DATAnINDEX*)d2)->data; return diff==0? 0 : (diff<0 ? -1 : 1 ); } double *findRank(double *data, long n) { long i; double *rank; if (!(rank = (double*)malloc(sizeof(*rank)*n))) return NULL; for (i=0; i