/*************************************************************************\
* Copyright (c) 2002 The University of Chicago, as Operator of Argonne
* National Laboratory.
* Copyright (c) 2002 The Regents of the University of California, as
* Operator of Los Alamos National Laboratory.
* This file is distributed subject to a Software License Agreement found
* in the file LICENSE that is included with this distribution.
\*************************************************************************/

/* program: sddsselect
 * purpose: creates a SDDS data set from another data set based
 *          on matching to data in a third data set.
 *
 *          For example, one can select all of the rows from file1 that
 *          have a match in file2, or that have no match in file2.
 *          Similar to sddsxref, but does not transfer any data.
 *
 * Michael Borland, 1995
 *
 * $Log: sddsselect.c,v $
 * Revision 1.17  2005/07/21 14:13:19  borland
 * Additional changes to prevent possible freeing of unallocated memory.
 *
 * Revision 1.16  2005/07/21 14:08:31  borland
 * Fixed problem with freeing unallocated memory in key groups.
 *
 * Revision 1.15  2003/09/26 14:50:28  soliday
 * Moved free_scanargs below replaceFileAndBackUp
 *
 * Revision 1.14  2003/09/02 19:16:06  soliday
 * Cleaned up code for Linux.
 *
 * Revision 1.13  2002/10/23 03:26:26  shang
 * fixed the bug which returned segmentation error when the second input
 * file is empty
 *
 * Revision 1.12  2002/10/21 22:44:13  shang
 * replaced the searching method by quick sort and comparison method
 * implemented in sddsxref.c originally
 *
 * Revision 1.11  2002/08/14 17:12:53  soliday
 * Added Open License
 *
 * Revision 1.10  2001/01/23 19:47:29  soliday
 * Fixed Solaris compiler warnings.
 *
 * Revision 1.9  1999/09/28 15:39:49  soliday
 * Added SDDS_Terminate at the end.
 *
 * Revision 1.8  1999/05/25 19:14:55  soliday
 * Removed compiler warning on linux.
 *
 * Revision 1.7  1999/01/06 19:54:55  borland
 * Fixed the version number in the usage message.
 *
 * Revision 1.6  1996/03/11 02:46:06  borland
 * Handles empty pages in input2 properly.  If invert is on, copies remainder
 * of input1.
 * Otherwise, deletes remainder of input1.
 *
 * Revision 1.5  1996/03/10 01:09:18  borland
 * Fixed behavior for empty pages.  If input2 ends, copies remainder of input1
 * to output.
 *
 * Revision 1.4  1996/03/09 23:18:44  borland
 * Added if statement for conditional warning.
 *
 * Revision 1.3  1995/09/06 14:57:04  saunders
 * First test release of SDDS1.5
 *
 */
#include "mdb.h"
#include "SDDS.h"
/*#include "SDDSaps.h"*/
#include "scan.h"

/* Indices into option[] below; the order of the two must stay in step. */
#define SET_MATCH_COLUMN 0
#define SET_EQUATE_COLUMN 1
#define SET_NOWARNINGS 2
#define SET_INVERT 3
#define SET_REUSE 4
#define SET_PIPE 5
#define N_OPTIONS 6

/* Command-line option keywords, one per SET_* index above. */
char *option[N_OPTIONS] = {
  "match", "equate", "nowarnings", "invert", "reuse", "pipe",
  } ;

/* Usage text handed to bomb() when too few arguments are given.
 * NOTE(review): in this copy the continuation backslashes inside the
 * literal are followed by a space rather than a line break, and the
 * placeholder names (presumably angle-bracketed file and column names)
 * appear to be missing.  The literal is runtime text and is deliberately
 * left untouched here; restore it from a pristine copy of the file.
 */
char *USAGE = "sddsselect [-pipe[=input][,output]] [] [] \n\ [-match=[=]]\n\ [-equate=[=]] [-invert]\n\ [-reuse[=rows][,page]]\n\n\ sddsselect selects data from for writing to \n\ based on the presence or absence of matching data in .\n\ If is not given, is replaced.\n\ match specifies names of columns to match between and\n\ for selection and placement of data taken from\n\ . In general, the first unused match from \n\ is taken.\n\ equate specifies names of columns to equate between and\n\ for selection and placement of data taken from\n\ . In general, the first unused match from \n\ is taken.\n\ reuse specifies that rows of may be reused, i.e., matched\n\ with more than one row of . Also, -reuse=page specifies\n\n\ that only the first page of is used.\n\ invert specifies that only nomatched rows are to be kept.\n\ nowarnings specifies that no warnings should be printed.\n\n\ Program by Michael Borland. 
(This is version 3, March 1996.)\n";

/* Entry point of sddsselect.
 *
 * Per the file header, the program writes rows of a first data set
 * (SDDS_1) to an output data set (SDDS_output), chosen by whether they
 * have a match in a second data set (SDDS_2).  The visible code handles
 * one page per loop iteration: it starts an output page sized for the
 * rows of interest in SDDS_1, copies parameters and arrays from both
 * inputs, compares either a string column (match_columns) or a numeric
 * column read as doubles (equate_columns), calls SDDS_AssertRowFlags with
 * a flag value of 0 on the rows that should not be kept, and writes the
 * page.
 *
 * argc, argv: the raw command line; it is handed to scanargs(), which
 *             fills s_arg and whose return value replaces argc.
 * Returns 0 at the end of a normal run.  Calls exit(1) if terminating
 * any of the data sets fails or if replaceFileAndBackUp() fails; the
 * calls to SDDS_PrintErrors with SDDS_EXIT_PrintErrors presumably also
 * end the program (confirm against the SDDS library).
 *
 * NOTE(review): this copy of the function is damaged.  Wherever the
 * original text had a less-than sign followed later by a greater-than
 * sign, everything in between appears to have been removed.  Affected
 * spots are marked below; the option parsing, the opening of the files
 * and the header of the page-reading loop are not visible at all.  The
 * indentation chosen here is therefore only a best guess at nesting.
 */
int main(int argc, char **argv)
{
  SDDS_DATASET SDDS_1, SDDS_2, SDDS_output;
  long i, j, i_arg, rows1, rows2, reuse, reusePage, i1, i2;
  SCANNED_ARG *s_arg;
  char s[200], *ptr;
  char **match_column, **equate_column;
  long match_columns, equate_columns;
  char *input1, *input2, *output;
  long tmpfile_used, retval1, retval2;
  long warnings, invert;
  unsigned long pipeFlags;
  /* Key groups built from the second file for the current page; reset
   * to NULL/0 after each page once they have been freed. */
  KEYED_EQUIVALENT **keyGroup=NULL;
  long keyGroups=0;

  SDDS_RegisterProgramName(argv[0]);
  argc = scanargs(&s_arg, argc, argv);
  if (argc<3)
    bomb(NULL, USAGE);

  /* Defaults: no files, no column pairs, no reuse, no inversion,
   * warnings switched on, no pipe flags. */
  input1 = input2 = output = NULL;
  match_column = equate_column = NULL;
  match_columns = equate_columns = reuse = reusePage = 0;
  tmpfile_used = invert = 0;
  warnings = 1;
  pipeFlags = 0;

  /* NOTE(review): damaged line.  It starts as the argument loop and ends
   * as what looks like the tail of the page-reading loop condition
   * (presumably a test that reading a page of SDDS_1 into retval1
   * succeeded).  Everything in between is missing from this copy. */
  for (i_arg=1; i_arg0) {
    if (!reusePage) {
      /* Without page reuse, a fresh page of the second file is read for
       * every page of the first file. */
      if ((retval2=SDDS_ReadPage(&SDDS_2))<=0) {
        if (warnings)
          fprintf(stderr, "warning: ends before \n");
        if (invert) {
          /* nothing to match, so with -invert every row counts as
           * unmatched: the page of the first file is copied through
           * unchanged and the loop moves on to the next page */
          if (!SDDS_CopyPage(&SDDS_output, &SDDS_1) || !SDDS_WritePage(&SDDS_output))
            SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
          continue;
        }
        else
          /* nothing to match, so everything is thrown out and page
           * processing stops */
          break;
      }
    }
    else {
      /* With page reuse, the second file is read only when retval1 is 1
       * (presumably the first page of the first file; confirm against
       * the missing loop header). */
      if (retval1==1 && (retval2=SDDS_ReadPage(&SDDS_2))<=0)
        SDDS_Bomb(" has no data");
      /* Presumably marks every row of the reused page as of interest
       * again; confirm against the SDDS library. */
      SDDS_SetRowFlags(&SDDS_2, 1);
    }
    rows1 = SDDS_CountRowsOfInterest(&SDDS_1);
    rows2 = SDDS_CountRowsOfInterest(&SDDS_2);
    if (!SDDS_StartPage(&SDDS_output, rows1)) {
      SDDS_SetError("Problem starting output page");
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    }
    /* Parameters and arrays are copied from the second file first and
     * from the first file afterwards, so values from the first file
     * presumably win where both define the same name. */
    if (!SDDS_CopyParameters(&SDDS_output, &SDDS_2) || !SDDS_CopyArrays(&SDDS_output, &SDDS_2)) {
      SDDS_SetError("Problem copying parameter or array data from second input file");
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    }
    if (!SDDS_CopyParameters(&SDDS_output, &SDDS_1) || !SDDS_CopyArrays(&SDDS_output, &SDDS_1)) {
      SDDS_SetError("Problem copying parameter or array data from first input file");
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    }
    if (match_columns) {
      /* String comparison: match_column[0] names the column in the first
       * file, match_column[1] the column in the second file. */
      char **string1, **string2;
      long matched;
      string2 = NULL;
      if (!(string1 = SDDS_GetColumn(&SDDS_1, match_column[0]))) {
        fprintf(stderr, "Error: problem getting column %s from file %s\n", match_column[0], input1?input1:"stdin");
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      }
      /* The second column is fetched only when the second page has rows,
       * so string2 stays NULL for an empty page. */
      if (rows2 && !(string2 = SDDS_GetColumn(&SDDS_2, match_column[1]))) {
        fprintf(stderr, "Error: problem getting column %s from file %s\n", match_column[1], input2);
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      }
      if (rows2)
        keyGroup = MakeSortedKeyGroups(&keyGroups, SDDS_STRING, string2, rows2);
      /* NOTE(review): damaged line.  The loop over the rows of the first
       * file and the lookup that decides whether row i1 has a match are
       * missing; only the tail of a "found" test remains. */
      for (i1=0; i1=0) {
          matched = 1;
        }
        /* A row is rejected when it is unmatched in normal mode or
         * matched in invert mode; rejection is a flag value of 0. */
        if ((!matched && !invert) || (matched && invert)) {
          if (!SDDS_AssertRowFlags(&SDDS_output, SDDS_INDEX_LIMITS, i1, i1, 0))
            SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
        }
      }
      if (string1) {
        /* NOTE(review): damaged line.  The freeing of the string columns
         * and the start of the loop over the key groups are missing. */
        for (i=0;iequivalent)
            free(keyGroup[i]->equivalent);
          free(keyGroup[i]);
          keyGroup[i] = NULL;
        }
      }
      if (keyGroups) {
        free(keyGroup);
        keyGroup = NULL;
        keyGroups = 0;
      }
    }
    else if (equate_columns) {
      /* Numeric comparison: both columns are read as doubles;
       * equate_column[0] belongs to the first file, equate_column[1] to
       * the second file. */
      double *value1, *value2;
      long equated;
      value2 = NULL;
      if (!(value1 = SDDS_GetColumnInDoubles(&SDDS_1, equate_column[0]))) {
        fprintf(stderr, "Error: problem getting column %s from file %s\n", equate_column[0], input1?input1:"stdin");
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      }
      if (rows2 && !(value2 = SDDS_GetColumnInDoubles(&SDDS_2, equate_column[1]))) {
        fprintf(stderr, "Error: problem getting column %s from file %s\n", equate_column[1], input2);
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      }
      if (rows2)
        keyGroup = MakeSortedKeyGroups(&keyGroups, SDDS_DOUBLE, value2, rows2);
      /* NOTE(review): damaged line, same shape as in the string branch:
       * the row loop header and the lookup are missing. */
      for (i1=0; i1=0) {
          equated = 1;
        }
        /* Same rejection rule as in the string branch. */
        if ((!equated && !invert) || (equated && invert)) {
          if (!SDDS_AssertRowFlags(&SDDS_output, SDDS_INDEX_LIMITS, i1, i1, 0))
            SDDS_PrintErrors(stderr, 
                             SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
        }
      }
      /* Release the per-page copies of the two numeric columns. */
      if (value1)
        free(value1);
      value1 = NULL;
      if (rows2 && value2)
        free(value2);
      value2 = NULL;
      /* NOTE(review): damaged line.  The header of the loop over the key
       * groups is missing. */
      for (i=0;iequivalent)
          free(keyGroup[i]->equivalent);
        free(keyGroup[i]);
        keyGroup[i] = NULL;
      }
    }
    if (keyGroups) {
      free(keyGroup);
      keyGroup = NULL;
      keyGroups = 0;
    }
  }
  if (!SDDS_WritePage(&SDDS_output)) {
    SDDS_SetError("Problem writing data to output file");
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
  }
  }

  /* All three data sets are terminated before the optional replacement
   * of the first input file by the output. */
  if (!SDDS_Terminate(&SDDS_1) || !SDDS_Terminate(&SDDS_2) || !SDDS_Terminate(&SDDS_output)) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
    exit(1);
  }
  if (tmpfile_used && !replaceFileAndBackUp(input1, output))
    exit(1);
  /* The scanned arguments are released only after the file replacement
   * (see revision 1.15 in the log above). */
  free_scanargs(&s_arg,argc);
  /* Only match_column is freed here; equate_column is not.
   * NOTE(review): whether equate_column is heap-allocated is not visible
   * in this copy; check the missing option parsing. */
  if (match_columns)
    free(match_column);
  return(0);
}