/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* program: sddsxref * purpose: creates a SDDS data set from two other data sets * by adding data from one data set to the other * based on matching or filtering of column data columns * This is a fast version of sddsmxref, with less capability. * * For example: suppose file.1 contains the columns (name,x,y) and * file.2 contains (name,z,betax,betay). Then * sddsxref file.1 file.2 -take=z -match=name file.3 * would result in (name,x,y,z) being put in file.3. * * Michael Borland, 1994, 1995 $Log: sddsxref.c,v $ Revision 1.44 2010/12/09 16:13:26 borland The ifnot and ifis options now respect the -nowarnings request. Revision 1.43 2009/11/30 19:07:47 soliday Updated to fix an issue with keyGroups being uninitialized. Revision 1.42 2009/04/01 15:46:07 soliday Fixed a bug with three or more input files. Under certain conditions it was possible to get the wrong results due to CopyRowToNewColumn and SDDS_AssertRowFlags expecting the real row index and not the index of rows of interest. Revision 1.41 2005/11/07 21:48:11 soliday Updated to remove Linux compiler warnings. Revision 1.40 2005/11/04 22:46:19 soliday Updated code to be compiled by a 64 bit processor. Revision 1.39 2005/08/30 15:53:51 shang fixed the bug for renaming arrays Revision 1.38 2005/04/07 19:34:02 borland Added -wildMatch option. Equivalent to -match, but uses wildcards in the data from the second file. 
Revision 1.37 2004/06/11 21:47:15 borland Fixed bug that resulted in having no rows in the output when the xref file had no rows, even when -fillIn was given. Revision 1.36 2003/09/02 19:16:08 soliday Cleaned up code for Linux. Revision 1.35 2002/10/21 22:44:48 shang moved the sorting and comparison functions to sortfunctions.c in mdblib area Revision 1.34 2002/08/14 17:12:57 soliday Added Open License Revision 1.33 2002/03/28 00:30:33 shang fixed bug for -rename=parameter option Revision 1.32 2001/10/22 23:06:54 borland Fixed problem with file replacement due to premature freeing of two strings. Revision 1.31 2001/10/17 21:29:35 shang added -rename and -editnames option, fixed the memory leak problem and the memory errors in the case of no-matches Revision 1.30 2001/05/08 18:54:25 soliday The fillIn option now works when -match and -equate are not used. Revision 1.29 2001/05/03 20:55:36 soliday Standardized usage messages. Revision 1.28 2001/01/10 19:35:50 soliday Standardized usage message. Revision 1.27 2000/10/31 14:15:32 borland Fixed the usage message, which was corrupted by an over-zealous indentation of the code. Revision 1.26 2000/09/27 17:10:39 emery Fix a bus error that occurred when the first input file had zero rows. The fix is testing for the number of rows before attempting to free a pointer which has not been assigned a value. Revision 1.25 2000/07/20 21:00:38 emery Corrected bug where the parameters of the xref file were automatically transferred (fix by D. Blachowicz) and indented code. Revision 1.24 2000/06/01 15:45:58 borland Several bug fixes per D. Blachowicz. Revision 1.22 2000/04/13 18:04:40 soliday Removed invalid prototype. Revision 1.21 1999/07/09 15:08:54 soliday Separated USAGE string into two strings to fix problem on WIN32 Revision 1.20 1999/05/25 19:17:41 soliday Removed compiler warning on linux. Revision 1.19 1999/01/06 19:55:00 borland Fixed the version number in the usage message. 
Revision 1.18 1997/10/20 22:20:46 borland Fixed problem for xrefing when there is no match or equate option given. Wasn't properly dealing with reusing of rows or case when file2 had too few rows. Revision 1.17 1997/09/08 23:41:44 borland Fixed a logic bug in checking for existence of output columns. Revision 1.16 1997/03/19 19:20:57 borland Check that columns are being transferred before trying to get list of output column names. Avoids incorrect program termination. * Revision 1.15 1996/06/03 22:45:35 borland * Added SDDS_Terminate calls to avoid stupid Solaris "Broken Pipe" messages. * * Revision 1.14 1996/04/07 00:42:53 borland * Fixed bug that resulted in -reuse=row not working. * * Revision 1.13 1996/04/05 22:55:03 borland * Now uses sorted index lists for better performance. * * Revision 1.12 1996/03/12 04:55:02 borland * Added initialization for leave_all_columns. * * Revision 1.11 1996/02/12 17:25:22 borland * Removed erroneous freeing of take_column array. * * Revision 1.10 1996/01/29 22:30:28 borland * Added code to avoid attempts to transfer column and parameter definitions * when the element already exists. * * Revision 1.9 1996/01/17 16:05:34 borland * Added test for lack of columns in input2, so that parameters and arrays can * be transferred even if input2 has no column data. * * Revision 1.8 1996/01/15 22:35:55 borland * Now keeps tabular data in input1 if no columns are being taken from * input2, even if input2 has an empty tabular data section. * * Revision 1.7 1996/01/15 18:21:56 borland * Changed behavior when second file has fewer pages than first; now keeps the * parameter and array data while erasing tabular data; this is consistent * with the program behavior when there is tabular data in both files that * doesn't yield any matches. * * Revision 1.6 1996/01/15 04:09:11 borland * Modified behavior when has an empty page; now prints a warning * instead of exiting. Will pass the page through instead of * emitting an empty page. 
* * Revision 1.5 1996/01/11 20:34:46 borland * Added -fillIn option and supporting code. * * Revision 1.4 1995/11/20 16:24:03 borland * Added code to prevent aborts when there are no rows in the input page. * * Revision 1.3 1995/09/06 14:57:24 saunders * First test release of SDDS1.5 * */

#include "mdb.h"
#include "SDDS.h"
#include "SDDSaps.h"
#include "scan.h"
/* NOTE(review): the header name of the next include directive is missing in
 * this copy of the file; restore it from the original source. */
#include

/* Option codes. The values follow the order of the keywords in option[]
 * below (take=0, leave=1, ... wildmatch=13); N_OPTIONS is the size of
 * that table. */
#define SET_TAKE_COLUMNS 0
#define SET_LEAVE_COLUMNS 1
#define SET_MATCH_COLUMN 2
#define SET_EQUATE_COLUMN 3
#define SET_TRANSFER 4
#define SET_REUSE 5
#define SET_IFNOT 6
#define SET_NOWARNINGS 7
#define SET_IFIS 8
#define SET_PIPE 9
#define SET_FILLIN 10
#define SET_RENAME 11
#define SET_EDIT_NAMES 12
#define SET_WILD_MATCH 13
#define N_OPTIONS 14

/* Entity kinds; the values index mode_name[]. */
#define COLUMN_MODE 0
#define PARAMETER_MODE 1
#define ARRAY_MODE 2
#define MODES 3
static char *mode_name[MODES] = { "column", "parameter", "array", } ;

/* Transfer kinds; the values index transfer_type[]. */
#define PARAMETER_TRANSFER 0
#define ARRAY_TRANSFER 1
#define TRANSFER_TYPES 2
static char *transfer_type[TRANSFER_TYPES] = { "parameter", "array" };

/* One transfer request: an entity name plus a type code (presumably one of
 * PARAMETER_TRANSFER or ARRAY_TRANSFER; confirm against the option parser,
 * which is not visible in this copy). */
typedef struct {
  char *name;
  long type;
} TRANSFER_DEFINITION;

/* Name lists for columns, parameters and arrays, each kept as a pair of
 * arrays (orig_... and new_...) together with an element count. The visible
 * code allocates new_column and orig_column with the same number of entries. */
typedef struct {
  char **new_column;
  char **new_parameter;
  char **new_array;
  char **orig_column;
  char **orig_parameter;
  char **orig_array;
  int32_t columns;
  int32_t parameters;
  int32_t arrays;
} REFDATA;

/*structure for getting editnames */
typedef struct {
  char *match_string, *edit_string;
} EDIT_NAME_REQUEST;

/* Prototypes. The definitions of expandTransferRequests and process_editnames
 * are not present in the visible text of this copy; add_newnames and
 * CopyRowToNewColumn appear only as fragments further down. */
long expandTransferRequests(char ***match, int32_t *matches, long type,
                            TRANSFER_DEFINITION *transfer, long transfers,
                            SDDS_DATASET *inSet);
void add_newnames(SDDS_DATASET *SDDS_dataset, REFDATA *new_data,
                  REFDATA rename_data,
                  EDIT_NAME_REQUEST *edit_column_request,
                  long edit_column_requests,
                  EDIT_NAME_REQUEST *edit_parameter_request,
                  long edit_parameter_requests,
                  EDIT_NAME_REQUEST *edit_array_request,long edit_array_requests,long filenumber);
char **process_editnames(char **orig_name, long **orig_flags,
                         long orig_names,
                         EDIT_NAME_REQUEST *edit_request,
                         long edit_requests,long
                         filenumber);
long CopyRowToNewColumn(SDDS_DATASET *target, long target_row,
                        SDDS_DATASET *source, long source_row,
                        REFDATA new_data, long columns,char *input2);
long CopyParametersFromSecondInput(SDDS_DATASET *SDDS_target,
                                   SDDS_DATASET *SDDS_source,
                                   REFDATA new_data);
long CopyArraysFromSecondInput (SDDS_DATASET *SDDS_target,
                                SDDS_DATASET *SDDS_source,REFDATA new_data);

typedef char *STRING_PAIR[2];

/* Command-line option keywords, in the order of the SET_... codes above. */
char *option[N_OPTIONS] = { "take", "leave", "match", "equate", "transfer", "reuse", "ifnot", "nowarnings", "ifis", "pipe", "fillin", "rename","editnames", "wildmatch" } ;

/* Usage text, split into two strings; main prints both when too few
 * arguments are given.
 * NOTE(review): in this copy the angle-bracket placeholders (file names,
 * list names) are missing from both strings and the line continuations have
 * been flattened; restore the literals from the original source. They are
 * left untouched here because they are runtime text. */
char *USAGE1 = "sddsxref [] [...] [] \n\ [-pipe[=input][,output]]\n\ [-ifis={column|parameter},[,...]]\n\ [-ifnot={parameter|column|array},[,...]]\n\ [-transfer={parameter|array},[,...]]\n\ [-take=[,...]] [-leave=[,...]]\n\ [-fillIn] [-reuse[=[rows][,page]]\n\ [-match=[=]]\n\ [-wildMatch=[=]]\n\ [-rename={column|parameter|array},=[,...]]\n\ [-editnames={column|parameter|array},,]\n\ [-equate=[=]]\n\n\ sddsxref takes columns, parameters, and arrays from succesive pages\n\ from the list of files separated by space \n\ and adds them to successive pages from .\n\ If is given, the result is placed there; otherwise, \n\ is replaced. By default, all columns are taken from .\n\n\ ifnot specifies names of parameters, arrays, and columns that may not\n\ exist in if the program is to run as asked.\n\ ifis specifies names of parameters, arrays, and columns that must\n\ exist in if the program is to run as asked.\n\ transfer specifies names of parameters or arrays to transfer from .\n\ take specifies names of columns to take from .\n\ leave specifies names of columns not to take from .\n\ Overrides -take if both name a given column.\n\ -leave=* results in no columns being taken.";
char *USAGE2 = "fillIn specifies filling in NULL and 0 values in rows for which\n\ no match is found.
By default, such rows are omitted.\n\ match specifies names of columns to match between and\n\ for selection and placement of data taken from\n\ . In general, the first unused match from \n\ is taken.\n\ wildMatch similar to -match, but the data in may contain wildcards.\n\ The first row in that matches each row in is used.\n\ equate specifies names of columns to equate between and\n\ for selection and placement of data taken from\n\ . In general, the first unused match from \n\ is taken.\n\ rename Specifies new names for entities in the output data set.\n\ The entities must still be referred to by their old names in \n\ the other commandline options. \n\ editnames Specifies creation of new names for entities of the specified \n\ type with names matching the specified wildcard string. Editing \n\ is performed using commands reminiscent of emacs keystrokes. \n\ if -editnames={column|parameter|array},wildcard,ei/%ld/ \n\ is specified, the entity names will be changed to N, N is the \n\ position of input files in the command line. \n\ reuse specifies that rows of may be reused, i.e., matched\n\ with more than one row of . Also, -reuse=page specifies\n\n\ that only the first page of is used.\n\ -nowarnings specifies that warning messages should be suppressed.\n\ Program by Michael Borland. (This is version 8, April 2005, M.
Borland)\n";

/* Program entry point.
 * The visible part registers the program name, scans the command line with
 * scanargs, prints both usage strings to stderr and exits with status 1 when
 * fewer than three arguments remain, and then resets all option state to its
 * defaults. The option-parsing loop and the rest of the body are missing
 * from this copy (see the note below). */
int main(int argc, char **argv)
{
  SDDS_DATASET SDDS_1, SDDS_output;
  SDDS_DATASET *SDDS_ref;
  REFDATA *new_data,rename_data, *take_RefData;
  long i, j, i_arg, rows1, rows2, rows2Max, reuse, reusePage, endWarning,k;
  int32_t i1, i2, i3;
  SCANNED_ARG *s_arg;
  char s[200], *ptr;
  char **take_column, **leave_column, **output_column=NULL;
  char **inputfile, **referfile;
  char **match_column, **equate_column;
  long take_columns, leave_columns, match_columns, equate_columns, leave_all_columns;
  int32_t output_columns=0;
  char *input1, *input2, *output;
  long tmpfile_used, retval1, retval2, inputfiles, referfiles;
  long *row_used, wildMatch;
  TRANSFER_DEFINITION *transfer;
  long transfers;
  long warnings;
  IFITEM_LIST ifnot_item, ifis_item;
  unsigned long pipeFlags;
  long fillIn, keyGroups=0;
  KEYED_EQUIVALENT **keyGroup=NULL;
  long outputInitialized;
  int z, it, itm;
  long col;
  int firstRun, copyInput1Only;
  char **string1, **string2;
  double *value1, *value2;
  long matched;
  long equated;
  EDIT_NAME_REQUEST *edit_column_request,*edit_parameter_request,*edit_array_request;
  long edit_column_requests,edit_parameter_requests,edit_array_requests;

  SDDS_RegisterProgramName(argv[0]);
  /* scanargs returns the argument count that is tested below. */
  argc = scanargs(&s_arg, argc, argv);
  if (argc<3) {
    fprintf(stderr, "%s\n%s", USAGE1,USAGE2);
    exit(1);
  }

  /* Defaults: no reference data sets, empty rename and edit request lists,
   * no file names, all counters and flags zero except warnings, which
   * starts at 1. */
  SDDS_ref = NULL;
  take_RefData = NULL;
  new_data=NULL;
  rename_data.columns=rename_data.parameters=rename_data.arrays=0;
  rename_data.new_column=rename_data.orig_column=rename_data.new_parameter
    =rename_data.orig_parameter=rename_data.new_array=rename_data.orig_array=NULL;
  edit_column_request=edit_parameter_request=edit_array_request=NULL;
  edit_column_requests=edit_parameter_requests=edit_array_requests=0;
  input1 = input2 = output = NULL;
  take_column = leave_column = NULL;
  match_column = equate_column = NULL;
  inputfile = referfile = NULL;
  take_columns = leave_columns = match_columns = equate_columns = reuse = reusePage = 0;
  tmpfile_used = inputfiles = referfiles = 0;
  transfer = NULL;
  transfers = 0;
  ifnot_item.items = ifis_item.items = 0;
  warnings = 1;
  pipeFlags = 0;
  fillIn = 0;
  outputInitialized = 0;
  rows1=rows2=output_columns=0;
  string1=string2=NULL;
  wildMatch = 0;

  /* NOTE(review): the source text is truncated from here on. The loop header
   * below is cut off after i_arg, and the statements that follow assign
   * through new_data, read rename_data by value and use a bare return, which
   * matches the add_newnames prototype (void, REFDATA *new_data,
   * REFDATA rename_data) rather than main. Presumably the remainder of main,
   * the intervening function definitions and the head of add_newnames were
   * lost; restore them from the original file before building. */
  for (i_arg=1; i_argcolumns=new_data->parameters=new_data->arrays=0;
  /* Start from empty name lists. */
  new_data->new_column=new_data->orig_column=NULL;
  new_data->new_parameter=new_data->orig_parameter=NULL;
  new_data->new_array=new_data->orig_array=NULL;

  /*no edit requests at all */
  if (!edit_column_requests && !edit_parameter_requests && !edit_array_requests &&
      !rename_data.columns && !rename_data.parameters && !rename_data.arrays)
    return;

  /*transfer renames to new_data*/
  (*new_data).columns=rename_data.columns;
  (*new_data).parameters=rename_data.parameters;
  (*new_data).arrays=rename_data.arrays;
  if (rename_data.columns) {
    /* NOTE(review): neither malloc result is checked in the visible code. */
    (*new_data).new_column=(char**)malloc(sizeof(char*)*rename_data.columns);
    (*new_data).orig_column=(char**)malloc(sizeof(char*)*rename_data.columns);
    /* NOTE(review): truncated again. The loop header is cut off after i, and
     * what follows uses target, source, target_row and source_row, which are
     * parameters of CopyRowToNewColumn; this is presumably the tail of that
     * function. */
    for (i=0;idata)[k]+target_row,
                    ((char***)source->data)[j][source_row])) {
        /* String column: report a failed string copy and return 0. */
        SDDS_SetError("Unable to copy row--string copy failed (SDDS_CopyRow)");
        return(0);
      }
    }
    else {
      /* Other types: copy one element of size bytes from row source_row of
       * source column j to row target_row of target column k. */
      size = SDDS_type_size[type-1];
      memcpy((char*)target->data[k]+size*target_row,
             (char*)source->data[j]+size*source_row, size);
    }
  }
  return(1);
}

/* Copies parameter values from SDDS_source into SDDS_target for the
 * parameters listed in new_data.
 * Returns 1 when new_data.parameters is 0 or when the loop completes, and 0
 * after recording an SDDS error when a copy fails.
 * NOTE(review): the loop header and the lookup code are truncated in this
 * copy (the text jumps from the loop start to the tail of a call that takes
 * SDDS_source parameter j). */
long CopyParametersFromSecondInput(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source,REFDATA new_data)
{
  long i,j,k;
  char s[1024];

  if (new_data.parameters==0)
    return 1;
  /* Always true at this point because of the early return above. */
  if (new_data.parameters) {
    for (i=0;iparameter[j], -1)) {
        /* NOTE(review): sprintf is unbounded; a parameter name longer than
         * roughly 980 characters would overflow s. */
        sprintf(s, "Unable to copy parameters for parameters %s",new_data.new_parameter[i]);
        SDDS_SetError(s);
        return(0);
      }
    }
  }
  return 1;
}

/* Copies array contents from SDDS_source into SDDS_target for the arrays
 * listed in new_data; j indexes the source array and k the target array.
 * Returns 1 when new_data.arrays is 0 or when the loop completes, and 0
 * after recording an SDDS error on a type mismatch, an allocation failure or
 * a failed string copy.
 * NOTE(review): the loop header and the index lookups are truncated in this
 * copy, and the local buffer s is not used in the visible text. */
long CopyArraysFromSecondInput (SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, REFDATA new_data)
{
  long i,j,k,m;
  char s[1024];

  if (new_data.arrays==0)
    return 1;
  for (i=0;ilayout.array_definition[j].type
        !=SDDS_target->layout.array_definition[k].type) {
      /* NOTE(review): this message lacks its closing parenthesis. */
      SDDS_SetError("Can't copy arrays between different types (SDDS_CopyArrays");
      return 0;
    }
    /* Point the target array at its own layout definition and take the
     * element count from the source array. */
    SDDS_target->array[k].definition = SDDS_target->layout.array_definition+k;
    SDDS_target->array[k].elements = SDDS_source->array[j].elements;
    /* Allocate a fresh dimension list and resize the data buffer.
     * NOTE(review): the dimension pointer is overwritten without freeing a
     * previous buffer; confirm whether the target array can already own one
     * when this runs. */
    if (!(SDDS_target->array[k].dimension
          = (int32_t*)SDDS_Malloc(sizeof(*SDDS_target->array[k].dimension)*
                                  SDDS_target->array[k].definition->dimensions)) ||
        !(SDDS_target->array[k].data
          = SDDS_Realloc(SDDS_target->array[k].data,
                         SDDS_type_size[SDDS_target->array[k].definition->type-1]*
                         SDDS_target->array[k].elements))) {
      SDDS_SetError("Unable to copy arrays--allocation failure (SDDS_CopyArrays)");
      return(0);
    }
    /* NOTE(review): the loop condition below is truncated after m. */
    for (m=0; marray[k].definition->dimensions; m++)
      SDDS_target->array[k].dimension[m] = SDDS_source->array[j].dimension[m];
    /* Non-string data is copied bytewise; string arrays go through
     * SDDS_CopyStringArray. */
    if (SDDS_target->array[k].definition->type!=SDDS_STRING)
      memcpy(SDDS_target->array[k].data, SDDS_source->array[j].data,
             SDDS_type_size[SDDS_target->array[k].definition->type-1]*SDDS_target->array[k].elements);
    else if (!SDDS_CopyStringArray(SDDS_target->array[k].data,
                                   SDDS_source->array[j].data,
                                   SDDS_target->array[k].elements)) {
      SDDS_SetError("Unable to copy arrays (SDDS_CopyArrays)");
      return(0);
    }
  }
  return 1;
}