/*************************************************************************\
* Copyright (c) 2002 The University of Chicago, as Operator of Argonne
* National Laboratory.
* Copyright (c) 2002 The Regents of the University of California, as
* Operator of Los Alamos National Laboratory.
* This file is distributed subject to a Software License Agreement found
* in the file LICENSE that is included with this distribution.
\*************************************************************************/

/* program: sddsmxref
 * purpose: creates an SDDS data set from two other data sets
 *          by adding data from one data set to the other
 *          based on matching or filtering of column data
 *
 * For example: suppose file.1 contains the columns (name,x,y) and
 * file.2 contains (name,z,betax,betay).  Then
 *   sddsmxref file.1 file.2 -take=z -match=name file.3
 * would result in (name,x,y,z) being put in file.3.
 *
 * Michael Borland, 1994
 $Log: sddsmxref.c,v $
 Revision 1.22  2010/12/09 16:13:26  borland
 The ifnot and ifis options now respect the -nowarnings request.

 Revision 1.21  2005/11/07 21:48:10  soliday
 Updated to remove Linux compiler warnings.

 Revision 1.20  2005/11/04 22:46:15  soliday
 Updated code to be compiled by a 64 bit processor.

 Revision 1.19  2004/02/17 17:16:19  shang
 added -rename and -editNames option

 Revision 1.18  2002/08/14 17:12:49  soliday
 Added Open License

 Revision 1.17  2001/05/03 20:55:34  soliday
 Standardized usage messages.

 Revision 1.16  2001/01/10 19:35:41  soliday
 Standardized usage message.

 Revision 1.15  2000/04/13 18:03:46  soliday
 Removed invalid prototype.

 Revision 1.14  1999/09/28 15:39:29  soliday
 Added SDDS_Terminate at the end.

 Revision 1.13  1999/07/09 15:08:12  soliday
 Separated the USAGE string into two strings to fix problem on WIN32

 Revision 1.12  1999/05/25 19:12:32  soliday
 Removed compiler warning on linux.

 Revision 1.11  1999/01/06 19:54:49  borland
 Fixed the version number in the usage message.

 * Revision 1.10  1996/02/12 17:25:24  borland
 * Removed erroneous free'ing of take_column array.
 *
 * Revision 1.9  1996/01/29 22:30:26  borland
 * Added code to avoid attempts to transfer column and parameter definitions
 * when the element already exists.
 *
 * Revision 1.8  1996/01/17 16:05:32  borland
 * Added test for lack of columns in input2, so that parameters and arrays can
 * be transferred even if input2 has no column data.
 *
 * Revision 1.7  1996/01/15 22:35:53  borland
 * Now keeps tabular data in input1 if no columns are being taken from
 * input2, even if input2 has an empty tabular data section.
 *
 * Revision 1.4  1996/01/11 20:34:49  borland
 * Added -fillIn option and supporting code.
 *
 * Revision 1.3  1995/09/06 14:56:49  saunders
 * First test release of SDDS1.5
 *
 */
#include "mdb.h"
#include "SDDS.h"
#include "SDDSaps.h"
#include "scan.h"

/* Command-line option codes.  Each value is the position of the matching
 * keyword in the option[] table defined further down in this file, so the
 * two lists have to be kept in the same order.  N_OPTIONS is the number of
 * entries and is used as the dimension of option[].
 */
#define SET_TAKE_COLUMNS 0
#define SET_LEAVE_COLUMNS 1
#define SET_MATCH_COLUMNS 2
#define SET_EQUATE_COLUMNS 3
#define SET_TRANSFER 4
#define SET_REUSE 5
#define SET_IFNOT 6
#define SET_NOWARNINGS 7
#define SET_IFIS 8
#define SET_PIPE 9
#define SET_FILLIN 10
#define SET_RENAME 11
#define SET_EDIT 12
#define N_OPTIONS 13

/* Kinds of entity that can be named in a transfer request.  The values index
 * transfer_type[] and are the cases that expandTransferRequests() switches on.
 */
#define PARAMETER_TRANSFER 0
#define ARRAY_TRANSFER 1
#define TRANSFER_TYPES 2
/* Keyword for each transfer kind, indexed by the *_TRANSFER constants. */
static char *transfer_type[TRANSFER_TYPES] = {
  "parameter", "array"
};

/* One transfer request: the name of an entity together with its kind.
 * NOTE(review): type presumably holds PARAMETER_TRANSFER or ARRAY_TRANSFER,
 * and name may be a wildcard pattern -- confirm against the option parsing
 * in main().
 */
typedef struct {
  char *name;
  long type;
} TRANSFER_DEFINITION;

/* Parallel lists of original names and output names for columns, parameters
 * and arrays.  For each kind, entry i of orig_* and entry i of new_* belong
 * together; process_newnames() fills the new_* entries either from a rename
 * request or with a copy of the original name.  The three counters give the
 * list lengths (note that columns is int32_t while the other two are long).
 */
typedef struct {
  char **new_column;
  char **new_parameter;
  char **new_array;
  char **orig_column;
  char **orig_parameter;
  char **orig_array;
  int32_t columns;
  long parameters;
  long arrays;
} REFDATA;

/* One -editnames request.
 * NOTE(review): going by the usage text, match_string is presumably the
 * wildcard that selects the names and edit_string the editing command
 * applied to them -- confirm against the option parsing in main().
 */
typedef struct {
  char *match_string, *edit_string;
} EDIT_NAME_REQUEST;

/* Entity classes; the values index mode_name[]. */
#define COLUMN_MODE 0
#define PARAMETER_MODE 1
#define ARRAY_MODE 2
#define MODES 3
/* Keyword for each entity class, indexed by the *_MODE constants. */
static char *mode_name[MODES] = {
  "column", "parameter", "array",
} ;

/* Resets *match and *matches, then selects SDDS_MatchParameters or
 * SDDS_MatchArrays according to type (PARAMETER_TRANSFER or ARRAY_TRANSFER);
 * any other type is reported through SDDS_Bomb.
 * NOTE(review): presumably returns in *match the names from inSet that the
 * transfer requests of that type select -- confirm against the definition.
 */
long expandTransferRequests(char ***match, long *matches,
                            long type, TRANSFER_DEFINITION *transfer, long transfers,
                            SDDS_DATASET *inSet);

void
process_newnames(SDDS_DATASET *SDDS_dataset, REFDATA *take_RefData, REFDATA rename_data,
                 EDIT_NAME_REQUEST *edit_column_request, long edit_column_requests,
                 EDIT_NAME_REQUEST *edit_parameter_request, long edit_parameter_requests,
                 EDIT_NAME_REQUEST *edit_array_request,long edit_array_requests);
/* process_newnames() (prototype just above) allocates the new_column,
 * new_parameter and new_array lists of take_RefData and fills each entry with
 * the output name for the corresponding orig_* entry: the new name from
 * rename_data when the original name is listed there (exact match), otherwise
 * a copy of the original name.  Names produced from the edit requests are
 * then copied over the matching entries.  It exits the program if the names
 * of SDDS_dataset cannot be retrieved.
 */

/* Builds a list of names from orig_name using the edit requests.  The caller
 * in this file frees every returned string, the returned list, and the array
 * handed back through *orig_flags.
 * NOTE(review): the flags presumably mark which names an edit request
 * touched -- the definition is not visible here, confirm.
 */
char **process_editnames(char **orig_name, long **orig_flags, long orig_names,
                         EDIT_NAME_REQUEST *edit_request, long edit_requests);

/* Copies the values of row source_row of source into row target_row of
 * target.  Returns 1 at once when columns is zero.  String values are
 * duplicated with SDDS_CopyString (failure records an SDDS error and returns
 * 0); every other type is copied with memcpy using the size of the type.
 * NOTE(review): presumably the columns copied are those listed in new_data,
 * with input2 used only in messages -- confirm against the definition.
 */
long CopyRowToNewColumn(SDDS_DATASET *target, long target_row, SDDS_DATASET *source, long source_row,
                        REFDATA new_data, long columns, char *input2);

/* Copies array dimensions and data from SDDS_source to SDDS_target.  Returns
 * 1 immediately when new_data.arrays is zero.  Returns 0 with an SDDS error
 * recorded when the source and target array types differ, when an allocation
 * fails, or when copying a string array fails.
 */
long CopyArraysFromSecondInput (SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, REFDATA new_data);

/* Copies parameter values from SDDS_source to SDDS_target.  Returns 1 on
 * success, or 0 after recording an SDDS error that names the output
 * parameter which could not be set.
 */
long CopyParametersFromSecondInput(SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source,REFDATA new_data);

/* NOTE(review): judging by the names, these release the storage held by a
 * REFDATA and by a list of edit requests; the meaning of the rename argument
 * is not visible here -- confirm against the definitions.
 */
void free_refdata(REFDATA *refData, long rename);
void free_edit_request(EDIT_NAME_REQUEST *edit_request, long edit_requests);

/* A pair of strings.  main() keeps its match_column and equate_column lists
 * as arrays of this type.
 * NOTE(review): presumably element 0 is the column name in the first input
 * and element 1 the column name in the second input -- confirm.
 */
typedef char *STRING_PAIR[2];

/* Returns 1 when the compared values of row1 in SDDS1 and row2 in SDDS2 are
 * all identical, and 0 at the first pair that differs; values are compared
 * bytewise with memcmp over the size of the stored type.
 * NOTE(review): presumably one comparison is made per entry of
 * equate_column -- confirm against the definition.
 */
long rows_equate(SDDS_DATASET *SDDS1, long row1, SDDS_DATASET *SDDS2, long row2,
                 long equate_columns, STRING_PAIR *equate_column);

/* Command-line option keywords.  The position of each keyword equals the
 * value of the corresponding SET_* constant, and N_OPTIONS is the number of
 * entries, so this table and the SET_* list must stay in the same order.
 */
char *option[N_OPTIONS] = {
  "take", "leave", "match", "equate", "transfer", "reuse", "ifnot",
  "nowarnings", "ifis", "pipe", "fillin","rename","editnames",
} ;

/* First half of the usage message.  main() prints USAGE1 and then USAGE2 to
 * stderr and exits with status 1 when fewer than three arguments are left
 * after scanargs().  According to the revision log the text was split into
 * two strings to fix a problem on WIN32.
 * NOTE(review): the text looks damaged -- several bracketed placeholders
 * appear to be empty (for example the two empty bracket pairs after the
 * program name) and the word succesive is misspelled; restore it from the
 * reference copy of this file before release.
 */
char *USAGE1 = "sddsmxref [] [] [-pipe[=input][,output]] \n\ [-ifis={column|parameter},[,...]]\n\ [-ifnot={parameter|column|array},[,...]]\n\ [-transfer={parameter|array},[,...]]\n\ [-take=[,...]] [-leave=[,...]]\n\ [-fillIn]\n\ [-match=[=][,...]]\n\ [-equate=[=]] [-reuse[=[rows][,page]]]\n\ [-rename={column|parameter|array},=[,...]]\n\ [-editnames={column|parameter|array},,]\n\n\ sddsmxref takes columns, parameters, and arrays from succesive tables\n\ from file and adds them to successive tables from .\n\ If is given, the result is placed there; otherwise, \n\ is replaced. 
By default, all columns are taken from .\n\n\ ifnot specifies names of parameters, arrays, and columns that may not\n\ exist in if the program is to run as asked.\n\ ifis specifies names of parameters, arrays, and columns that must\n\ exist in if the program is to run as asked.\n\ transfer specifies names of parameters or arrays to transfer from .\n\ take specifies names of columns to take from .\n\ leave specifies names of columns not to take from .\n\ Overrides -take if both name a given column.\n\ -leave=* results in no columns being taken.";

/* Second half of the usage message; printed by main() right after USAGE1.
 * It contains a literal %ld, which is harmless because main() passes the
 * text as an argument to a %s conversion rather than as the format string.
 */
char *USAGE2 = "fillIn specifies filling in NULL and 0 values in rows for which\n\ no match is found. By default, such rows are omitted.\n\ match specifies names of columns to match between and\n\ for selection and placement of data taken from\n\ .\n\ equate specifies names of columns to equate between and\n\ for selection and placement of data taken from\n\ .\n\ reuse specifies that rows of may be reused, i.e., matched\n\ with more than one row of . Also, -reuse=page specifies\n\n\ that only the first page of is used.\n\ nowarnings specifies that warning messages should be suppressed.\n\ rename Specifies new names for entities in the output data set.\n\ The entities must still be referred to by their old names in \n\ the other commandline options. \n\ editnames Specifies creation of new names for entities of the specified \n\ type with names matching the specified wildcard string. Editing \n\ is performed using commands reminiscent of emacs keystrokes. \n\ if -editnames={column|parameter|array},wildcard,ei/%ld/ \n\ is specified, the entity names will be changed to N, N is the \n\ position of input files in the command line. \n\ Program by Michael Borland. 
(This is version 4, February 1996.)\n"; int main(int argc, char **argv) { SDDS_DATASET SDDS_1, SDDS_2, SDDS_output; long i, j, k, i_arg, rows1, rows2, reuse, reusePage; SCANNED_ARG *s_arg; char s[200], *ptr; char **take_column, **leave_column, **output_column; STRING_PAIR *match_column, *equate_column; long take_columns, leave_columns, match_columns, equate_columns, leave_all_columns; int32_t output_columns; char *input1, *input2, *output, *match_value; long tmpfile_used, retval1, retval2; long *row_used, n; TRANSFER_DEFINITION *transfer; long transfers; long warnings, fillIn; IFITEM_LIST ifnot_item, ifis_item; unsigned long pipeFlags; REFDATA rename_data, take_RefData; EDIT_NAME_REQUEST *edit_column_request,*edit_parameter_request,*edit_array_request; long edit_column_requests,edit_parameter_requests,edit_array_requests; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&s_arg, argc, argv); if (argc<3) { fprintf(stderr, "%s\n%s", USAGE1, USAGE2); exit(1); } input1 = input2 = output = NULL; take_column = leave_column = NULL; match_column = equate_column = NULL; take_columns = leave_columns = match_columns = equate_columns = reuse = reusePage = 0; tmpfile_used = 0; transfer = NULL; transfers = 0; ifnot_item.items = ifis_item.items = 0; warnings = 1; pipeFlags = 0; fillIn = 0; leave_all_columns = 0; rename_data.columns=rename_data.parameters=rename_data.arrays=0; rename_data.new_column=rename_data.orig_column=rename_data.new_parameter =rename_data.orig_parameter=rename_data.new_array=rename_data.orig_array=NULL; edit_column_request=edit_parameter_request=edit_array_request=NULL; edit_column_requests=edit_parameter_requests=edit_array_requests=0; take_RefData.columns=take_RefData.parameters=take_RefData.arrays=0; take_RefData.orig_column=take_RefData.new_column=take_RefData.orig_parameter=take_RefData.new_parameter =take_RefData.orig_array=take_RefData.new_array=NULL; for (i_arg=1; i_argdata[index1]+size*row1; data2 = (char*)SDDS2->data[index2]+size*row2; if 
(memcmp(data1, data2, size)!=0) return(0); } return(1); } long expandTransferRequests(char ***match, long *matches, long type, TRANSFER_DEFINITION *transfer, long transfers, SDDS_DATASET *inSet) { long i, first; int32_t (*matchRoutine)(SDDS_DATASET *SDDS_dataset, char ***nameReturn, int32_t matchMode, int32_t typeMode, ... ); *matches = 0; *match = NULL; switch (type) { case PARAMETER_TRANSFER: matchRoutine = SDDS_MatchParameters; break; case ARRAY_TRANSFER: matchRoutine = SDDS_MatchArrays; break; default: SDDS_Bomb("invalid transfer type--this shouldn't happen"); exit(1); break; } first = 0; for (i=0; icolumns) take_RefData->new_column=(char**)malloc(sizeof(*(take_RefData->new_column))*take_RefData->columns); if (take_RefData->parameters) take_RefData->new_parameter=(char**)malloc(sizeof(*(take_RefData->new_parameter))*take_RefData->parameters); if (take_RefData->arrays) take_RefData->new_array=(char**)malloc(sizeof(*(take_RefData->new_array))*take_RefData->arrays); /*transfer renames to take_RefData*/ for (i=0;icolumns;i++) if ((k=match_string(take_RefData->orig_column[i],rename_data.orig_column,rename_data.columns,EXACT_MATCH))>=0) SDDS_CopyString(&take_RefData->new_column[i],rename_data.new_column[k]); else SDDS_CopyString(&take_RefData->new_column[i],take_RefData->orig_column[i]); for (i=0;iparameters;i++) if ((k=match_string(take_RefData->orig_parameter[i],rename_data.orig_parameter,rename_data.parameters,EXACT_MATCH))>=0) SDDS_CopyString(&take_RefData->new_parameter[i],rename_data.new_parameter[k]); else SDDS_CopyString(&take_RefData->new_parameter[i],take_RefData->orig_parameter[i]); for (i=0;iarrays;i++) if ((k=match_string(take_RefData->orig_array[i],rename_data.orig_array,rename_data.arrays,EXACT_MATCH))>=0) SDDS_CopyString(&take_RefData->new_array[i],rename_data.new_array[k]); else SDDS_CopyString(&take_RefData->new_array[i],take_RefData->orig_array[i]); if (!(column_names=SDDS_GetColumnNames(SDDS_dataset, &columns))) { SDDS_PrintErrors(stderr, 
SDDS_VERBOSE_PrintErrors); exit(1); } if (!(parameter_names=SDDS_GetParameterNames(SDDS_dataset, ¶meters))) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors); exit(1); } if (!(array_names=SDDS_GetArrayNames(SDDS_dataset, &arrays))) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors); exit(1); } /*process edit names */ if (edit_column_requests) { if ((new_names=process_editnames(column_names,&orig_columnflags,columns, edit_column_request,edit_column_requests))) { for (i=0;iorig_column,take_RefData->columns,EXACT_MATCH))>=0) SDDS_CopyString(&take_RefData->new_column[k],new_names[i]); } free(new_names[i]); } free(new_names); } } if (edit_parameter_requests) { if ((new_names=process_editnames(parameter_names,&orig_parameterflags,parameters, edit_parameter_request,edit_parameter_requests))) { for (i=0;iorig_parameter,take_RefData->parameters,EXACT_MATCH))>=0) SDDS_CopyString(&take_RefData->new_parameter[k],new_names[i]); } free(new_names[i]); } free(new_names); } } if (edit_array_requests) { if ((new_names=process_editnames(array_names,&orig_arrayflags,arrays, edit_array_request,edit_array_requests))) { for (i=0;iorig_array,take_RefData->arrays,EXACT_MATCH))>=0) SDDS_CopyString(&take_RefData->new_array[k],new_names[i]); } free(new_names[i]); } free(new_names); } } if (orig_columnflags) free(orig_columnflags); if (orig_parameterflags) free(orig_parameterflags); if (orig_arrayflags) free(orig_arrayflags); for (i=0;iparameter[j], -1)) { sprintf(s, "Unable to copy parameters for parameters %s",new_data.new_parameter[i]); SDDS_SetError(s); return(0); } } return 1; } long CopyArraysFromSecondInput (SDDS_DATASET *SDDS_target, SDDS_DATASET *SDDS_source, REFDATA new_data) { long i,j,k,m; char s[1024]; if (new_data.arrays==0) return 1; for (i=0;ilayout.array_definition[j].type !=SDDS_target->layout.array_definition[k].type) { SDDS_SetError("Can't copy arrays between different types (SDDS_CopyArrays"); return 0; } SDDS_target->array[k].definition = 
SDDS_target->layout.array_definition+k; SDDS_target->array[k].elements = SDDS_source->array[j].elements; if (!(SDDS_target->array[k].dimension = (int32_t*)SDDS_Malloc(sizeof(*SDDS_target->array[k].dimension)* SDDS_target->array[k].definition->dimensions)) || !(SDDS_target->array[k].data = SDDS_Realloc(SDDS_target->array[k].data, SDDS_type_size[SDDS_target->array[k].definition->type-1]* SDDS_target->array[k].elements))) { SDDS_SetError("Unable to copy arrays--allocation failure (SDDS_CopyArrays)"); return(0); } for (m=0; marray[k].definition->dimensions; m++) SDDS_target->array[k].dimension[m] = SDDS_source->array[j].dimension[m]; if (SDDS_target->array[k].definition->type!=SDDS_STRING) memcpy(SDDS_target->array[k].data, SDDS_source->array[j].data, SDDS_type_size[SDDS_target->array[k].definition->type-1]*SDDS_target->array[k].elements); else if (!SDDS_CopyStringArray(SDDS_target->array[k].data, SDDS_source->array[j].data, SDDS_target->array[k].elements)) { SDDS_SetError("Unable to copy arrays (SDDS_CopyArrays)"); return(0); } } return 1; } long CopyRowToNewColumn(SDDS_DATASET *target, long target_row, SDDS_DATASET *source, long source_row, REFDATA new_data, long columns, char *input2) { long i,j,k,type,size; char s[1024]; if (!columns) return 1; for (i=0; idata)[k]+target_row, ((char***)source->data)[j][source_row])) { SDDS_SetError("Unable to copy row--string copy failed (SDDS_CopyRow)"); return(0); } } else { size = SDDS_type_size[type-1]; memcpy((char*)target->data[k]+size*target_row, (char*)source->data[j]+size*source_row, size); } } return(1); } void free_edit_request(EDIT_NAME_REQUEST *edit_request, long edit_requests) { long i; if (edit_requests) { for (i=0;i