/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* $Log: plaindata2sdds.c,v $ Revision 1.23 2011/04/21 16:07:33 soliday In addition to the standard 'e notation', I have had added support for the less common 'd notation' sometimes used by fortran. I have also added support for files that include a summary after the data columns. Revision 1.22 2009/01/30 18:07:42 soliday Updated to accept lines 10 times as long as before. Revision 1.21 2008/11/20 16:12:37 soliday Modified the commentCharacters code again to fix an issue compiling on Linux. Revision 1.20 2008/11/20 15:51:09 soliday The commentCharacters variable is now set to NULL by default. Revision 1.19 2008/10/16 16:36:01 soliday Added the skipcolumn and commentCharacters options. Revision 1.18 2006/02/13 21:55:09 soliday Fixed problem reading input files if they do not end with a newline character. Revision 1.17 2005/11/04 22:46:11 soliday Updated code to be compiled by a 64 bit processor. Revision 1.16 2004/04/27 17:26:28 soliday Added the count suboption to the -column and -parameter options. This is used to create multiple columns and/or parameters with the same prefix and incrementing numerical suffixes. Revision 1.15 2004/01/06 23:25:03 soliday Updated so that it only allocates the required memory for string values instead of the maximum allowed for an SDDS string value. Revision 1.14 2003/12/22 21:14:27 soliday By default it now uses any white space as the separator. This is useful if there is a mix of white space characters in the ascii plaindata file. Revision 1.13 2003/09/02 19:16:02 soliday Cleaned up code for Linux. Revision 1.12 2002/08/14 17:12:38 soliday Added Open License Revision 1.11 2002/06/04 21:34:00 soliday Added the -skiplines option. Revision 1.10 2001/11/28 23:52:38 shang added -fillin feature for ascii input file Revision 1.9 2001/11/28 19:40:15 borland Added code to insert terminating 0 in strings read from binary mode input files. Also added code to test strings to see if they are too long for the buffer. Revision 1.8 2001/01/23 19:14:56 soliday Standardized usage message. Revision 1.7 2000/04/13 17:08:57 soliday Added Borland C define statement for setmode. Revision 1.6 2000/02/10 19:16:04 soliday Added SDDS_LengthenTable command for files with more than 10000 rows. Revision 1.5 2000/02/08 17:46:26 soliday Fixed problem with separator now working unless it was a space. Revision 1.4 1999/12/07 17:20:32 soliday Now works with WIN32 Revision 1.3 1999/11/15 22:55:25 soliday It now accepts plaindata files without row counts. Revision 1.2 1999/11/11 15:50:53 soliday Fixed some bugs and made some changes requested by Borland Revision 1.1 1999/11/02 19:39:47 soliday First c version * */ #include "mdb.h" #include "SDDS.h" #include "scan.h" #include #if defined(_WIN32) #include #include #if defined(__BORLANDC__) #define _setmode(handle, amode) setmode(handle, amode) #endif #endif #define ASCII_MODE 0 #define BINARY_MODE 1 #define MODES 2 static char *mode_name[MODES] = { "ascii", "binary", }; #define TYPE_SHORT 0 #define TYPE_LONG 1 #define TYPE_FLOAT 2 #define TYPE_DOUBLE 3 #define TYPE_STRING 4 #define TYPE_CHARACTER 5 #define DATATYPES 6 static char *type_name[DATATYPES] = { "short", "long", "float", "double", "string", "character", }; #define HEADER_UNITS 0 #define HEADER_DESCRIPTION 1 #define HEADER_SYMBOL 2 #define HEADER_COUNT 3 #define HEADERELEMENTS 4 static char *header_elements[HEADERELEMENTS] = { "units", "description", "symbol", "count" }; typedef struct { void *values; long elements; char **stringValues; char *units; char *description; char *symbol; char *name; long type; short skip; } COLUMN_DATA_STRUCTURES; typedef struct { char *units; char *description; char *symbol; char *name; long type; } PARAMETER_DATA_STRUCTURES; #define SET_INPUTMODE 0 #define SET_OUTPUTMODE 1 #define SET_SEPARATOR 2 #define SET_NOROWCOUNT 3 #define SET_PARAMETER 4 #define SET_COLUMN 5 #define SET_PIPE 6 #define SET_NOWARNINGS 7 #define SET_ORDER 8 #define SET_FILLIN 9 #define SET_SKIPLINES 10 #define SET_SKIPCOLUMN 11 #define SET_COMMENT 12 #define N_OPTIONS 13 char *option[N_OPTIONS] = { "inputMode", "outputMode", "separator", "noRowCount", "parameter", "column", "pipe", "nowarnings", "order","fillin", "skiplines", "skipcolumn", "commentCharacters", }; #define ROW_ORDER 0 #define COLUMN_ORDER 1 #define ORDERS 2 static char *order_names[ORDERS] = { "rowMajor", "columnMajor", }; char *USAGE = "plaindata2sdds \n\t\ [-pipe=[input][,output]] \n\t\ [-inputMode=] \n\t\ [-outputMode=] \n\t\ [-separator=] \n\t\ [-commentCharacters=] \n\t\ [-noRowCount] \n\t\ [-order=] \n\t\ [-parameter=,[,units=][,description=][,symbol=][,count=]...] \n\t\ [-column=,[,units=][,description=][,symbol=][,count=]...] \n\t\ [-skipcolumn=type] \n\t\ [-nowarnings] [-fillin]\n\t\ [-skiplines=]\n\n\ -inputMode The plain data file can be read in ascii or binary format.\n\ -outputMode The SDDS data file can be written in ascii or binary format.\n\ -separator In ascii mode the columns of the plain data file are\n\ separated by the given character. Be default it uses any\n\ combination of whitespace characters.\n\ -noRowCount The number of rows is not included in the plain data file.\n\ If the plain data file is a binary file then the row count must be included.\n\ -order Row major order is the default. Here each row of the plain data file\n\ consists of one element from each column. In column major order\n\ each column is located entirely on one row.\n\ -parameter Add this option for each parameter in the plain data file.\n\ -fillin fill in blank for string column or 0 for numeric column when there \n\ is no data for it.\n\ -column Add this option for each column in the plain data file.\n\ -skipcolumn Add this option to skip over a column in the plain data file.\n\ -skiplines Add this option to skip header lines.\n\n\ Program by Robert Soliday. ("__DATE__")\n"; void SetColumnData(long type, SDDS_DATASET *dataset, void *values, long rows, long index); void *AllocateColumnData(long type, void *values, long rows); char **AllocateColumnStringData(char **values, long rows, long previous_rows); long getToken(char *s, char *buffer, long buflen, char separator, long whitespace); void ConvertDNotationToENotation (char *line); /* ********** */ int main(int argc, char **argv) { FILE *fileID; COLUMN_DATA_STRUCTURES *columnValues; PARAMETER_DATA_STRUCTURES *parameterValues; SDDS_DATASET SDDS_dataset; SCANNED_ARG *s_arg; long i, j, k, n, i_arg, rows; long maxRows=10000, initRows=10000, row, par, col, page, size, readline=1, fillin=0; int32_t ptrSize=0; char *input, *output, s[1024], *ptr, *ptr2, data[10240],temp[10240]; unsigned long pipeFlags=0; long noWarnings=0, tmpfile_used=0, columnOrder=0, whitespace=1; short shortValue; int32_t longValue; float floatValue; double doubleValue; char stringValue[SDDS_MAXLINE]; char characterValue; char buffer[124], buffer2[124]; long *parameterIndex, *columnIndex; long binary=0, noRowCount=0, inputBinary=0, count=0; char separator; char commentCharacters[20]; short checkComment=0; short commentFound; long parameters=0, columns=0; long skiplines=0; short abort=0, recover=1; input = output = NULL; separator = ' '; columnValues = NULL; parameterValues = NULL; parameterIndex = columnIndex = NULL; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&s_arg, argc, argv); if (argc<3) bomb(NULL, USAGE); for (i_arg=1; i_argSDDS_MAXLINE-1) SDDS_Bomb("String is too long"); if (size > 0) { if (fread(&stringValue,size,1,fileID) != 1) { SDDS_SetError("Unable to read string parameter"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } stringValue[size] = 0; } else { strcpy(stringValue,""); } if (!SDDS_SetParameters(&SDDS_dataset,SDDS_SET_BY_INDEX|SDDS_PASS_BY_VALUE,par,stringValue,-1)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); break; case SDDS_CHARACTER: if (fread(&characterValue,1,1,fileID) != 1) { SDDS_SetError("Unable to read character parameter"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } if (!SDDS_SetParameters(&SDDS_dataset,SDDS_SET_BY_INDEX|SDDS_PASS_BY_VALUE,par,characterValue,-1)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); break; } } for (i=0;i columnValues[i].elements) { if (columnValues[i].type == SDDS_STRING) { columnValues[i].stringValues = AllocateColumnStringData(columnValues[i].stringValues, rows, columnValues[i].elements); } else { columnValues[i].values = AllocateColumnData(columnValues[i].type, columnValues[i].values, rows); } columnValues[i].elements = rows; } } if (columnOrder) { for (col=0;colSDDS_MAXLINE-1) SDDS_Bomb("String is too long"); columnValues[col].stringValues[i] = malloc(size + 1); if (size > 0) { if (fread(columnValues[col].stringValues[i],size,1,fileID) != 1) { SDDS_SetError("Unable to read string column"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } columnValues[col].stringValues[i][size] = 0; } else { strcpy(columnValues[col].stringValues[i],""); } } break; case SDDS_CHARACTER: for (i=0;iSDDS_MAXLINE-1) SDDS_Bomb("String is too long"); columnValues[col].stringValues[i] = malloc(size + 1); if (size > 0) { if (fread(columnValues[col].stringValues[i],size,1,fileID) != 1) { SDDS_SetError("Unable to read string column"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } columnValues[col].stringValues[i][size] = 0; } else { strcpy(columnValues[col].stringValues[i],""); } break; case SDDS_CHARACTER: if (fread(((char*)(columnValues[col].values)+i),1,1,fileID) != 1) { SDDS_SetError("Unable to read character column"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } break; } } } } if (rows > maxRows) { if (!SDDS_LengthenTable(&SDDS_dataset, rows - maxRows)) { SDDS_SetError("Unable to lengthen table"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } maxRows = rows; } j=n; for (i=0;i 0) { fgets(ptr, ptrSize, fileID); skiplines--; } if (!fgetsSkipCommentsResize(&ptr,&ptrSize,fileID, '!')) break; commentFound=0; if (checkComment) { for (i=0; i 0) && ((row < rows) || (noRowCount))) { if (columnOrder) { if (noRowCount) { cp_str(&ptr2,ptr); rows = 0; while (getToken(ptr2,data,10240,separator,whitespace) >= 0) { rows++; } free(ptr2); } if (rows > columnValues[col].elements) { if (columnValues[col].type == SDDS_STRING) { columnValues[col].stringValues = AllocateColumnStringData(columnValues[col].stringValues, rows, columnValues[col].elements); } else { columnValues[col].values = AllocateColumnData(columnValues[col].type, columnValues[col].values, rows); } columnValues[col].elements = rows; } switch (columnValues[col].type) { case SDDS_SHORT: for (row=0;row maxRows) { if (!SDDS_LengthenTable(&SDDS_dataset, rows - maxRows)) { SDDS_SetError("Unable to lengthen table"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } maxRows = rows; } if (columnValues[col].skip == 0) { if (columnValues[col].type == SDDS_STRING) { SetColumnData(columnValues[col].type, &SDDS_dataset, columnValues[col].stringValues, rows, col); } else { SetColumnData(columnValues[col].type, &SDDS_dataset, columnValues[col].values, rows, col); } n++; } col++; row = 0; } else { if (noRowCount) { if (row == 0) { rows = 3; } else if (row == rows - 1) { rows = rows + 3; for (i=0;i columnValues[i].elements) { if (columnValues[i].type == SDDS_STRING) { columnValues[i].stringValues = AllocateColumnStringData(columnValues[i].stringValues, rows, columnValues[i].elements); } else { columnValues[i].values = AllocateColumnData(columnValues[i].type, columnValues[i].values, rows); } } columnValues[i].elements = rows; } } } if (row == 0) for (i=0;i columnValues[i].elements) { if (columnValues[i].type == SDDS_STRING) { columnValues[i].stringValues = AllocateColumnStringData(columnValues[i].stringValues, rows, columnValues[i].elements); } else { columnValues[i].values = AllocateColumnData(columnValues[i].type, columnValues[i].values, rows); } } columnValues[i].elements = rows; } if (noRowCount) { cp_str(&ptr2,ptr); i = 0; while (getToken(ptr2,data,10240,separator,whitespace) >= 0) i++; free(ptr2); if ((i != columns) && (parameters>0 && i==1)) { if (row > 0) { if (row > maxRows) { if (!SDDS_LengthenTable(&SDDS_dataset, row - maxRows)) { SDDS_SetError("Unable to lengthen table"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } maxRows = row; } n=0; for (j=0;j maxRows) { if (!SDDS_LengthenTable(&SDDS_dataset, rows - maxRows)) { SDDS_SetError("Unable to lengthen table"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } maxRows = rows; } n=0; for (i=0;i 0) && (row == rows)) || (columns == 0))) { if (!SDDS_WritePage(&SDDS_dataset)) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } maxRows = 10000; if (!SDDS_StartPage(&SDDS_dataset, initRows)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); row = par = col = 0; rows = -1; } ptr[0] = 0; } if (noRowCount) { if (row > 0) { if (row > maxRows) { if (!SDDS_LengthenTable(&SDDS_dataset, row - maxRows)) { SDDS_SetError("Unable to lengthen table"); SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } maxRows = row; } n=0; for (j=0;j=buflen) return(-1); strncpy(buffer, ptr1, n); buffer[n]=0; /* update the original string to delete the token */ strcpy(ptr0, s); /* return the string length */ return(n); } /* Description: Converts Fortran D notation to C++ e notation */ void ConvertDNotationToENotation (char *line) { char *ptr = line; while (*ptr && (ptr = strstr (ptr, "D+"))) { *ptr = 'e'; ptr++; *ptr = '+'; ptr++; } ptr = line; while (*ptr && (ptr = strstr (ptr, "D-"))) { *ptr = 'e'; ptr++; *ptr = '-'; ptr++; } }