/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* program: sddsmatrixlapackop * purpose: a general matrix operation sdds tool using LAPACK or CLAPACK routines * * $Log: sddsmatrixop.c,v $ * Revision 1.4 2010/08/12 22:13:19 shang * now non-LAPACK routine can also compute the determinant if requested. * * Revision 1.3 2009/01/22 21:06:22 shang * modified to be able to work with files that contain string columns * * Revision 1.2 2009/01/13 21:55:24 soliday * Fixed some issues on WIN32 * * Revision 1.1 2008/08/22 15:10:22 shang * added LAPACK (CLAPACK) routines for matrix operation and improved the matrix operations using column major ordered matrices. * */ #include "mdb.h" #include "scan.h" #include "rpn.h" #include "matrixop.h" #define CLO_PUSH 0 #define CLO_ADD 1 #define CLO_SUBTRACT 2 #define CLO_SCALARMULTIPLY 3 #define CLO_SCALARDIVIDE 4 #define CLO_TRANSPOSE 5 #define CLO_INVERT 6 #define CLO_MULTIPLY 7 #define CLO_IDENTITY 8 #define CLO_PIPE 9 #define CLO_NEGATION 10 #define CLO_VERBOSE 11 #define CLO_DIVIDE 12 #define CLO_SWAP 13 #define CLO_COLUMNNAMES 14 #define CLO_DETERMINANT 15 #define CLO_ROOT 16 #define CLO_DIGITS 17 #define CLO_PRINTPACKAGE 18 #define N_OPTIONS 19 /*note: only -pipe=out is valid */ char *option[N_OPTIONS] = {"push","add","subtract","scalarmultiply","scalardivide", "transpose","invert","multiply","identity","pipe","negation", "verbose","divide","swap", "columnNames", "determinant", "root", "digits", "printPackage"} ; #define BINARYOPTIONS 5 char *binaryOption[BINARYOPTIONS]={ "add","subtract","multiply","divide","swap", }; /*here, the unary option just mean that only one matrix is needed to be poped from the stack */ #define UNARYOPTIONS 7 char *unaryOption[UNARYOPTIONS]={ "scalarmultiply","scalardivide","transpose","invert","identity", "negation", "determinant", } ; char *USAGE1 = "sddsmatrixop [input_matrix] [output_matrix] [-pipe=in|out] [-verbose] \n\ {[-push=] [-multiply]|[-add]|[-subtract]|[-invert]...} [-root=] \n\ [-columnNames=filename=[,column=]] [-determinant] [-printPackage]\n\ SDDS file which contains the input matrix -- the first element \n\ in the stack.\n\ The result matrix is written into SDDS file named by .\n\ -push= the matrix (file) that will pushed into stack. \n\ -add addition operator, other operator are also named by their meanings. \n\ such as multiply,invert,transpose,negation, ect. \n\ -invert[=largestSingularValues=]|[-smallestSingularValues=]|[minimumSingularValueRatio=]] \n\ matrix inversion.\n\ -identity means push a unit matrix on the stack which has the same demension as \n\ top top matrix in the statck. \n\ -identity= push a square unit matrix with dimension of into stack.\n\ -swap swap the top two matrices in the stack. \n"; char *USAGE2 = "-multiply normal matrix-matrix multiplication. \n\ -multiply=hadamard element-by-element multiplication, similar to addition but \n\ with * operator. \n\ -divide=hadamard element-by-element division. \n\ -columnNames=filename=[,column=] \n\ This takes column from . The default for is \n\ ColumnName. This supplies the output column names. \n\ -scalarmultiply= multiply the matrix by a constant value. \n\ -scalardivide= divide the matrix by a constact value. \n\ -determinant compute the determinant of final matrix and write to the output file \n\ \"Determinant\" parameter.\n\ -root use the string specified to generate column names.\n\ -digits minimum number of digits used in the number appended to the root\n\ part of the column names. Default is value 2.\n\ -printPackage prints out the linear algebra package that was compiled.\n\ The -push and operator can be repeated many times. \n\n\ sddsmatrixop provides a generic method for matrix operations.\n\ Program by Hairong Shang. (This is version 1.0, June 2003)\n"; #define MAX_PAGES 256 /*the number of pages in a file, each page is an element of an array stack */ long matrixStack[MAX_PAGES][STACKSIZE]; long matrixStackPtr[MAX_PAGES]; /*each page of files is a stack, each stack has a maximum number size of STACKSIZE */ /*the array size of stack is number of pages in the input file */ static long input_pages; /*number of pages in the input file */ int32_t InitializeInputAndGetColumnNames(SDDS_DATASET *SDDS_dataset, char *filename, char ***numericalColumnName, int32_t *numericalColumns); void push_matrix(char *fileName, char ***doubleColumnName,int32_t *doubleColumnNames,long isInput); long push_address(long address, long index); long pop_address(long index); int main(int argc, char **argv) { SCANNED_ARG *s_arg; SDDS_TABLE outputPage,columnPage; char *inputFile,*outputFile,*matrixA,*matrixB, **doubleColumnName,*columnNameFile,**givenColumnName,*columnNameFileColumn; char *root=NULL, **outputColumnName=NULL; unsigned long pipeFlags,dummyFlags; int32_t currentOption, i,j,i_arg,n,keepDefinition,doubleColumns,givenColumnNames, do_determinant, outputColumns; long digits=0, identityN, tmpfile_used, printPackage=0; double scalarValue, det; MAT **A,**B, **temp; short isBinary, isUnary,verbose=0; char format[100], name[1024]; int32_t operationsSupplied=0, largestSValue=0, smallestSValue=0, inputRead=0; double minRatio = 0; do_determinant=0; input_pages=0; columnNameFileColumn=NULL; doubleColumnName=NULL; doubleColumns=0; inputFile=outputFile=NULL; A=B=temp=NULL; pipeFlags=0; tmpfile_used=0; dstackptr = 0; sstackptr=0; keepDefinition=1; matrixA=matrixB=NULL; columnNameFile=NULL; givenColumnName=NULL; givenColumnNames=0; for (i=0;i=0) { isBinary=1; if (sstackptr<2) { fprintf(stderr,"Too few values on string stack.\n"); exit(1); } matrixB=pop_string(); matrixA=pop_string(); for (i=0;i=0) { isUnary=1; if (sstackptr<1) { fprintf(stderr,"Too few values on string stack.\n"); exit(1); } matrixB=pop_string(); for (i=0;i0 && !scanItemList(&dummyFlags, s_arg[i_arg].list, &s_arg[i_arg].n_items, 0, "filename", SDDS_STRING, &columnNameFile, 1, 0, "column", SDDS_STRING, &columnNameFileColumn, 1, 0,NULL)) SDDS_Bomb("invalid -parameter syntax"); if (!columnNameFileColumn) SDDS_CopyString(&columnNameFileColumn,"ColumnName"); s_arg[i_arg].list -=1; s_arg[i_arg].n_items +=1; break; case CLO_PUSH: if (s_arg[i_arg].n_items!=2) SDDS_Bomb("invalid -push option!"); push_matrix(s_arg[i_arg].list[1],&doubleColumnName,&doubleColumns,0); push_string(s_arg[i_arg].list[1]); break; case CLO_PIPE: if (!processPipeOption(s_arg[i_arg].list+1, s_arg[i_arg].n_items-1, &pipeFlags)) SDDS_Bomb("invalid -pipe syntax"); switch(pipeFlags) { case USE_STDOUT: break; default: push_matrix(NULL,&doubleColumnName,&doubleColumns,1); push_string("stdin"); inputRead = 1; break; } break; case CLO_ADD: for (i=0;i option!"); for (i=0;im*B[i]->n; j++) B[i]->base[j] *= scalarValue; temp[i] = B[i]; } operationsSupplied++; break; case CLO_SCALARDIVIDE: if (s_arg[i_arg].n_items!=2) SDDS_Bomb("invalid -scalardivide option!"); if (!(get_double(&scalarValue,s_arg[i_arg].list[1]))) SDDS_Bomb("no value given for option -scalardivide"); if (!scalarValue) SDDS_Bomb("Matrix is divided by zero!"); for (i=0;im*B[i]->n; j++) B[i]->base[j] /= scalarValue; temp[i]=B[i]; } operationsSupplied++; break; case CLO_TRANSPOSE: for (i=0;i0 && !scanItemList(&dummyFlags, s_arg[i_arg].list+1, &s_arg[i_arg].n_items, 0, "largestSingluarValuesr", SDDS_LONG, &largestSValue, 1, 0, "smallestSingularValues", SDDS_LONG, &smallestSValue, 1, 0, "minimumSingularValueRatio", SDDS_DOUBLE, &minRatio, 1, 0, NULL)) SDDS_Bomb("invalid -parameter syntax"); s_arg[i_arg].n_items +=1; if (largestSValue * smallestSValue * minRatio !=0) SDDS_Bomb("Only one of largestSValue, smallestSValue and ratio can be specified!"); for (i=0;im*B[i]->n; j++) B[i]->base[i] *=-1; temp[i] = B[i]; } operationsSupplied++; break; case CLO_IDENTITY: if (s_arg[i_arg].n_items==1) { /*the indentity matrix has the same dimension as the top matrix */ for (i=0;im, B[i]->n); } else if (s_arg[i_arg].n_items==2) { if (!(get_long(&identityN,s_arg[i_arg].list[1]))) SDDS_Bomb("invalid -identity option"); for (i=0;iSDDS_ReadPage(&columnPage) ) SDDS_Bomb("Unable to read column file!"); givenColumnNames=SDDS_CountRowsOfInterest(&columnPage); if (givenColumnNames) { if (!(givenColumnName=(char**)SDDS_GetColumn(&columnPage,columnNameFileColumn))) SDDS_Bomb("Unable to get the ColumnName from column name file!"); } if (!SDDS_Terminate(&columnPage)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } if( !SDDS_InitializeOutput(&outputPage,SDDS_BINARY, 1,NULL, NULL, outputFile)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (do_determinant && !SDDS_DefineSimpleParameter(&outputPage, "Determinant", NULL, SDDS_DOUBLE)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (givenColumnNames) { SDDS_FreeStringArray(doubleColumnName, doubleColumns); free(doubleColumnName); outputColumns = temp[0]->n; if (outputColumns<=givenColumnNames) outputColumnName = givenColumnName; else { outputColumnName = (char**)malloc(sizeof(*outputColumnName)*temp[0]->n); for (i=0; in; if (!digits) digits = log10(temp[0]->n)+1; if (!root) sprintf(format,"doubleColumn%%0%ldld", digits); else sprintf(format,"%s%%0%ldld", root, digits); outputColumnName = (char**)malloc(sizeof(*outputColumnName)*outputColumns); for (i=0; in) SDDS_Bomb("The columns of output matrix and double columns do not match!"); outputColumns = doubleColumns; outputColumnName = doubleColumnName; } if (!SDDS_DefineSimpleColumns(&outputPage, outputColumns, outputColumnName, NULL, SDDS_DOUBLE)) SDDS_PrintErrors(stdout, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!SDDS_WriteLayout(&outputPage)) SDDS_PrintErrors(stdout, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); for (j=0;jm)) SDDS_PrintErrors(stdout, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (do_determinant) { det = matrix_det(temp[j]); if (!SDDS_SetParameters(&outputPage, SDDS_SET_BY_NAME|SDDS_PASS_BY_VALUE, "Determinant", det, NULL)) SDDS_PrintErrors(stdout, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } for (i=0;in;i++) { if (ime[i],temp[j]->m, outputColumnName[i])) SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors|SDDS_VERBOSE_PrintErrors); } if (!SDDS_WritePage(&outputPage)) SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors|SDDS_VERBOSE_PrintErrors); matrix_free(temp[j]); } if (!!SDDS_Terminate(&outputPage)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (tmpfile_used && !replaceFileAndBackUp(inputFile, outputFile)) exit(1); free_scanargs(&s_arg,argc); free(A); free(B); free(temp); if (columnNameFileColumn) free(columnNameFileColumn); if (columnNameFile) free(columnNameFile); if (outputColumns<=givenColumnNames) { SDDS_FreeStringArray(givenColumnName, givenColumnNames); free(givenColumnName); } else { SDDS_FreeStringArray(outputColumnName, outputColumns); free(outputColumnName); if (givenColumnName) free(givenColumnName); } return 0; } void push_matrix(char *fileName,char ***doubleColumnName, int32_t *doubleColumns,long isInput) { SDDS_DATASET inputPage; char **numericalColumnNames=NULL; int32_t numericalColumns, rows; long i, ipage,pages; MAT *matrix=NULL, *matrix1; /* note that the matrix loaded from input file is actually the transpose matrix of it should be to save time. The operations are done between tranposed matrix; the benefit is that: no transpose need to be done when write to output file; while in previose version, readfrom input -->transpose -->operation --> transpose --> output now, readfrom input-->operation-->output, should be much more efficient */ /* for subtraction, addition, hadamard multiplya and division, the operation order are the same however, for C=A*B, Ct = Bt*At */ InitializeInputAndGetColumnNames(&inputPage, fileName, &numericalColumnNames, &numericalColumns); /* count the numerical columns in input */ pages=0; while ((ipage=SDDS_ReadTable(&inputPage))>0) { if( !SDDS_SetColumnFlags(&inputPage, 0) ) SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors|SDDS_VERBOSE_PrintErrors); if ( !SDDS_SetColumnsOfInterest(&inputPage, SDDS_NAME_ARRAY, numericalColumns, numericalColumnNames)) SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors|SDDS_VERBOSE_PrintErrors); if (!(rows=SDDS_CountRowsOfInterest(&inputPage))) { fprintf(stderr,"No date found in file %s\n", fileName?fileName:"stdin"); exit(1); } matrix = NEW(MAT); matrix->n = matrix->max_n = numericalColumns; matrix->m = matrix->max_m = rows; if (!(matrix->base = (Real *)SDDS_GetCastMatrixOfRowsByColumn(&inputPage, &rows, SDDS_DOUBLE))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors); matrix->me = (Real **)calloc(matrix->n, sizeof(Real *)); for (i=0; in; i++) matrix->me[i] = &(matrix->base[i*matrix->m]); push_address((long)matrix,pages); pages++; if (pages>MAX_PAGES) { fprintf(stderr,"The number of pages in %s file exceeds 256!\n",fileName?fileName:"stdin"); exit(1); } } if (input_pages==0) input_pages=pages; else if (pages>input_pages) { fprintf(stderr,"Warning: file %s has more pages than the input file, the extra pages will be ignored.\n",fileName); } else if (pages=MAX_PAGES) { fputs("The index of stack exceeds the MAX_PAGES\n", stderr); return(0); } if (matrixStackPtr[index]>=STACKSIZE) { fputs("stack overflow--numeric stack size exceeded (push_address)\n", stderr); return(0); } matrixStack[index][matrixStackPtr[index]++] = address; return 1; } long pop_address(long index) { if (index>=MAX_PAGES) { fputs("The index of stack exceeds the MAX_PAGES\n", stderr); return(0); } if (matrixStackPtr[index]<1) { fprintf(stderr,"too few items on matrix stack %ld\n", index); return(0); } return(matrixStack[index][--matrixStackPtr[index]]); } int32_t InitializeInputAndGetColumnNames(SDDS_DATASET *SDDS_dataset, char *filename, char ***numericalColumnName, int32_t *numericalColumns) { char **columnName=NULL, **numColName=NULL; int32_t columns=0, i, numCol, columnType; if (!SDDS_InitializeInput(SDDS_dataset, filename) || !(columnName=(char**)SDDS_GetColumnNames(SDDS_dataset, &columns))) SDDS_PrintErrors(stderr, SDDS_EXIT_PrintErrors|SDDS_VERBOSE_PrintErrors); numCol = 0; for(i=0;i