/*************************************************************************\ * Copyright (c) 2002 The University of Chicago, as Operator of Argonne * National Laboratory. * Copyright (c) 2002 The Regents of the University of California, as * Operator of Los Alamos National Laboratory. * This file is distributed subject to a Software License Agreement found * in the file LICENSE that is included with this distribution. \*************************************************************************/ /* $Log: get_token_tq.c,v $ Revision 1.9 2007/07/13 15:02:44 soliday Renamed strcpy_s to strcpy_ss because Microsoft is already using this name. Revision 1.8 2007/02/02 18:45:28 borland Added str_copy.c, with strcpy_s() routine for safe string copies. Converted routines from strcpy() to strcpy_s(). Revision 1.7 2003/08/28 20:13:43 soliday Cleaned up the code. Revision 1.6 2002/08/14 16:06:09 soliday Added Open License Revision 1.5 2000/04/05 21:33:12 soliday Added prototype for seek_level Revision 1.4 1999/09/14 18:12:18 soliday interpret_escaped_quotes is defined in a header file instead of locally Revision 1.3 1999/05/25 21:55:25 soliday Removed compiler warnings under Linux. Revision 1.2 1995/09/05 21:17:34 saunders First test release of the SDDS1.5 package. */ /* file: get_token_t.c * contents: get_token_t(), in_charset(), get_token_tq() * seek_level() * Michael Borland, 1988 */ #include "mdb.h" #include char *seek_level(char *s, char qs, char qe); int in_charset(char c, char *s); /* routine: get_token_t() * purpose: get a token from a character string, where a token is defined * as anything bounded by two of the characters from the token * string. 
*/
/* Summary of get_token_t() as implemented below:
 *   - s is scanned and then modified in place: after the call it holds the
 *     unconsumed remainder, beginning with the delimiter that ended the token.
 *   - t is the set of delimiter characters.
 *   - A section opened by an unescaped double quote is skipped as a unit, so
 *     delimiters inside it do not end the token.  A quote preceded by a
 *     backslash does not open or close such a section.
 *   - Returns NULL when nothing but delimiters (or nothing at all) remains;
 *     otherwise returns storage obtained from tmalloc() holding the token,
 *     after interpret_escaped_quotes() has been applied to it.
 */
char *get_token_t(char *s, char *t)
{
    char *base = s;     /* start of the caller's buffer */
    char *start;        /* first character of the token */
    char *token;

    /* step over any leading delimiter characters */
    while (*s && in_charset(*s, t))
        s++;
    if (*s == 0)
        return NULL;
    start = s;

    /* advance to the next delimiter, treating quoted sections as opaque */
    do {
        int opens_quote = (*s == '"') && (s == base || s[-1] != '\\');

        if (opens_quote) {
            /* run to the matching unescaped quote or the end of the string */
            for (s++; *s; s++) {
                if (*s == '"' && s[-1] != '\\')
                    break;
            }
            if (*s == '"')
                s++;
        } else {
            s++;
        }
    } while (*s && !in_charset(*s, t));

    /* copy out the token text [start, s) */
    token = tmalloc(sizeof(*token) * (s - start + 1));
    strncpy(token, start, s - start);
    token[s - start] = 0;

    /* move the unconsumed tail to the front of the caller's buffer */
    strcpy_ss(base, s);
    interpret_escaped_quotes(token);
    return token;
}

/* routine: in_charset()
 * purpose: report whether character c occurs in the string set.
 *          Returns the 1-based position of the first match, or zero
 *          when c is not present.  The terminating NUL of set is never
 *          matched, so c == 0 always yields zero.
 */
int in_charset(char c, char *set)
{
    char *p;

    for (p = set; *p; p++) {
        if (*p == c)
            return (int)(p - set) + 1;
    }
    return 0;
}

/* routine: get_token_tq()
 * purpose: get a token from a character string, where a token is defined
 *          as anything bounded at the front by one of the characters
 *          from the first token string and at the end by one of the
 *          characters from the second token string.  This version also
 *          recognizes generalized quotation marks, using two parallel strings
 *          of such (beginning and ending) marks supplied by the caller.
 *          Beginning quotation characters are construed as token-start
 *          characters and ending quotation characters as token-end characters,
 *          but anything inside of a quoted section can not end a token.
 *          Bounding token start/end characters are deleted from the token
 *          before it is returned, as are PAIRS of corresponding quote
 *          characters.
 *          NOTE(review): the code as written copies the quotation
 *          characters into the returned token unchanged -- confirm which
 *          behavior is intended.
 *
 *          For example, to parse MAD beamline definitions, one could use:
 *              ts = ","   te = ","   qs = "("   qe = ")" .
 *          This would section the string into sub-strings of items
 *          separated by commas, with parenthesized expressions kept
 *          intact.
*/
/* Summary of get_token_tq() as implemented below:
 *   s       string to be scanned; modified in place so that on return it
 *           holds the text following the token-end character (or is empty
 *           when the token ran to the end of the string)
 *   ts, te  strings of token-start and token-end characters
 *   qs, qe  parallel strings of opening and closing quotation characters;
 *           qe[i] closes qs[i], so qe is assumed to be at least as long as qs
 *   Returns NULL when no token remains; otherwise storage obtained from
 *   tmalloc() holding the token, after interpret_escaped_quotes() has been
 *   applied to it.
 */
char *get_token_tq(char *s, char *ts, char *te, char *qs, char *qe)
{
    char *ptr0, *ptr1, *ptr;
    int in_quotes;  /* 1-based index into qs/qe of the open quote pair, 0 if none */

    /* skip leading token-start characters, but stop at an opening quote */
    ptr0 = s;
    while (*s && in_charset(*s, ts) && !in_charset(*s, qs))
        s++;
    if (*s == 0)
        return NULL;
    ptr1 = s;

    /* a token may itself begin with an opening quote */
    if ((in_quotes = in_charset(*s, qs)))
        s++;

    /* advance to the next token-end character that lies outside any quotes */
    do {
        if (in_quotes) {
            ptr = seek_level(s, qs[in_quotes - 1], qe[in_quotes - 1]);
            if (ptr)
                s = ptr;            /* just past the balancing close quote */
            else
                s += strlen(s);     /* unbalanced: take the rest of the string */
            in_quotes = 0;
        } else {
            in_quotes = in_charset(*s, qs);
            s++;
        }
    } while (*s && (in_quotes || !in_charset(*s, te)));

    /* copy out the token text [ptr1, s) */
    ptr = tmalloc(sizeof(*ptr) * (s - ptr1 + 1));
    strncpy(ptr, ptr1, s - ptr1);
    ptr[s - ptr1] = 0;

    /* drop the token-end character and move the tail to the buffer start */
    if (*s)
        strcpy_ss(ptr0, s + 1);
    else
        *ptr0 = *s;

    interpret_escaped_quotes(ptr);
    return ptr;
}

/* routine: seek_level()
 * purpose: scan forward from s, which is taken to be at nesting level 1
 *          (one opening mark qs already seen), until the nesting level
 *          drops to zero.  An unescaped qe lowers the level and an
 *          unescaped qs raises it; qe is tested first, so when qs == qe
 *          the routine simply finds the next unescaped mark.  A mark is
 *          escaped when the preceding character is a backslash; the
 *          character at s itself is never treated as escaped.
 *          Returns a pointer just past the mark that brought the level
 *          to zero, or NULL if the string ends first.
 */
char *seek_level(char *s, char qs, char qe)
{
    char *first = s;
    int qlevel = 1;

    while (*s && qlevel) {
        int escaped = (s != first && s[-1] == '\\');

        if (!escaped) {
            if (*s == qe)
                qlevel--;
            else if (*s == qs)
                qlevel++;
        }
        s++;
    }
    return qlevel == 0 ? s : NULL;
}

/* routine: interpret_escaped_quotes()
 * purpose: rewrite s in place, replacing every backslash that is
 *          immediately followed by a double quote with nothing, so that
 *          \" becomes ".  All other characters, including backslashes
 *          not followed by a double quote, are kept.  The string is
 *          compacted in a single pass.
 */
void interpret_escaped_quotes(char *s)
{
    char *src = s;
    char *dst = s;

    while (*src) {
        if (src[0] == '\\' && src[1] == '"')
            src++;              /* drop the backslash; the quote is copied next */
        *dst++ = *src++;
    }
    *dst = 0;
}