!+ ! Module for parsing a document with a tree format of key/value pairs. ! ! A key/value pair is called an "object". ! The document model is a tree of objects. ! The "value" of an object will either be an array of one or more tokens or ! an array of of one or more subobjects (children) (but not both). ! ! Example: ! {a = {b = 7*atan(3, 4), d = "xyz", e = [1, "true", rats], f = {g = 3^34}}} ! In this example, the root object, which has a blank key name, has a single child subobject ! whose (key) name is "a". Object "a" has three children whose names are "b", "d", e", and "f". ! Object "b" has a single token "7*atan(3, 4)". ! Object "e" has three tokens. Etc. ! ! Notes: ! Keys or Tokens are either quoted strings or non-quoted strings. ! Leading and trailing whitespace in non-quoted keys and tokens is ignored. ! To include a quotation mark in a name or token, escape the mark with the backslash character. ! A key or token may use single or double quotation marks for delimitors. ! In a non-quoted key or token, a tab character is converted to a single space character. ! A non-quoted key or token may contain the "[" character as long as the first character is not a "[". ! In a non-quoted key or token, parentheses or square brackets (that is "(...)", and "[...]") must ! be present in matched left/right pairs. ! Characters not allowed in non-quoted keys or tokens: ! "{", "}", or "=" ! In a quoted key or token: Equal sign "=" is not allowed. [This rule is needed to catch missing quote marks.] ! In a non-quoted key or token, commas are only allowed if the comma appears within ! brackets. For example: ! z = atan2(3, 4) ! For a non-quoted key, whitespace is only allowed within brackets. ! For a non-quoted token, whitespace is allowed (but leading and trailing whitespace will be ignored). Example: ! z = 2 * 3 ! Subobject arrays are delimited using curley braces "{...}". ! Token arrays are delimited using square brackets "[...]". ! A single token (not withing square backets) is equivalent to an array of a single token. ! That is, the token ! abc ! is equivalent to ! [abc] ! Mixing quoted and non-quoted tokens in an array is allowed. ! Mixing tokens and objects in an array is forbidden. Example: ! a = [3, b = 8] ! Not allowed. "3" is a token, "b = 8" is an object. ! Having multiple keys of the same name is allowed. Example: ! a = r, a = 7, a = r ! The outer "{...}" braces in a document are optional. ! When the document is parsed, key/value order will be preserved. ! When the document is parsed, the quote marks of quoted keys or tokens will be preserved. ! That is, quoted keys or tokens are distinct from nonquoted keys or tokens. !- module object_model_mod use sim_utils implicit none ! One and only one of %n_child and %n_token is nonzero. type temp_struct character(40) :: str = '' end type type object_struct character(:), allocatable :: name type (object_struct), pointer :: parent => null() type (var_length_string_struct), allocatable :: token(:) ! Array of tokens type (object_struct), pointer :: child(:) => null() ! Array of subobjects integer :: n_child = 0 ! Number of subobjects. integer :: n_token = 0 ! Number of tokens. end type contains !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Function object_document_parse(obj, root_name, line, why_invalid, get_more_text_func, parse_one, sub_call) result (valid) ! ! Object model parsing routine. ! Note: The initial "{" is optional. ! Note: At the end, the returned line argument will contain any unparsed text that was outside the tree. ! For example, if the text was: ! "{a = 5, b = 7} xxx" ! Then the unparsed text will be " xxx". ! If the document does not contain the outer "{...}" braces: ! if parse_one = False (the default), the entire document is parsed. ! If parse_one = True, only one object is parsed. Example: "a = 5, b = 7" --> only "a = 5" is parsed. ! ! Input: ! root_name -- character(*): Name to use of the root object obj. ! line -- character(:), allocatable: Initial text to parse. If no text present then ! get_more_text_func will be called. ! why_invalid -- character(*): String set to error message if there is an error. ! get_more_text_func(line) result (valid) ! -- Function: Routine called to append more text to the string being parsed. ! The interface is: ! function get_more_text_func(line, end_of_document, why_invalid) result (valid) ! character(:), allocatable :: line, why_invalid ! logical end_of_document ! logical valid ! parse_one -- logical, optional: If True, parse only one object and then return. Default = False. ! sub_call -- logical, optional: Used to signal if calling recursively. ! For internal use only. Do not set this argument. ! ! output: ! obj -- object_struct: Root object for parsed tree. ! line -- character(:), allocatable: Unparsed text. ! !- recursive function object_document_parse(obj, root_name, line, why_invalid, get_more_text_func, parse_one, sub_call) result (valid) interface function get_more_text_func(line, end_of_document, why_invalid) result (valid) character(:), allocatable :: line, why_invalid logical end_of_document logical valid end function end interface type (object_struct), target :: obj integer n, ns, ip, ip0, parse_state, nb1, nb2, n_item integer, parameter :: at_start$ = 0, before_obj_name$ = 1, in_obj_name$ = 2, in_obj_quoted_name$ = 3 integer, parameter :: after_obj_name$ = 4, after_equal_sign$ = 5 integer, parameter :: in_token_before_string$ = 10, in_token_in_string$ = 11 integer, parameter :: in_token_in_quoted_string$ = 12, in_token_after_string$ = 13 integer, parameter :: in_subobjects$ = 20 logical, optional :: parse_one, sub_call logical valid, abort, has_opening_bracket, in_array, end_of_document, debug character(*) root_name character(:), allocatable :: line, why_invalid, why_func_invalid character(1), parameter :: tab = char(9) character(1) ach, quote_mark ! valid = .true. debug = .false. if (.not. present(sub_call)) then if (.not. allocated(obj%name)) allocate (character(len_trim(root_name)):: obj%name) obj%name = trim(root_name) endif has_opening_bracket = .false. quote_mark = ' ' if (.not. allocated(line)) then allocate(character(1):: line) line = '' valid = get_more_text_func(line, end_of_document, why_func_invalid) if (.not. valid) then valid = set_this_invalid (why_invalid, why_func_invalid, obj) return endif endif ip = 0 if (present(sub_call)) then parse_state = before_obj_name$ else parse_state = at_start$ endif do while (.true.) ip = ip + 1 if (ip > len(line)) then valid = get_more_text_func(line, end_of_document, why_func_invalid) if (.not. valid) then valid = set_this_invalid (why_invalid, why_func_invalid, obj) return endif if (end_of_document) then if (.not. has_opening_bracket .and. .not. in_array .and. & (parse_state == in_token_in_string$ .or. parse_state == in_token_after_string$)) then obj%n_token = obj%n_token + 1 n = obj%n_token call re_allocate (obj%token, n) ns = ip - ip0 - 1 if (.not. allocated(obj%token(n)%str)) allocate (character(ns):: obj%token(n)%str) obj%token(n)%str = trim(line(ip0:ip-1)) line = '' return else valid = set_this_invalid (why_invalid, 'MISSING END "}" DELIMITOR.', obj) return endif endif endif ach = line(ip:ip) if (debug) print '(i4, 2x, 3a)', ip, ach, ' :::', line(ip+1:) ! Handle quote character if (ach == '"' .or. ach == "'") then select case (quote_mark) case (' ') quote_mark = ach ip0 = ip select case (parse_state) case (before_obj_name$); parse_state = in_obj_quoted_name$ case (after_equal_sign$, in_token_before_string$); parse_state = in_token_in_quoted_string$ case default valid = set_this_invalid (why_invalid, 'QUOTE MARK OUT OF PLACE', obj) return end select cycle case ('"', "'") if (line(ip-1:ip-1) == '\') cycle ! ' if (ach /= quote_mark) cycle select case (parse_state) case (in_obj_quoted_name$) obj%name = line(ip0:ip) parse_state = after_obj_name$ case (in_token_in_quoted_string$) parse_state = in_token_after_string$ case default call err_exit end select quote_mark = ' ' cycle end select cycle endif if (quote_mark /= ' ') then if (ach == '=') then valid = set_this_invalid (why_invalid, 'EQUAL SIGN "=" NOT ALLOWED IN QUOTED STRING.', obj) return endif cycle endif ! Handle non quote mark character if (ach == tab) ach = ' ' select case (parse_state) ! At start before possible '{' character. This character is considered optional. case (at_start$) if (ach == ' ') cycle if (ach == '{') then has_opening_bracket = .true. else ip = ip - 1 ! Reparse character endif line = line(ip+1:) call subobject_reallocate(obj, 5) valid = object_document_parse(obj%child(1), '', line, why_invalid, get_more_text_func, sub_call = .true.) if (.not. valid) return obj%n_child = 1 if (logic_option(.false., parse_one) .and. .not. has_opening_bracket) return parse_state = in_subobjects$ ip = 0 ! Before a variable name case (before_obj_name$) if (ach == ' ') cycle select case (ach) case (',', '{', '}', '(', ')', '[', ']') valid = set_this_invalid (why_invalid, 'MISPLACED ' // ach // ' CHARACTER', obj) return end select nb1 = 0; nb2 = 0 parse_state = in_obj_name$ ip0 = ip ! Parsing a variable name case (in_obj_name$) select case (ach) case ('['); nb1 = nb1 + 1 case ('('); nb2 = nb2 + 1 case (']') nb1 = nb1 - 1 if (nb1 < 0) then valid = set_this_invalid (why_invalid, 'MISPLACED "]" BRACKET.', obj) return endif case (')') nb2 = nb2 - 1 if (nb2 < 0) then valid = set_this_invalid (why_invalid, 'MISPLACED ")" BRACKET.', obj) return endif case (',') if (nb1 == 0 .and. nb2 == 0) then valid = set_this_invalid (why_invalid, 'COMMA IN NAME', obj) return endif case ('=') if (nb1 /= 0 .or. nb2 /= 0) then valid = set_this_invalid (why_invalid, 'EQUAL SIGN IN NAME', obj) return endif parse_state = after_obj_name$ ip = ip - 1 case (' ') if (nb1 /= 0 .or. nb2 /= 0) cycle parse_state = after_obj_name$ end select ! After a variable name before the equal sign case (after_obj_name$) if (ach == ' ') cycle if (ach /= '=') then valid = set_this_invalid (why_invalid, 'EXPECTING EQUAL SIGN AFTER DATUM NAME', obj) return endif n = ip - ip0 if (.not. allocated(obj%name)) allocate(character(n):: obj%name) obj%name = trim(line(ip0:ip-1)) parse_state = after_equal_sign$ ! After an equal sign. case (after_equal_sign$) select case (ach) case (' ') cycle case (',', ')', ']') valid = set_this_invalid (why_invalid, 'MISPLACED "' // ach // '" CHARACTER.', obj) return case ('{') line = line(ip+1:) call subobject_reallocate(obj, 5) valid = object_document_parse(obj%child(1), '', line, why_invalid, get_more_text_func, sub_call = .true.) if (.not. valid) return obj%n_child = 1 if (logic_option(.false., parse_one) .and. .not. has_opening_bracket) return parse_state = in_subobjects$ ip = 0 case ('[') parse_state = in_token_before_string$ in_array = .true. if (.not. allocated(obj%token)) allocate (obj%token(10)) nb1 = 0; nb2 = 0 case default parse_state = in_token_before_string$ in_array = .false. ip0 = ip nb1 = 0; nb2 = 0 ip = ip - 1 ! Reeval character end select ! Before next token case (in_token_before_string$) if (ach == ' ') cycle ip0 = ip ip = ip - 1 ! Reeval parse_state = in_token_in_string$ ! In token string. case (in_token_in_string$) select case (ach) case ('['); nb1 = nb1 + 1 case ('('); nb2 = nb2 + 1 case (']') if (in_array .and. nb1 == 0 .and. nb2 == 0) then parse_state = in_token_after_string$ ip = ip - 1 else nb1 = nb1 - 1 if (nb1 < 0) then valid = set_this_invalid (why_invalid, 'MISPLACED "]" BRACKET.', obj) return endif endif case (')') nb2 = nb2 - 1 if (nb2 < 0) then valid = set_this_invalid (why_invalid, 'MISPLACED ")" PARENTHESES.', obj) return endif case (',') if (nb1 /= 0 .or. nb2 /= 0) cycle ip = ip - 1 parse_state = in_token_after_string$ case ('}') if (nb1 /= 0 .or. nb2 /= 0) then valid = set_this_invalid (why_invalid, 'MISPLACED "}" CHARACTER.', obj) return endif ip = ip - 1 parse_state = in_token_after_string$ case (' ') if (nb1 /= 0 .or. nb2 /= 0) cycle parse_state = in_token_after_string$ end select ! After token string. case (in_token_after_string$) select case (ach) case (',', ']', '}') obj%n_token = obj%n_token + 1 n = obj%n_token if (n == 1) call re_allocate(obj%token, 2) if (n > size(obj%token)) call re_allocate (obj%token, max(n, 2*(n-1))) ns = ip - ip0 if (.not. allocated(obj%token(1)%str)) allocate (character(ns):: obj%token(n)%str) obj%token(n)%str = trim(line(ip0:ip-1)) if (debug) print *, 'Out: ', line if (in_array .and. ach == '}') then valid = set_this_invalid (why_invalid, 'expecting "," or "]", got: "}"', obj) return elseif (.not. in_array .and. ach == ']') then valid = set_this_invalid (why_invalid, 'expecting "," or "}", got: "]"', obj) return endif if (ach == ']') then line = line(ip+1:) return elseif (ach == '}' .or. .not. in_array) then line = line(ip:) return else parse_state = in_token_before_string$ endif case default if (in_array) then valid = set_this_invalid (why_invalid, 'expecting "," or "]" but got: ' // quote(ach), obj) else valid = set_this_invalid (why_invalid, 'expecting "," or "}" but got: ' // quote(ach), obj) endif return end select ! In subobject case (in_subobjects$) select case (ach) case (' ') cycle case (',') obj%n_child = obj%n_child + 1 line = line(ip+1:) if (obj%n_child == 1) call subobject_reallocate(obj,2) if (obj%n_child > size(obj%child)) call subobject_reallocate(obj,2*obj%n_child) valid = object_document_parse(obj%child(obj%n_child), '', line, why_invalid, get_more_text_func, sub_call = .true.) if (.not. valid) return ip = 0 case ('}') call subobject_reallocate(obj,obj%n_child) line = line(ip+1:) return case default valid = set_this_invalid (why_invalid, 'BAD CHARACTER: ' // ach, obj) return end select end select enddo !-------------------------------------------------- contains function set_this_invalid (why_invalid, err_string, obj) result (valid) type (object_struct) obj character(*) err_string character(:), allocatable :: why_invalid logical valid ! If err_string is blank (can happen if err_string is set by get_more_text_func), set why_invalid to blank. valid = .false. if (err_string == '') then why_invalid = err_string else call str_set(why_invalid, err_string // '.' // ' WHILE PARSING: ' // object_tree_name(obj)) endif end function set_this_invalid end function object_document_parse !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Subroutine object_print(obj, n_indent, last) ! ! Routine to print the contents of an object. ! ! Input: ! obj -- object_struct: Object to be printed. ! n_indent -- integer, optional: Indentation when printing. ! last -- logical, optional: Is last datum in array? If True then no !- recursive subroutine object_print (obj, n_indent, last) type (object_struct), target :: obj character(100) :: blank = '' character(:), allocatable :: line integer, optional :: n_indent integer i, ni logical, optional :: last ! Head object may not have a name. ! Note: In this case, the object must have subobjects and not a value ni = integer_option(0, n_indent) if (obj%n_child > 0) then if (obj%name == '') then print '(2a)', blank(1:ni), '{' else print '(3a)', blank(1:ni), trim(obj%name), ' = {' endif do i = 1, obj%n_child call object_print(obj%child(i), ni+2, last = (i == obj%n_child)) enddo if (logic_option(.true., last)) then print '(2a)', blank(1:ni), '}' else print '(2a)', blank(1:ni), '},' endif elseif (obj%n_token == 0) then print '(4a)', blank(1:ni), trim(obj%name), ' = ????' elseif (obj%n_token == 1) then if (logic_option(.true., last)) then print '(4a)', blank(1:ni), trim(obj%name), ' = ', trim(obj%token(1)%str) else print '(5a)', blank(1:ni), trim(obj%name), ' = ', trim(obj%token(1)%str), ',' endif else allocate (character(40):: line) line = blank(1:ni) // trim(obj%name) // ' = [' // trim(obj%token(1)%str) do i = 2, obj%n_token line = line // ', ' // trim(obj%token(i)%str) enddo if (logic_option(.true., last)) then print '(a)', line // ']' else print '(a)', line // '],' endif endif end subroutine object_print !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Subroutine subobject_reallocate(obj, n) ! ! Routine to reallocate the obj%child(:) array. ! ! Input: ! obj -- object_struct: Object. ! n -- integer: Minimum size for obj%child(:) !- subroutine subobject_reallocate(obj, n) type (object_struct), target :: obj, temp integer n, ix ! if (.not. associated(obj%child)) then allocate (obj%child(n)) obj%n_child = 0 else if (size(obj%child) >= n) return temp%child => obj%child allocate(obj%child(n)) obj%child(1:size(temp%child)) = temp%child deallocate(temp%child) endif do ix = 1, size(obj%child) obj%child(ix)%parent => obj enddo end subroutine subobject_reallocate !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Subroutine object_deallocate(obj) ! ! Routine to deallocate an object ! ! Input: ! obj -- object_struct: Object. ! ! Output: ! obj -- object_struct: Deallocated object. !- recursive subroutine object_deallocate (obj) type (object_struct) obj integer i ! if (obj%n_child > 0) then do i = 1, obj%n_child call object_deallocate (obj%child(i)) enddo deallocate(obj%child) endif end subroutine object_deallocate !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Function subobject_number (obj, name) result (num) ! ! Routine to return how many subobjects match the name. ! ! Input: ! obj -- object_struct: Object. ! name -- character(*): Name of subobject ! ! Output: ! num -- integer: Number of subobjects whose name matches name argument. !- function subobject_number (obj, name) result (num) type (object_struct) obj, temp integer num, n character(*) name ! num = 0 do n = 1, obj%n_child if (obj%child(n)%name == name) num = num + 1 enddo end function subobject_number !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Function pointer_to_subobject (obj, name, indx) result (subobj) ! ! Routine to return a pointer to a subobject with a given name. ! The indx argument is used when searching for multiple subobjects of the same name. ! ! Input: ! obj -- object_struct: Object. ! name -- character(*): Name of subobject ! indx -- integer, optional: If present, start subobject search from this index + 1. ! Default is 0. ! ! Output: ! indx -- integer, optional: Index of subobject found. ! subobj -- object_struct, pointer: Pointer to subobject. Null if not found. !- function pointer_to_subobject (obj, name, indx) result (subobj) type (object_struct), target :: obj type (object_struct), pointer :: subobj integer, optional :: indx integer n, n0 character(*) name ! subobj => null() n0 = integer_option(0, indx) + 1 do n = n0, obj%n_child if (obj%child(n)%name /= name) cycle subobj => obj%child(n) if (present(indx)) indx = n return enddo end function pointer_to_subobject !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Function object_tree_name(obj) result (tree_name) ! ! Routine to return the tree name of an object which is the path from the root to ! the object. For example, given the object tree ! {a = {b = 1, b = {c = 7, c = 8}}} ! The tree name for "c = 7" object is ! "a->b[2]->c[1]" ! ! Input: ! obj -- object_struct: Object ! ! Output: ! tree_name -- character(:), allocatable: Tree name !- function object_tree_name(obj) result (tree_name) type (object_struct), target :: obj type (object_struct), pointer :: op1, op2 integer i, n character(:), allocatable :: tree_name ! op1 => obj allocate(character(0):: tree_name) do op2 => op1%parent if (.not. associated(op2)) then tree_name = op1%name // '->' // tree_name exit endif if (subobject_number(op2, op1%name) == 1) then tree_name = op1%name // '->' // tree_name else n = 0 do i = 1, op2%n_child if (op2%child(i)%name /= op1%name) cycle n = n + 1 if (associated(op1, op2%child(i))) exit enddo tree_name = op1%name // '[' // int_str(n) // ']->' // tree_name endif op1 => op2 if (.not. associated(op1%parent) .and. op1%name == '') exit enddo n = len(tree_name) tree_name = tree_name(1:n-2) ! Trim off trailing "->" end function object_tree_name !------------------------------------------------------------------------- !------------------------------------------------------------------------- !------------------------------------------------------------------------- !+ ! Function object_root (obj) result (root_obj_ptr) ! ! Routine to point to the root object of the tree obj is contained in. ! ! Input: ! obj -- object_struct: Object. ! ! Output: ! root_obj_ptr -- object_struct, pointer: Pointer to the root object !- function object_root (obj) result (root_obj_ptr) type (object_struct), target :: obj type (object_struct), pointer :: root_obj_ptr root_obj_ptr => obj do if (.not. associated(root_obj_ptr%parent)) return root_obj_ptr => obj%parent enddo end function object_root end module