diff --git a/ocaml/lib/parsers/config/lexer.ml b/ocaml/lib/parsers/config/lexer.ml index 918791e..9416431 100644 --- a/ocaml/lib/parsers/config/lexer.ml +++ b/ocaml/lib/parsers/config/lexer.ml @@ -17,84 +17,76 @@ let lex_keyword (literal: string): token = let lex_keyvalue (literal: string): token = Value literal -let is_boundary char = char == '=' || char == ' ' +exception Malformed_source of string + +let string_of_token (token: token): string = + match token with + | Key k -> (match k with + | SuCommand -> "[ KEY: su_command ]" + | Unknown -> "[ UNKNOWN KEY ]") + | Equal -> "[ OP: equal ]" + | Value v -> "[ VAL: " ^ v ^ " ]" + | Space -> "{ Space }" + | LineBreak -> "{ LineBreak }\n" + | End -> "{ End of File }\n" + | Unknown s -> (String.make 1 s) + +let string_of_tokens (tokens: token lists): string = + String.concat " " $ map string_of_token (List.concat tokens) let lex_keypair (chars: char list) (position: int): token * int = - let boundary = - match List.find_index is_boundary chars with + (* For a keypair abc = bcd\n, the middle position is the first space + before =, or = itself if there are no spaces. The final position is the + middle position if parsing before it, or the newline \n if past it *) + + let middle_position = + match List.find_index (fun c -> c == '=' || c == ' ') chars with | Some b -> b - | None -> assert false (* TODO: Exception 'line has no equals sign' *) + | None -> raise $ Malformed_source + ("No equal operator for position " ^ str_int position) in - let next_position = - if position < boundary then boundary - else (List.length chars) - 1 in + let final_position = + if position < middle_position then middle_position + else (length chars) - 1 in let literal = str_chars - (List.filteri (fun i _ -> i >= position && i < next_position) chars) in + (ifilter (fun i _ -> i >= position && i < final_position) chars) in - elog @@ "[lex_keypair] Position " ^ str_int position ^ - ": Found literal '" ^ literal ^ - "', boundary " ^ (str_int boundary) ^ - " next position " ^ (str_int next_position); - - if position < boundary then - lex_keyword literal, next_position + if position < middle_position then + lex_keyword literal, final_position else - lex_keyvalue literal, List.length chars - 1 + lex_keyvalue literal, final_position let lex (chars: char list) (position: int): token * int = - elog @@ "[lex] Position " ^ (str_int position); - match List.nth chars position with - | '=' -> Equal, position + 1 - | ' '|'\t' -> Space, position + 1 - | '\n' -> LineBreak, position + 1 - | 'a'..'z'|'~'|'/' -> lex_keypair chars position - | c -> Unknown c, position + 1 + match pick position chars with + | '=' -> Equal, position + 1 + | ' '|'\t' -> Space, position + 1 + | '\n' -> LineBreak, position + 1 + | 'a'..'z'|'~'|'/' -> lex_keypair chars position + | c -> Unknown c, position + 1 -let read (path: string): char list list = - let contents = (System.File.read path) in - let lines = String.split_on_char '\n' contents in - let lines = List.mapi - (fun i s -> if i+1 < List.length lines then s ^ "\n" else s) lines in - let rec split (strings: string list) position (char_lists: char list list) = - if position == List.length strings then char_lists - else split strings (position + 1) - (chars_str (List.nth strings position) :: char_lists) +let read (path: string): char lists = + let contents = System.File.read path in + let undelimited_lines = String.split_on_char '\n' contents in + let lines = imap + (* adds a newline to each line end, except the last *) + (fun i s -> if i + 1 < length undelimited_lines then s ^ "\n" else s) + undelimited_lines in + let rec to_char_lists + (strings: string list) (position: int) (char_lists: char lists) = + if position == length strings then char_lists + else to_char_lists strings (position + 1) + char_lists $: chars_str (pick position strings) in - List.rev (split lines 0 []) + to_char_lists lines 0 [] let scan_line (input: char list): token list = - elog @@ "[scan_line] At " ^ (String.trim @@ str_chars input); - let rec traverse (chars: char list) (position: int) (tokens: token list) = - if position == List.length chars then tokens + let rec to_tokens (chars: char list) (position: int) (tokens: token list) = + if position == length chars then tokens else let token, next_position = lex chars position in - traverse chars next_position (token :: tokens) - in List.rev (traverse input 0 []) - -let scan (char_lists: char list list): token list list = - let rec scan' (char_lists': char list list) (position: int) (token_lists: token list list) = - if position == List.length char_lists' then [End] :: token_lists - else scan' char_lists' (position + 1) (scan_line (List.nth char_lists' position) :: token_lists) + to_tokens chars next_position $ token :: tokens in - List.rev (scan' char_lists 0 []) - -let string_of_tokens (tokens: token list list): string = - let string_of_token (token: token): string = - match token with - | Key k -> (match k with - | SuCommand -> "[ KEY: su_command ]" - | Unknown -> "[ UNKNOWN KEY ]") - | Equal -> "[ OP: equal ]" - | Value v -> "[ VAL: " ^ v ^ " ]" - | Space -> "{ Space }" - | LineBreak -> "{ LineBreak }\n" - | End -> "{ End of File }\n" - | Unknown s -> (String.make 1 s) - in - let rec join_strings (tokens: token list) position (output: string): string = - if position == List.length tokens then output - else join_strings tokens (position + 1) - (output ^ " " ^ string_of_token (List.nth tokens position)) - in - join_strings (List.concat tokens) 0 "" + reverse $ to_tokens input 0 [] +let scan (char_lists: char lists): token lists = + rmap (scan_line) char_lists $: [End] diff --git a/ocaml/lib/parsers/config/lexer.mli b/ocaml/lib/parsers/config/lexer.mli new file mode 100644 index 0000000..4dfbf03 --- /dev/null +++ b/ocaml/lib/parsers/config/lexer.mli @@ -0,0 +1,4 @@ +type token +val read : string -> char list list +val scan : char list list -> token list list +val string_of_tokens : token list list -> string diff --git a/ocaml/lib/utilities/aliases.ml b/ocaml/lib/utilities/aliases.ml index f76e4fe..10bae6d 100644 --- a/ocaml/lib/utilities/aliases.ml +++ b/ocaml/lib/utilities/aliases.ml @@ -1,5 +1,24 @@ -let str_int = string_of_int +(* an 'alias' is an alternate name with minor or no alterations to behavior *) + +(* logging *) let print = print_endline let elog = Log.elog + +(* casts *) +let str_int = string_of_int let chars_str = Text.chars_of_string let str_chars = Text.string_of_chars + +(* control flow & precedence *) +let ($) = (@@) + +(* lists *) +type 'a lists = 'a list list +let ($:) list element = list @ [element] +let pick index list = List.nth list index +let rmap = List.rev_map +let reverse = List.rev +let length = List.length +let ifilter = List.filteri +let imap = List.mapi +let map = List.map