OCaml: Refactor configuration lexer for readability, conciseness
This commit is contained in:
parent
df3a9e9a68
commit
8b1aae16c6
3 changed files with 80 additions and 65 deletions
|
|
@ -17,84 +17,76 @@ let lex_keyword (literal: string): token =
|
|||
|
||||
let lex_keyvalue (literal: string): token = Value literal
|
||||
|
||||
let is_boundary char = char == '=' || char == ' '
|
||||
exception Malformed_source of string
|
||||
|
||||
let string_of_token (token: token): string =
|
||||
match token with
|
||||
| Key k -> (match k with
|
||||
| SuCommand -> "[ KEY: su_command ]"
|
||||
| Unknown -> "[ UNKNOWN KEY ]")
|
||||
| Equal -> "[ OP: equal ]"
|
||||
| Value v -> "[ VAL: " ^ v ^ " ]"
|
||||
| Space -> "{ Space }"
|
||||
| LineBreak -> "{ LineBreak }\n"
|
||||
| End -> "{ End of File }\n"
|
||||
| Unknown s -> (String.make 1 s)
|
||||
|
||||
let string_of_tokens (tokens: token lists): string =
|
||||
String.concat " " $ map string_of_token (List.concat tokens)
|
||||
|
||||
let lex_keypair (chars: char list) (position: int): token * int =
|
||||
|
||||
let boundary =
|
||||
match List.find_index is_boundary chars with
|
||||
(* For a keypair abc = bcd\n, the middle position is the first space
|
||||
before =, or = itself if there are no spaces. The final position is the
|
||||
middle position if parsing before it, or the newline \n if past it *)
|
||||
|
||||
let middle_position =
|
||||
match List.find_index (fun c -> c == '=' || c == ' ') chars with
|
||||
| Some b -> b
|
||||
| None -> assert false (* TODO: Exception 'line has no equals sign' *)
|
||||
| None -> raise $ Malformed_source
|
||||
("No equal operator for position " ^ str_int position)
|
||||
in
|
||||
let next_position =
|
||||
if position < boundary then boundary
|
||||
else (List.length chars) - 1 in
|
||||
let final_position =
|
||||
if position < middle_position then middle_position
|
||||
else (length chars) - 1 in
|
||||
let literal = str_chars
|
||||
(List.filteri (fun i _ -> i >= position && i < next_position) chars) in
|
||||
(ifilter (fun i _ -> i >= position && i < final_position) chars) in
|
||||
|
||||
elog @@ "[lex_keypair] Position " ^ str_int position ^
|
||||
": Found literal '" ^ literal ^
|
||||
"', boundary " ^ (str_int boundary) ^
|
||||
" next position " ^ (str_int next_position);
|
||||
|
||||
if position < boundary then
|
||||
lex_keyword literal, next_position
|
||||
if position < middle_position then
|
||||
lex_keyword literal, final_position
|
||||
else
|
||||
lex_keyvalue literal, List.length chars - 1
|
||||
lex_keyvalue literal, final_position
|
||||
|
||||
let lex (chars: char list) (position: int): token * int =
|
||||
elog @@ "[lex] Position " ^ (str_int position);
|
||||
match List.nth chars position with
|
||||
| '=' -> Equal, position + 1
|
||||
| ' '|'\t' -> Space, position + 1
|
||||
| '\n' -> LineBreak, position + 1
|
||||
| 'a'..'z'|'~'|'/' -> lex_keypair chars position
|
||||
| c -> Unknown c, position + 1
|
||||
match pick position chars with
|
||||
| '=' -> Equal, position + 1
|
||||
| ' '|'\t' -> Space, position + 1
|
||||
| '\n' -> LineBreak, position + 1
|
||||
| 'a'..'z'|'~'|'/' -> lex_keypair chars position
|
||||
| c -> Unknown c, position + 1
|
||||
|
||||
let read (path: string): char list list =
|
||||
let contents = (System.File.read path) in
|
||||
let lines = String.split_on_char '\n' contents in
|
||||
let lines = List.mapi
|
||||
(fun i s -> if i+1 < List.length lines then s ^ "\n" else s) lines in
|
||||
let rec split (strings: string list) position (char_lists: char list list) =
|
||||
if position == List.length strings then char_lists
|
||||
else split strings (position + 1)
|
||||
(chars_str (List.nth strings position) :: char_lists)
|
||||
let read (path: string): char lists =
|
||||
let contents = System.File.read path in
|
||||
let undelimited_lines = String.split_on_char '\n' contents in
|
||||
let lines = imap
|
||||
(* adds a newline to each line end, except the last *)
|
||||
(fun i s -> if i + 1 < length undelimited_lines then s ^ "\n" else s)
|
||||
undelimited_lines in
|
||||
let rec to_char_lists
|
||||
(strings: string list) (position: int) (char_lists: char lists) =
|
||||
if position == length strings then char_lists
|
||||
else to_char_lists strings (position + 1)
|
||||
char_lists $: chars_str (pick position strings)
|
||||
in
|
||||
List.rev (split lines 0 [])
|
||||
to_char_lists lines 0 []
|
||||
|
||||
let scan_line (input: char list): token list =
|
||||
elog @@ "[scan_line] At " ^ (String.trim @@ str_chars input);
|
||||
let rec traverse (chars: char list) (position: int) (tokens: token list) =
|
||||
if position == List.length chars then tokens
|
||||
let rec to_tokens (chars: char list) (position: int) (tokens: token list) =
|
||||
if position == length chars then tokens
|
||||
else let token, next_position = lex chars position in
|
||||
traverse chars next_position (token :: tokens)
|
||||
in List.rev (traverse input 0 [])
|
||||
|
||||
let scan (char_lists: char list list): token list list =
|
||||
let rec scan' (char_lists': char list list) (position: int) (token_lists: token list list) =
|
||||
if position == List.length char_lists' then [End] :: token_lists
|
||||
else scan' char_lists' (position + 1) (scan_line (List.nth char_lists' position) :: token_lists)
|
||||
to_tokens chars next_position $ token :: tokens
|
||||
in
|
||||
List.rev (scan' char_lists 0 [])
|
||||
|
||||
let string_of_tokens (tokens: token list list): string =
|
||||
let string_of_token (token: token): string =
|
||||
match token with
|
||||
| Key k -> (match k with
|
||||
| SuCommand -> "[ KEY: su_command ]"
|
||||
| Unknown -> "[ UNKNOWN KEY ]")
|
||||
| Equal -> "[ OP: equal ]"
|
||||
| Value v -> "[ VAL: " ^ v ^ " ]"
|
||||
| Space -> "{ Space }"
|
||||
| LineBreak -> "{ LineBreak }\n"
|
||||
| End -> "{ End of File }\n"
|
||||
| Unknown s -> (String.make 1 s)
|
||||
in
|
||||
let rec join_strings (tokens: token list) position (output: string): string =
|
||||
if position == List.length tokens then output
|
||||
else join_strings tokens (position + 1)
|
||||
(output ^ " " ^ string_of_token (List.nth tokens position))
|
||||
in
|
||||
join_strings (List.concat tokens) 0 ""
|
||||
reverse $ to_tokens input 0 []
|
||||
|
||||
let scan (char_lists: char lists): token lists =
|
||||
rmap (scan_line) char_lists $: [End]
|
||||
|
|
|
|||
4
ocaml/lib/parsers/config/lexer.mli
Normal file
4
ocaml/lib/parsers/config/lexer.mli
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
type token
|
||||
val read : string -> char list list
|
||||
val scan : char list list -> token list list
|
||||
val string_of_tokens : token list list -> string
|
||||
|
|
@ -1,5 +1,24 @@
|
|||
let str_int = string_of_int
|
||||
(* an 'alias' is an alternate name with minor or no alterations to behavior *)
|
||||
|
||||
(* logging *)
|
||||
let print = print_endline
|
||||
let elog = Log.elog
|
||||
|
||||
(* casts *)
|
||||
let str_int = string_of_int
|
||||
let chars_str = Text.chars_of_string
|
||||
let str_chars = Text.string_of_chars
|
||||
|
||||
(* control flow & precedence *)
|
||||
let ($) = (@@)
|
||||
|
||||
(* lists *)
|
||||
type 'a lists = 'a list list
|
||||
let ($:) list element = list @ [element]
|
||||
let pick index list = List.nth list index
|
||||
let rmap = List.rev_map
|
||||
let reverse = List.rev
|
||||
let length = List.length
|
||||
let ifilter = List.filteri
|
||||
let imap = List.mapi
|
||||
let map = List.map
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue