OCaml: Refactor configuration lexer for readability, conciseness

2025-05-05 02:06:37 -03:00 · 2025-05-05 02:06:37 -03:00 · 8b1aae16c6
commit 8b1aae16c6
parent df3a9e9a68
3 changed files with 80 additions and 65 deletions
--- a/ocaml/lib/parsers/config/lexer.ml
+++ b/ocaml/lib/parsers/config/lexer.ml
@ -17,84 +17,76 @@ let lex_keyword (literal: string): token =

 let lex_keyvalue (literal: string): token = Value literal

-let is_boundary char = char == '=' || char == ' '
+exception Malformed_source of string
+
+let string_of_token (token: token): string =
+    match token with
+    | Key k -> (match k with
+        | SuCommand -> "[ KEY: su_command ]"
+        | Unknown -> "[ UNKNOWN KEY ]")
+    | Equal -> "[ OP: equal ]"
+    | Value v -> "[ VAL: " ^ v ^ " ]"
+    | Space -> "{ Space }"
+    | LineBreak -> "{ LineBreak }\n"
+    | End -> "{ End of File }\n"
+    | Unknown s -> (String.make 1 s)
+
+let string_of_tokens (tokens: token lists): string =
+    String.concat " " $ map string_of_token (List.concat tokens)

 let lex_keypair (chars: char list) (position: int): token * int =

-    let boundary =
-        match List.find_index is_boundary chars with
+    (* For a keypair abc = bcd\n, the middle position is the first space
+       before =, or = itself if there are no spaces. The final position is the
+       middle position if parsing before it, or the newline \n if past it *)
+
+    let middle_position =
+        match List.find_index (fun c -> c == '=' || c == ' ') chars with
        | Some b -> b
-        | None -> assert false (* TODO: Exception 'line has no equals sign' *)
+        | None -> raise $ Malformed_source
+            ("No equal operator for position " ^ str_int position)
    in
-    let next_position =
-        if position < boundary then boundary
-        else (List.length chars) - 1 in
+    let final_position =
+        if position < middle_position then middle_position
+        else (length chars) - 1 in
    let literal = str_chars
-        (List.filteri (fun i _ -> i >= position && i < next_position) chars) in
+        (ifilter (fun i _ -> i >= position && i < final_position) chars) in

-    elog @@ "[lex_keypair] Position " ^ str_int position ^
-        ": Found literal '" ^ literal ^
-        "', boundary " ^ (str_int boundary) ^
-        " next position " ^ (str_int next_position);
-
-    if position < boundary then
-        lex_keyword literal, next_position
+    if position < middle_position then
+        lex_keyword literal, final_position
    else
-        lex_keyvalue literal, List.length chars - 1
+        lex_keyvalue literal, final_position

 let lex (chars: char list) (position: int): token * int =
-    elog @@ "[lex] Position " ^ (str_int position);
-    match List.nth chars position with
-    | '=' -> Equal, position + 1
-    | ' '|'\t' -> Space, position + 1
-    | '\n' -> LineBreak, position + 1
-    | 'a'..'z'|'~'|'/' -> lex_keypair chars position
-    | c -> Unknown c, position + 1
+    match pick position chars with
+        | '=' -> Equal, position + 1
+        | ' '|'\t' -> Space, position + 1
+        | '\n' -> LineBreak, position + 1
+        | 'a'..'z'|'~'|'/' -> lex_keypair chars position
+        | c -> Unknown c, position + 1

-let read (path: string): char list list =
-    let contents = (System.File.read path) in
-    let lines = String.split_on_char '\n' contents in
-    let lines = List.mapi
-        (fun i s -> if i+1 < List.length lines then s ^ "\n" else s) lines in
-    let rec split (strings: string list) position (char_lists: char list list) =
-        if position == List.length strings then char_lists
-        else split strings (position + 1)
-            (chars_str (List.nth strings position) :: char_lists)
+let read (path: string): char lists =
+    let contents = System.File.read path in
+    let undelimited_lines = String.split_on_char '\n' contents in
+    let lines = imap
+        (* adds a newline to each line end, except the last *)
+        (fun i s -> if i + 1 < length undelimited_lines then s ^ "\n" else s)
+        undelimited_lines in
+    let rec to_char_lists
+        (strings: string list) (position: int) (char_lists: char lists) =
+        if position == length strings then char_lists
+        else to_char_lists strings (position + 1)
+            char_lists $: chars_str (pick position strings)
    in
-    List.rev (split lines 0 [])
+    to_char_lists lines 0 []

 let scan_line (input: char list): token list =
-    elog @@ "[scan_line] At " ^ (String.trim @@ str_chars input);
-    let rec traverse (chars: char list) (position: int) (tokens: token list) =
-        if position == List.length chars then tokens
+    let rec to_tokens (chars: char list) (position: int) (tokens: token list) =
+        if position == length chars then tokens
        else let token, next_position = lex chars position in
-            traverse chars next_position (token :: tokens)
-    in List.rev (traverse input 0 [])
-
-let scan (char_lists: char list list): token list list =
-    let rec scan' (char_lists': char list list) (position: int) (token_lists: token list list) =
-        if position == List.length char_lists' then [End] :: token_lists
-        else scan' char_lists' (position + 1) (scan_line (List.nth char_lists' position) :: token_lists)
+            to_tokens chars next_position $ token :: tokens
    in
-    List.rev (scan' char_lists 0 [])
-
-let string_of_tokens (tokens: token list list): string =
-    let string_of_token (token: token): string =
-        match token with
-        | Key k -> (match k with
-            | SuCommand -> "[ KEY: su_command ]"
-            | Unknown -> "[ UNKNOWN KEY ]")
-        | Equal -> "[ OP: equal ]"
-        | Value v -> "[ VAL: " ^ v ^ " ]"
-        | Space -> "{ Space }"
-        | LineBreak -> "{ LineBreak }\n"
-        | End -> "{ End of File }\n"
-        | Unknown s -> (String.make 1 s)
-    in
-    let rec join_strings (tokens: token list) position (output: string): string =
-        if position == List.length tokens then output
-        else join_strings tokens (position + 1)
-            (output ^ " " ^ string_of_token (List.nth tokens position))
-    in
-    join_strings (List.concat tokens) 0 ""
+    reverse $ to_tokens input 0 []

+let scan (char_lists: char lists): token lists =
+    rmap (scan_line) char_lists $: [End]
--- a/ocaml/lib/parsers/config/lexer.mli
+++ b/ocaml/lib/parsers/config/lexer.mli
@ -0,0 +1,4 @@
+type token
+val read : string -> char list list
+val scan : char list list -> token list list
+val string_of_tokens : token list list -> string
--- a/ocaml/lib/utilities/aliases.ml
+++ b/ocaml/lib/utilities/aliases.ml
@ -1,5 +1,24 @@
-let str_int = string_of_int
+(* an 'alias' is an alternate name with minor or no alterations to behavior *)
+
+(* logging *)
 let print = print_endline
 let elog = Log.elog
+
+(* casts *)
+let str_int = string_of_int
 let chars_str = Text.chars_of_string
 let str_chars = Text.string_of_chars
+
+(* control flow & precedence *)
+let ($) = (@@)
+
+(* lists *)
+type 'a lists = 'a list list
+let ($:) list element = list @ [element]
+let pick index list = List.nth list index
+let rmap = List.rev_map
+let reverse = List.rev
+let length = List.length
+let ifilter = List.filteri
+let imap = List.mapi
+let map = List.map