From 9675ab4877a4b142d1a07cd031f39d0b53b0e047 Mon Sep 17 00:00:00 2001 From: jutty Date: Mon, 5 May 2025 10:56:57 -0300 Subject: [PATCH] OCaml: Trim leading and trailing whitespace before lexing configuration --- ocaml/README.md | 82 +++++++++++++++++++++++++++---- ocaml/TODO.md | 50 ------------------- ocaml/lib/parsers/config/lexer.ml | 8 +-- 3 files changed, 77 insertions(+), 63 deletions(-) delete mode 100644 ocaml/TODO.md diff --git a/ocaml/README.md b/ocaml/README.md index 5d78529..6c350ce 100644 --- a/ocaml/README.md +++ b/ocaml/README.md @@ -1,17 +1,81 @@ +# iganaq OCaml + +This is the OCaml implementation of the iganaq prototype. See the [root README](../README.md) for the general description. + ## Configuration parser -Grammar: +Grammar v0.2: - assignment = { space }, key, { space }, equal, { space }, value, "\n" - space = " " | "\t" - key = letter, { letter | digit | "_" } - equal = "=" + assignment = { space }, key, { space }, equal, [ space ], value + key = letter, { letter | digit | "_" }, equal + value = valuable, { " " | valuable }, break valuable = ( letter | digit | "_" | "-" | "~" | "/" ), { valuable } - value = valuable, { " " | valuable } + equal = "=" + break = "\n" + space = " " | "\t" Written using the ISO 14977 EBNF Notation . In this grammar, `digit` implies `decimal digit`. + +Spaces between the key and the `=` operator are lexed but meaningless. Spaces between the `=` operator and the first non-space character of the value are lexed and considered as part of the value. Spaces before the key and between the value and the newline are not lexed. + +- Note: non-terminals `key` and `value` are ambiguous. 
+ - Resolved by specifying what character terminates each + See also: - - Comparison of BNF notations: - - W3C ABNF Notation: - - IETF RFC 5234 ABNF Notation (replaces 4234, 2234): + +## Task list + +- Comparison of BNF notations: +- W3C ABNF Notation: +- IETF RFC 5234 ABNF Notation (replaces 4234, 2234): +- [ ] Spec requirements integration test coverage + - [x] Add log function + - [x] Output begins with ` [log] ` + - [x] Only prints if `DEBUG` is set + - [ ] Add interactive pkg tests (INS v0 B2.5) + - [ ] Get su command from `$XDG_CONFIG_HOME/tori/tori.conf` + - [ ] Default to `su -c` + - [ ] Validation + - [ ] Valid path or in `PATH` + - [ ] Executability + - ~~`true` exits with status 0 (see note 3)~~ + - [x] Add logging + - [x] Logs only if DEBUG is set + - [x] Print each command executed, not just package names + - [x] Case with no packages provided + - [x] Prints a message + - [x] MUST NOT run any system commands + - [x] Unrecognized command: exit code 1 + - [x] Command `user`: print the output of `whoami` + - [x] Command `os`: print the OS name + - [x] log the contents of /etc/os-release + +- [ ] Refactorings + - [ ] Simplify and analyze `System.File` + - [ ] Simplify Reader + +- [ ] Additionals + - [ ] Create interface files + - [ ] Expand unit tests coverage + - [ ] Try out doc generation + +- [ ] Check out + - [ ] + - [ ] + - [ ] + - [ ] + +### Notes + + 1. INS = Iganaq Napkin Spec: + 2. INS v0 B2.5 "MUST NOT run any system commands" is only testable if we wrap + command execution properly in e.g. a list containing all executed commands + and ensure no command is ever executed without being appended to it + 3. INS v0 A3.4 "running 'true' with exit code 0" requires the user to input + their password every time. This should be dropped from the spec instead + 4. INS v0.1 changes requirement B2.3 to "MUST print the OS name and MUST log + contents of /etc/os-release" in order to make the logging function testable + without user input + 5. 
As per item 3 above, INS v0.2 drops "run 'true' with exit code 0" from A3.4 + diff --git a/ocaml/TODO.md b/ocaml/TODO.md deleted file mode 100644 index 18997ae..0000000 --- a/ocaml/TODO.md +++ /dev/null @@ -1,50 +0,0 @@ -- [ ] Spec requirements integration test coverage - - [x] Add log function - - [x] Output begins with ` [log] ` - - [x] Only prints if `DEBUG` is set - - [ ] Add interactive pkg tests (INS v0 B2.5) - - [ ] Get su command from `$XDG_CONFIG_HOME/tori/tori.conf` - - [ ] Default to `su -c` - - [ ] Validation - - [ ] Valid path or in `PATH` - - [ ] Executability - - ~~`true` exits with status 0 (see note 3)~~ - - [x] Add logging - - [x] Logs only if DEBUG is set - - [x] Print each command executed, not just package names - - [x] Case with no packages provided - - [x] Prints a message - - [x] MUST NOT run any system commands - - [x] Unrecognized command: exit code 1 - - [x] Command `user`: print the output of `whoami` - - [x] Command `os`: print the OS name - - [x] log the contents of /etc/os-release - -- [ ] Refactorings - - [ ] Simplify and analyze `System.File` - - [ ] Simplify Reader - -- [ ] Additionals - - [ ] Create interface files - - [ ] Expand unit tests coverage - - [ ] Try out doc generation - -- [ ] Check out - - [ ] - - [ ] - - [ ] - - [ ] - -## Notes - - 1. INS = Iganaq Napkin Spec: - 2. INS v0 B2.5 "MUST NOT run any system commands" is only testable if we wrap - command execution properly in e.g. a list containing all executed commands - and ensure no command is ever executed without being appended to it - 3. INS v0 A3.4 "running 'true' with exit code 0" requires the user to input - their password every time. This should be dropped from the spec instead - 4. INS v0.1 changes requirement B2.3 to "MUST print the OS name and MUST log - contents of /etc/os-release" in order to make the logging function testable - without user input - 3. 
As per item 3 above, INS v0.2 drops "run 'true' with exit code 0" from A3.4 - diff --git a/ocaml/lib/parsers/config/lexer.ml b/ocaml/lib/parsers/config/lexer.ml index 9416431..35cbc09 100644 --- a/ocaml/lib/parsers/config/lexer.ml +++ b/ocaml/lib/parsers/config/lexer.ml @@ -66,12 +66,12 @@ let lex (chars: char list) (position: int): token * int = | c -> Unknown c, position + 1 let read (path: string): char lists = - let contents = System.File.read path in - let undelimited_lines = String.split_on_char '\n' contents in + let lines = System.File.read path + |> String.split_on_char '\n' + |> List.map String.trim in let lines = imap + (fun i s -> if i + 1 < length lines then s ^ "\n" else s) lines in (* adds a newline to each line end, except the last *) - (fun i s -> if i + 1 < length undelimited_lines then s ^ "\n" else s) - undelimited_lines in let rec to_char_lists (strings: string list) (position: int) (char_lists: char lists) = if position == length strings then char_lists