Add a context parser for PreFormat blocks

2026-06-02 16:00:40 -03:00 · 2026-06-02 16:00:40 -03:00 · d0ca4e6cb3
commit d0ca4e6cb3
parent 29c2beb3ed
9 changed files with 145 additions and 94 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -86,9 +86,9 @@ dependencies = [
 [[package]]
 name = "bitflags"
-version = "2.11.1"
+version = "2.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a"
 [[package]]
 name = "block-buffer"
@ -238,7 +238,7 @@ dependencies = [
 [[package]]
 name = "en"
-version = "0.4.0-alpha"
+version = "0.4.1-alpha"
 dependencies = [
 "axum",
 "serde",
@ -531,9 +531,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
 [[package]]
 name = "log"
-version = "0.4.30"
+version = "0.4.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5"
+checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f"
 [[package]]
 name = "matchit"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "en"
-version = "0.4.0-alpha"
+version = "0.4.1-alpha"
 description = "A non-linear writing instrument."
 license = "AGPL-3.0-only"
--- a/src/syntax/content/parser/context.rs
+++ b/src/syntax/content/parser/context.rs
@ -1,12 +1,13 @@
 use crate::syntax::content::parser::{
    State, Token,
-    token::{Header, Paragraph, PreFormat, Verse},
+    token::{Header, Paragraph, Verse},
 };
 pub mod anchor;
 pub mod block;
 pub mod inline;
 pub mod list;
 pub mod preformat;
 pub mod quote;
 pub mod table;
@ -38,30 +39,32 @@ pub enum Inline {
 }
 /// # Panics
-/// Panics if there is an open header or list at end of input.
+/// Panics if there is an open token at end of input that can't be easily
 /// closed by simply adding a matching closing token. This normally is handled
 /// by context parsers and probably indicates an error in one of them.
 pub fn close(state: &State, tokens: &mut Vec<Token>) {
    match state.context.block {
        Block::PreFormat => {
            tokens.push(Token::PreFormat(PreFormat::new(false)));
        },
        Block::Paragraph => {
            tokens.push(Token::Paragraph(Paragraph::new(false)));
        },
        Block::List => {
            panic!("End of input with open list")
        },
        Block::Header(level) => {
            tokens.push(Token::Header(Header::from_u8(level, false, None)));
        },
        Block::Quote => {
            panic!("End of input with open quote")
        },
        Block::Table => {
            panic!("End of input with open table")
        },
        Block::Verse => {
            tokens.push(Token::Verse(Verse::new(false)));
        },
        Block::PreFormat => {
            panic!("End of input with open preformat: {tokens:#?}")
        },
        Block::List => {
            panic!("End of input with open list: {tokens:#?}")
        },
        Block::Quote => {
            panic!("End of input with open quote: {tokens:#?}")
        },
        Block::Table => {
            panic!("End of input with open table: {tokens:#?}")
        },
        Block::None => (),
    }
 }
--- a/src/syntax/content/parser/context/block.rs
+++ b/src/syntax/content/parser/context/block.rs
@ -6,15 +6,17 @@ use crate::{
    syntax::content::{
        Parseable as _,
        parser::{
-            Block, Lexeme, State, Token,
+            Block, Lexeme, State, Token, context,
            token::{
-                Header, LineBreak, List, Literal, Paragraph, PreFormat, Quote,
+                Header, LineBreak, List, Paragraph, PreFormat, Quote, Table,
-                Table, Verse,
+                Verse,
            },
        },
    },
 };
 /// A return of `true` will trigger a `continue` on the outer parser, causing
 /// no more subsequent parsing of the current lexeme.
 pub fn parse(
    lexeme: &Lexeme,
    state: &mut State,
@ -27,8 +29,7 @@ pub fn parse(
            if PreFormat::probe(lexeme) {
                log!(VERBOSE, "Block Context: None -> PreFormat on {lexeme}");
                state.context.block = Block::PreFormat;
-                tokens.push(Token::PreFormat(PreFormat::new(true)));
+                return true
                return true;
            } else if Header::probe(lexeme) {
                let mut header = Header::lex(lexeme);
                header.dom_id = Some(Header::make_id(
@ -44,7 +45,7 @@ pub fn parse(
                log!(VERBOSE, "Block Context: None -> List on {lexeme}");
                state.context.block = Block::List;
                state.buffers.list.candidate.ordered = lexeme.match_char('+');
-                return super::list::parse(
+                return context::list::parse(
                    lexeme, state, tokens, iterator, graph,
                );
            } else if Quote::probe(lexeme) {
@ -71,14 +72,7 @@ pub fn parse(
            }
        },
        Block::PreFormat => {
-            if PreFormat::probe(lexeme) {
+            return context::preformat::parse(lexeme, state, tokens, iterator);
                tokens.push(Token::PreFormat(PreFormat::new(false)));
                log!(VERBOSE, "Block Context: PreFormat -> None on {lexeme}");
                state.context.block = Block::None;
            } else {
                tokens.push(Token::Literal(Literal::lex(lexeme)));
            }
            return true;
        },
        Block::Paragraph => {
            if Paragraph::probe_end(lexeme) {
@ -95,13 +89,17 @@ pub fn parse(
            }
        },
        Block::List => {
-            return super::list::parse(lexeme, state, tokens, iterator, graph);
+            return context::list::parse(lexeme, state, tokens, iterator, graph);
        },
        Block::Quote => {
-            return super::quote::parse(lexeme, state, tokens, iterator, graph);
+            return context::quote::parse(
                lexeme, state, tokens, iterator, graph,
            );
        },
        Block::Table => {
-            return super::table::parse(lexeme, state, tokens, iterator, graph);
+            return context::table::parse(
                lexeme, state, tokens, iterator, graph,
            );
        },
        Block::Verse => {
            if Verse::probe_end(lexeme) {
@ -127,7 +125,7 @@ mod tests {
        graph::Graph,
        syntax::content::parser::{
            self, Block, State, Token, context,
-            token::{Header, PreFormat, header::Level},
+            token::{Header, header::Level},
        },
    };
@ -161,16 +159,6 @@ mod tests {
        assert_eq!(vec, vec![Token::Header(Header::from_u8(1, false, None))]);
    }
    #[test]
    fn end_with_open_preformat() {
        let mut state = State::default();
        state.context.block = Block::PreFormat;
        let mut vec: Vec<Token> = vec![];
        context::close(&state, &mut vec);
        assert_eq!(vec, vec![Token::PreFormat(PreFormat::new(false))]);
    }
    #[test]
    fn truncated_header_level() {
        let u: usize = 999;
--- a/src/syntax/content/parser/context/preformat.rs
+++ b/src/syntax/content/parser/context/preformat.rs
@ -0,0 +1,61 @@
 use std::{iter::Peekable, slice::Iter};
 use crate::{
    prelude::*,
    syntax::content::{
        Parseable as _,
        parser::{Lexeme, State, Token, context::Block, token::PreFormat},
    },
 };
 /// Handles open `PreFormat` contexts until a block is fully parsed.
 ///
 /// While the block context is `Block::PreFormat`, each lexeme's text is
 /// appended to `state.buffers.preformat.candidate.text`. When
 /// `PreFormat::probe` matches the current lexeme, the accumulated candidate
 /// is pushed to `tokens` as a single `Token::PreFormat`, the block context is
 /// set back to `Block::None` and the candidate is reset to its default.
 ///
 /// `iterator` is only advanced in the backslash branch, to consume the
 /// lexeme that follows the backslash.
 ///
 /// A return of `true` will trigger a continue in the outer parser,
 /// skipping any further parsing of the current lexeme. Every non-panicking
 /// path through this function returns `true`.
 ///
 /// # Panics
 /// This parser can handle only the `PreFormat` context, and will panic if
 /// called while `state.context.block` is any other context, since it has no
 /// knowledge of how to handle them.
 pub fn parse(
    lexeme: &Lexeme,
    state: &mut State,
    tokens: &mut Vec<Token>,
    iterator: &mut Peekable<Iter<'_, Lexeme>>,
 ) -> bool {
    let buffer = &mut state.buffers.preformat;
    let candidate = &mut buffer.candidate;
    // The wildcard arm is deliberate: every block context other than
    // `PreFormat` is treated as a caller error and panics below.
    #[expect(clippy::wildcard_enum_match_arm)]
    match state.context.block {
        Block::PreFormat => {
            // NOTE(review): the branches below are mutually exclusive, so a
            // lexeme that matches both the `<` and the `>` check only has its
            // `<` replaced — confirm the lexer never produces such a lexeme.
            if lexeme.match_first_char('<') {
                // replace the leading `<` with an HTML entity and keep the
                // rest of the lexeme text as is
                candidate.text.push_str("&lt;");
                candidate.text.push_str(
                    lexeme.text().strip_prefix('<').unwrap_or(&lexeme.text()),
                );
            } else if lexeme.match_last_char('>') {
                // keep the lexeme text as is and replace the trailing `>`
                // with an HTML entity
                candidate.text.push_str(
                    lexeme.text().strip_suffix('>').unwrap_or(&lexeme.text()),
                );
                candidate.text.push_str("&gt;");
            } else if lexeme.match_char('\\') {
                // a lone backslash: append `lexeme.next()` verbatim
                // (presumably the text of the following lexeme — verify
                // against `Lexeme`) and consume that lexeme from the iterator
                // so it is not parsed again
                candidate.text.push_str(lexeme.next().as_str());
                iterator.next();
                return true;
            } else if PreFormat::probe(lexeme) {
                // found end of block, push it and reset state
                log!(VERBOSE, "Accepting preformat candidate {candidate}");
                tokens.push(Token::PreFormat(candidate.clone()));
                state.context.block = Block::None;
                *candidate = PreFormat::default();
            } else {
                // anything else is pushed into the candidate preformat's text
                candidate.text.push_str(&lexeme.text());
            }
        },
        _ => {
            panic!("PreFormat context parser called for {:?}", state.context)
        },
    }
    true
 }
--- a/src/syntax/content/parser/lexeme.rs
+++ b/src/syntax/content/parser/lexeme.rs
@ -32,6 +32,8 @@ impl Lexeme {
    pub fn mutate_text(&mut self, new: &str) { self.text = new.to_string(); }
    /// Returns an Option containing the character if the raw lexeme text
    /// is composed of a single character, None if it has multiple characters.
    pub fn as_char(&self) -> Option<char> {
        if self.text.chars().count() == 1 {
            self.text.chars().nth(0)
@ -56,6 +58,7 @@ impl Lexeme {
        }
    }
    /// Returns true if the raw lexeme text is a single matching character.
    pub fn match_char(&self, c: char) -> bool {
        self.as_char().is_some_and(|as_char| as_char == c)
    }
@ -86,6 +89,8 @@ impl Lexeme {
            && self.match_third_char(c3)
    }
    /// Returns true if the lexeme raw text is composed of a single character
    /// and this character is in the provided slice.
    pub fn match_char_in(&self, slice: &[char]) -> bool {
        self.as_char().is_some_and(|c| slice.contains(&c))
    }
--- a/src/syntax/content/parser/lexer.rs
+++ b/src/syntax/content/parser/lexer.rs
@ -38,7 +38,9 @@ pub(super) fn lex(
    let mut iterator = lexemes.iter().peekable();
    while let Some(lexeme) = iterator.next() {
-        if lexeme.match_char('\\') {
+        if lexeme.match_char('\\')
            && !matches!(state.context.block, context::Block::PreFormat)
        {
            if let Some(next) = iterator.next() {
                tokens.push(Token::Literal(Literal::lex(next)));
            }
--- a/src/syntax/content/parser/state.rs
+++ b/src/syntax/content/parser/state.rs
@ -3,7 +3,7 @@ use std::collections::HashMap;
 use crate::syntax::content::parser::{
    Token,
    context::Context,
-    token::{Anchor, Item, List, Quote, Table},
+    token::{Anchor, Item, List, PreFormat, Quote, Table},
 };
 #[derive(Clone, Default, Debug)]
@ -29,6 +29,7 @@ pub struct Buffers {
    pub list: ListBuffer,
    pub quote: QuoteBuffer,
    pub table: TableBuffer,
    pub preformat: PreFormatBuffer,
 }
 #[derive(Default, Clone, Debug)]
@ -59,6 +60,11 @@ pub struct TableBuffer {
    pub in_header: bool,
 }
 /// Working state for the `PreFormat` context parser.
 #[derive(Default, Clone, Debug)]
 pub struct PreFormatBuffer {
    /// The `PreFormat` token under construction: the context parser appends
    /// text to it and resets it to the default once the token is pushed.
    pub candidate: PreFormat,
 }
 impl std::fmt::Display for AnchorBuffer {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let display_text = if self.text.is_empty() {
--- a/src/syntax/content/parser/token/preformat.rs
+++ b/src/syntax/content/parser/token/preformat.rs
@ -1,46 +1,42 @@
 use crate::syntax::content::{Lexeme, Parseable};
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Debug, Default, Clone, Eq, PartialEq)]
 pub struct PreFormat {
-    open: Option<bool>,
+    pub text: String,
 }
 impl PreFormat {
-    pub const fn new(open: bool) -> PreFormat { PreFormat { open: Some(open) } }
+    pub fn new(text: &str) -> PreFormat {
        PreFormat {
            text: String::from(text),
        }
    }
 }
 impl std::fmt::Display for PreFormat {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        let display_open_state = if let Some(open_state) = self.open {
+        let character_count = self.text.chars().count();
-            if open_state { "open" } else { "closed" }
+        let is_whitespace = self.text.trim_ascii().is_empty();
        let summary = if is_whitespace {
            "empty"
        } else {
-            "unknown"
+            &format!("{character_count} chars")
        };
-        write!(f, "PreFormat [{display_open_state}]")
+        write!(f, "PreFormat [{summary}]")
    }
 }
 impl Parseable for PreFormat {
    fn probe(lexeme: &Lexeme) -> bool {
-        lexeme.match_first_char('`') && (lexeme.next() == "\n" || lexeme.last())
+        lexeme.match_char('`') && (lexeme.next() == "\n" || lexeme.last())
    }
-    fn lex(_lexeme: &Lexeme) -> PreFormat { PreFormat { open: None } }
+    fn lex(_lexeme: &Lexeme) -> PreFormat {
-
+        panic!("Attempt to lex a preformat directly from a lexeme")
    fn render(&self) -> String {
        if let Some(o) = self.open {
            if o {
                "<pre>".to_owned()
            } else {
                "</pre>".to_owned()
            }
        } else {
            panic!(
                "Attempt to render a preformat tag while open state is unknown"
            )
        }
    }
    fn render(&self) -> String { format!("<pre>{}</pre>", self.text) }
    fn flatten(&self) -> String { String::default() }
 }
@ -50,49 +46,39 @@ mod tests {
    use crate::syntax::content::parser::Token;
    #[test]
    #[should_panic(
        expected = "Attempt to lex a preformat directly from a lexeme"
    )]
    fn lex() {
-        let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
+        let lexeme = Lexeme::new("a", "b", "c");
-        assert!(from_empty_lexeme.open.is_none());
+        PreFormat::lex(&lexeme);
        let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
        assert!(from_non_empty_lexeme.open.is_none());
    }
    #[test]
    #[should_panic(expected = "Attempt to render a preformat tag while \
            open state is unknown")]
    fn render() {
        let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
        from_empty_lexeme.render();
        let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
        from_non_empty_lexeme.render();
    }
    #[test]
    fn token_display() {
-        let mut preformat = PreFormat::new(true);
+        let mut preformat = PreFormat::new("");
        assert_eq!(
            format!("{}", Token::PreFormat(preformat.clone())),
-            "Tk:PreFormat [open]"
+            "Tk:PreFormat [empty]"
        );
-        preformat.open = Some(false);
+        preformat.text = "\n ".to_string();
        assert_eq!(
            format!("{}", Token::PreFormat(preformat.clone())),
-            "Tk:PreFormat [closed]"
+            "Tk:PreFormat [empty]"
        );
-        preformat.open = None;
+        preformat.text = "text".to_string();
        assert_eq!(
            format!("{}", Token::PreFormat(preformat)),
-            "Tk:PreFormat [unknown]"
+            "Tk:PreFormat [4 chars]"
        );
    }
    #[test]
    fn flatten() {
-        let preformat = PreFormat::new(false);
+        let preformat = PreFormat::new("");
        assert_eq!(preformat.flatten(), "");
        let token = Token::PreFormat(preformat);