Extract context-specific parsing to a separate module

2026-01-02 00:52:20 -03:00 · 2026-01-02 00:52:20 -03:00 · 5ed2036e36
commit 5ed2036e36
parent 5c0f786686
3 changed files with 158 additions and 120 deletions
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@ -7,10 +7,12 @@ use token::{
    preformat::PreFormat, literal::Literal, code::Code, oblique::Oblique,
 };
 use lexeme::Lexeme;
 use context::{Context, Block, Inline};
 pub mod token;
 pub mod lexeme;
 pub mod segment;
 pub mod context;
 const LEXMAP: LexMap = &[
    (LineBreak::probe, |word| {
@ -29,9 +31,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
    let mut iterator = lexemes.iter().peekable();
    while let Some(lexeme) = iterator.next() {
        match state.context.block {
-            BlockContext::None => {
+            Block::None => {
                if PreFormat::probe(lexeme) {
-                    state.context.block = BlockContext::PreFormat;
+                    state.context.block = Block::PreFormat;
                    tokens.push(Token::PreFormat(PreFormat::new(true)));
                    continue;
                } else if Header::probe(lexeme) {
@ -41,49 +43,49 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                        iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
                        &mut state.dom_ids,
                    ));
-                    state.context.block = BlockContext::Header(header.level());
+                    state.context.block = Block::Header(header.level());
                    tokens.push(Token::Header(header));
                    continue;
                } else if Paragraph::probe(lexeme) {
-                    state.context.block = BlockContext::Paragraph;
+                    state.context.block = Block::Paragraph;
                    tokens.push(Token::Paragraph(Paragraph::new(true)));
                }
            },
-            BlockContext::PreFormat => {
+            Block::PreFormat => {
                if PreFormat::probe(lexeme) {
                    tokens.push(Token::PreFormat(PreFormat::new(false)));
-                    state.context.block = BlockContext::None;
+                    state.context.block = Block::None;
                } else {
                    tokens.push(Token::Literal(Literal::lex(lexeme)));
                }
                continue;
            },
-            BlockContext::Paragraph => {
+            Block::Paragraph => {
                if lexeme.text() == "\n" {
                    tokens.push(Token::Paragraph(Paragraph::new(false)));
-                    state.context.block = BlockContext::None;
+                    state.context.block = Block::None;
                }
            },
-            BlockContext::Header(n) => {
+            Block::Header(n) => {
                if lexeme.text() == "\n" {
                    tokens.push(Token::Header(Header::from_u8(n, false, None)));
-                    state.context.block = BlockContext::None;
+                    state.context.block = Block::None;
                }
            },
        }
        match state.context.inline {
-            InlineContext::None => {
+            Inline::None => {
                if Code::probe(lexeme) {
-                    state.context.inline = InlineContext::Code;
+                    state.context.inline = Inline::Code;
                    tokens.push(Token::Code(Code::new(true)));
                    continue;
                } else if Oblique::probe(lexeme) {
-                    state.context.inline = InlineContext::Oblique;
+                    state.context.inline = Inline::Oblique;
                    tokens.push(Token::Oblique(Oblique::new(true)));
                    continue;
                } else if Anchor::probe(lexeme) {
-                    state.context.inline = InlineContext::Anchor;
+                    state.context.inline = Inline::Anchor;
                    state.buffers.anchor.clear();
                    if lexeme.match_first_char('|') {
@ -94,81 +96,27 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                    continue;
                }
            },
-            InlineContext::Code => {
+            Inline::Code => {
                if Code::probe(lexeme) {
-                    state.context.inline = InlineContext::None;
+                    state.context.inline = Inline::None;
                    tokens.push(Token::Code(Code::new(false)));
                    continue;
                }
            },
-            InlineContext::Oblique => {
+            Inline::Oblique => {
                if Oblique::probe(lexeme) {
-                    state.context.inline = InlineContext::None;
+                    state.context.inline = Inline::None;
                    tokens.push(Token::Oblique(Oblique::new(false)));
                    continue;
                }
            },
-            InlineContext::Anchor => {
+            Inline::Anchor => {
-                let buffer = &mut state.buffers.anchor;
+                if context::anchor::parse(
-                let candidate = &mut buffer.candidate;
+                    lexeme,
-                if candidate.text.is_empty() {
+                    &mut iterator,
-                    if lexeme.next() == "|" {
+                    &mut state,
-                        buffer.text.push_str(&lexeme.text());
+                    &mut tokens,
-                        candidate.text.clone_from(&buffer.text);
+                ) {
                    } else {
                        buffer.text.push_str(&lexeme.text());
                    }
                    continue;
                } else if candidate.destination.is_none() {
                    // candidate is leading and we found the second pipe
                    if candidate.leading && lexeme.text() == "|" {
                        // third pipe immediately after second: forcing flanking
                        if lexeme.match_next_first_char('|') {
                            candidate.destination =
                                Some(candidate.text.clone());
                            let token = Token::Anchor(candidate.clone());
                            tokens.push(token);
                            state.context.inline = InlineContext::None;
                            iterator.next();
                            continue;
                        // whitespace or punctuation after pipe: flanking anchor
                        } else if lexeme.is_next_whitespace()
                            || lexeme.is_next_punctuation()
                        {
                            candidate.destination =
                                Some(candidate.text.clone());
                            let token = Token::Anchor(candidate.clone());
                            tokens.push(token);
                            state.context.inline = InlineContext::None;
                        // non-whitespace after pipe is the destination
                        } else {
                            candidate.destination = Some(lexeme.next().clone());
                            let token = Token::Anchor(candidate.clone());
                            tokens.push(token);
                            state.context.inline = InlineContext::None;
                            // if there is a trailing pipe, consume it
                            if let Some(next) = iterator.next()
                                && next.next() == "|"
                            {
                                iterator.next();
                            }
                        }
                    // candidate is nonleading and we found a second pipe
                    } else if !candidate.leading && lexeme.next() == "|" {
                        candidate.destination = Some(lexeme.text());
                        tokens.push(Token::Anchor(candidate.clone()));
                        state.context.inline = InlineContext::None;
                        iterator.next();
                    // candidate is nonleading and we found whitespace
                    } else if lexeme.is_next_whitespace() {
                        candidate.destination = Some(lexeme.text());
                        let token = Token::Anchor(candidate.clone());
                        tokens.push(token);
                        state.context.inline = InlineContext::None;
                    // candidate is nonleading and we haven't found whitespace
                    } else {
                        buffer.destination.push_str(&lexeme.text());
                    }
                    continue;
                }
            },
@ -182,48 +130,16 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
        }
    }
-    close(&state, &mut tokens);
+    context::close(&state, &mut tokens);
    tokens
 }
-fn close(state: &State, tokens: &mut Vec<Token>) {
+pub struct State {
    match state.context.block {
        BlockContext::PreFormat => {
            tokens.push(Token::PreFormat(PreFormat::new(false)));
        },
        BlockContext::Paragraph => {
            tokens.push(Token::Paragraph(Paragraph::new(false)));
        },
        BlockContext::Header(_) => panic!("End of file with open header"),
        BlockContext::None => (),
    }
 }
 enum BlockContext {
    Paragraph,
    Header(u8),
    PreFormat,
    None,
 }
 enum InlineContext {
    Anchor,
    Code,
    Oblique,
    None,
 }
 struct State {
    context: Context,
    dom_ids: HashMap<String, Vec<String>>,
    buffers: Buffers,
 }
 struct Context {
    block: BlockContext,
    inline: InlineContext,
 }
 struct Buffers {
    anchor: AnchorBuffer,
 }
@ -247,8 +163,8 @@ impl State {
    fn new() -> State {
        State {
            context: Context {
-                inline: InlineContext::None,
+                inline: Inline::None,
-                block: BlockContext::None,
+                block: Block::None,
            },
            dom_ids: HashMap::new(),
            buffers: Buffers {
@ -325,6 +241,14 @@ mod tests {
        );
    }
    #[test]
    fn anchor_to_node_s() {
        // Both anchors in the input are expected to render as links to
        // `/node/s`: the first with the label "letter s", the second with
        // the label "s".
        let rendered = read_noconfig("The |letter s|s|'s node: |s|!");
        let expected = r#"<p>The <a href="/node/s">letter s</a>'s node: <a href="/node/s">s</a>!</p>"#;
        assert_eq!(rendered, expected);
    }
    #[test]
    fn clear_anchor_buffer() {
        assert_eq!(
@ -356,27 +280,27 @@ mod tests {
    }
    #[test]
-    #[should_panic(expected = "End of file with open header")]
+    #[should_panic(expected = "End of input with open header")]
    fn end_with_open_header() {
        let default_state = State::new();
        let state = State {
            context: Context {
-                block: BlockContext::Header(1),
+                block: Block::Header(1),
                ..default_state.context
            },
            ..default_state
        };
-        close(&state, &mut vec![]);
+        context::close(&state, &mut vec![]);
    }
    #[test]
    fn end_with_open_preformat() {
        let mut state = State::new();
-        state.context.block = BlockContext::PreFormat;
+        state.context.block = Block::PreFormat;
        let mut vec: Vec<Token> = vec![];
-        close(&state, &mut vec);
+        context::close(&state, &mut vec);
        assert_eq!(vec, vec![Token::PreFormat(PreFormat::new(false))]);
    }
--- a/src/syntax/content/parser/context.rs
+++ b/src/syntax/content/parser/context.rs
@ -0,0 +1,40 @@
 use crate::syntax::content::parser::{
    token::{Token, paragraph::Paragraph, preformat::PreFormat},
    State,
 };
 pub mod anchor;
 /// Lexer context: which block-level and which inline construct is
 /// currently open.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct Context {
    /// Block-level construct the lexer is currently inside.
    pub block: Block,
    /// Inline construct the lexer is currently inside.
    pub inline: Inline,
 }
 /// Block-level constructs that can be open while lexing.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Block {
    /// Inside a paragraph.
    Paragraph,
    /// Inside a header; the payload is the header level.
    Header(u8),
    /// Inside a preformatted block.
    PreFormat,
    /// No block-level construct is open.
    None,
 }
 /// Inline constructs that can be open while lexing.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Inline {
    /// Inside an anchor whose text/destination is still being collected.
    Anchor,
    /// Inside an inline code span.
    Code,
    /// Inside an oblique span.
    Oblique,
    /// No inline construct is open.
    None,
 }
 /// Emits the closing token for whatever block is still open in `state`.
 ///
 /// An open preformatted block or paragraph gets its closing token appended
 /// to `tokens`; when no block is open, `tokens` is left untouched.
 ///
 /// # Panics
 /// Panics if there is an open header at end of input.
 pub fn close(state: &State, tokens: &mut Vec<Token>) {
    let closing = match state.context.block {
        Block::None => return,
        Block::Header(_) => panic!("End of input with open header"),
        Block::Paragraph => Token::Paragraph(Paragraph::new(false)),
        Block::PreFormat => Token::PreFormat(PreFormat::new(false)),
    };
    tokens.push(closing);
 }
--- a/src/syntax/content/parser/context/anchor.rs
+++ b/src/syntax/content/parser/context/anchor.rs
@ -0,0 +1,74 @@
 use std::{iter::Peekable, slice::Iter};
 use crate::syntax::content::parser::{
    State, context::Inline, lexeme::Lexeme, token::Token,
 };
 /// Processes one lexeme while the inline context is [`Inline::Anchor`].
 ///
 /// The function works on the anchor candidate stored in
 /// `state.buffers.anchor` and goes through two phases:
 ///
 /// 1. While the candidate has no text, lexeme text is accumulated in the
 ///    buffer; once the lexeme reports `"|"` as what follows it, the
 ///    accumulated text becomes the candidate's text.
 /// 2. While the candidate has text but no destination, the destination is
 ///    resolved. When it is, a `Token::Anchor` holding a clone of the
 ///    candidate is pushed to `tokens` and `state.context.inline` is reset
 ///    to [`Inline::None`].
 ///
 /// `iterator` is the caller's lexeme iterator; some branches advance it to
 /// skip lexemes that were already accounted for here.
 ///
 /// Returns `true` when the lexeme was handled by one of the two phases
 /// above, and `false` when the candidate already has both text and a
 /// destination (nothing is done in that case). The caller is presumably
 /// expected to skip any further processing of the lexeme on `true`.
 pub fn parse(
    lexeme: &Lexeme,
    iterator: &mut Peekable<Iter<'_, Lexeme>>,
    state: &mut State,
    tokens: &mut Vec<Token>,
 ) -> bool {
    let buffer = &mut state.buffers.anchor;
    let candidate = &mut buffer.candidate;
    // Phase 1: the anchor text is still being collected.
    if candidate.text.is_empty() {
        // `lexeme.next()` presumably yields the text of the following
        // lexeme (TODO confirm); a pipe there ends the anchor text.
        if lexeme.next() == "|" {
            buffer.text.push_str(&lexeme.text());
            candidate.text.clone_from(&buffer.text);
        } else {
            buffer.text.push_str(&lexeme.text());
        }
        return true;
    // Phase 2: the text is known, the destination is not.
    } else if candidate.destination.is_none() {
        // candidate is leading and we found the second pipe
        if candidate.leading && lexeme.text() == "|" {
            if lexeme.match_next_first_char('|') {
                // third pipe immediately after second: forcing flanking;
                // the anchor text doubles as the destination
                candidate.destination = Some(candidate.text.clone());
                let token = Token::Anchor(candidate.clone());
                tokens.push(token);
                state.context.inline = Inline::None;
                // skip the lexeme that follows the current one
                iterator.next();
                return true;
            } else if lexeme.is_next_whitespace()
                || lexeme.is_next_punctuation()
            {
                // whitespace or punctuation after pipe: flanking anchor;
                // the anchor text doubles as the destination
                candidate.destination = Some(candidate.text.clone());
                let token = Token::Anchor(candidate.clone());
                tokens.push(token);
                state.context.inline = Inline::None;
            } else {
                // non-whitespace after pipe is the destination
                candidate.destination = Some(lexeme.next().clone());
                let token = Token::Anchor(candidate.clone());
                tokens.push(token);
                state.context.inline = Inline::None;
                // The iterator is always advanced by one lexeme here
                // (presumably the one just used as the destination); if
                // that lexeme reports a trailing pipe after it, the pipe
                // is consumed as well.
                if let Some(next) = iterator.next()
                    && next.next() == "|"
                {
                    iterator.next();
                }
            }
        } else if !candidate.leading && lexeme.next() == "|" {
            // candidate is nonleading and we found a second pipe: the
            // current lexeme is the destination and the pipe is skipped
            candidate.destination = Some(lexeme.text());
            tokens.push(Token::Anchor(candidate.clone()));
            state.context.inline = Inline::None;
            iterator.next();
        } else if lexeme.is_next_whitespace() {
            // candidate is nonleading and we found whitespace
            // NOTE(review): this condition does not check
            // `candidate.leading`, so a leading candidate whose current
            // lexeme is not `|` also ends up here — confirm that is
            // intended.
            candidate.destination = Some(lexeme.text());
            let token = Token::Anchor(candidate.clone());
            tokens.push(token);
            state.context.inline = Inline::None;
        } else {
            // candidate is nonleading and we haven't found whitespace
            // NOTE(review): `buffer.destination` is only appended to in
            // this function; the branches above set the destination from
            // the current lexeme alone — verify the accumulated value is
            // consumed elsewhere.
            buffer.destination.push_str(&lexeme.text());
        }
        return true;
    }
    // The candidate already has both text and destination.
    false
 }