From 5ed2036e36fd2741e8ee66bee857a13d72941d45 Mon Sep 17 00:00:00 2001
From: jutty <j@jutty.dev>
Date: Fri, 2 Jan 2026 00:52:20 -0300
Subject: [PATCH] Extract context-specific parsing to a separate module

---
 src/syntax/content/parser.rs                | 164 ++++++--------------
 src/syntax/content/parser/context.rs        |  40 +++++
 src/syntax/content/parser/context/anchor.rs |  74 +++++++++
 3 files changed, 158 insertions(+), 120 deletions(-)
 create mode 100644 src/syntax/content/parser/context.rs
 create mode 100644 src/syntax/content/parser/context/anchor.rs
diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs
index 08893b0..1837644 100644
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@@ -7,10 +7,12 @@ use token::{
     preformat::PreFormat, literal::Literal, code::Code, oblique::Oblique,
 };
 use lexeme::Lexeme;
+use context::{Context, Block, Inline};
 
 pub mod token;
 pub mod lexeme;
 pub mod segment;
+pub mod context;
 
 const LEXMAP: LexMap = &[
     (LineBreak::probe, |word| {
@@ -29,9 +31,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
     let mut iterator = lexemes.iter().peekable();
     while let Some(lexeme) = iterator.next() {
         match state.context.block {
-            BlockContext::None => {
+            Block::None => {
                 if PreFormat::probe(lexeme) {
-                    state.context.block = BlockContext::PreFormat;
+                    state.context.block = Block::PreFormat;
                     tokens.push(Token::PreFormat(PreFormat::new(true)));
                     continue;
                 } else if Header::probe(lexeme) {
@@ -41,49 +43,49 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                         iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
                         &mut state.dom_ids,
                     ));
-                    state.context.block = BlockContext::Header(header.level());
+                    state.context.block = Block::Header(header.level());
                     tokens.push(Token::Header(header));
                     continue;
                 } else if Paragraph::probe(lexeme) {
-                    state.context.block = BlockContext::Paragraph;
+                    state.context.block = Block::Paragraph;
                     tokens.push(Token::Paragraph(Paragraph::new(true)));
                 }
             },
-            BlockContext::PreFormat => {
+            Block::PreFormat => {
                 if PreFormat::probe(lexeme) {
                     tokens.push(Token::PreFormat(PreFormat::new(false)));
-                    state.context.block = BlockContext::None;
+                    state.context.block = Block::None;
                 } else {
                     tokens.push(Token::Literal(Literal::lex(lexeme)));
                 }
                 continue;
             },
-            BlockContext::Paragraph => {
+            Block::Paragraph => {
                 if lexeme.text() == "\n" {
                     tokens.push(Token::Paragraph(Paragraph::new(false)));
-                    state.context.block = BlockContext::None;
+                    state.context.block = Block::None;
                 }
             },
-            BlockContext::Header(n) => {
+            Block::Header(n) => {
                 if lexeme.text() == "\n" {
                     tokens.push(Token::Header(Header::from_u8(n, false, None)));
-                    state.context.block = BlockContext::None;
+                    state.context.block = Block::None;
                 }
             },
         }
 
         match state.context.inline {
-            InlineContext::None => {
+            Inline::None => {
                 if Code::probe(lexeme) {
-                    state.context.inline = InlineContext::Code;
+                    state.context.inline = Inline::Code;
                     tokens.push(Token::Code(Code::new(true)));
                     continue;
                 } else if Oblique::probe(lexeme) {
-                    state.context.inline = InlineContext::Oblique;
+                    state.context.inline = Inline::Oblique;
                     tokens.push(Token::Oblique(Oblique::new(true)));
                     continue;
                 } else if Anchor::probe(lexeme) {
-                    state.context.inline = InlineContext::Anchor;
+                    state.context.inline = Inline::Anchor;
                     state.buffers.anchor.clear();
 
                     if lexeme.match_first_char('|') {
@@ -94,81 +96,27 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                     continue;
                 }
             },
-            InlineContext::Code => {
+            Inline::Code => {
                 if Code::probe(lexeme) {
-                    state.context.inline = InlineContext::None;
+                    state.context.inline = Inline::None;
                     tokens.push(Token::Code(Code::new(false)));
                     continue;
                 }
             },
-            InlineContext::Oblique => {
+            Inline::Oblique => {
                 if Oblique::probe(lexeme) {
-                    state.context.inline = InlineContext::None;
+                    state.context.inline = Inline::None;
                     tokens.push(Token::Oblique(Oblique::new(false)));
                     continue;
                 }
             },
-            InlineContext::Anchor => {
-                let buffer = &mut state.buffers.anchor;
-                let candidate = &mut buffer.candidate;
-                if candidate.text.is_empty() {
-                    if lexeme.next() == "|" {
-                        buffer.text.push_str(&lexeme.text());
-                        candidate.text.clone_from(&buffer.text);
-                    } else {
-                        buffer.text.push_str(&lexeme.text());
-                    }
-                    continue;
-                } else if candidate.destination.is_none() {
-                    // candidate is leading and we found the second pipe
-                    if candidate.leading && lexeme.text() == "|" {
-                        // third pipe immediately after second: forcing flanking
-                        if lexeme.match_next_first_char('|') {
-                            candidate.destination =
-                                Some(candidate.text.clone());
-                            let token = Token::Anchor(candidate.clone());
-                            tokens.push(token);
-                            state.context.inline = InlineContext::None;
-                            iterator.next();
-                            continue;
-                        // whitespace or punctuation after pipe: flanking anchor
-                        } else if lexeme.is_next_whitespace()
-                            || lexeme.is_next_punctuation()
-                        {
-                            candidate.destination =
-                                Some(candidate.text.clone());
-                            let token = Token::Anchor(candidate.clone());
-                            tokens.push(token);
-                            state.context.inline = InlineContext::None;
-                        // non-whitespace after pipe is the destination
-                        } else {
-                            candidate.destination = Some(lexeme.next().clone());
-                            let token = Token::Anchor(candidate.clone());
-                            tokens.push(token);
-                            state.context.inline = InlineContext::None;
-                            // if there is a trailing pipe, consume it
-                            if let Some(next) = iterator.next()
-                                && next.next() == "|"
-                            {
-                                iterator.next();
-                            }
-                        }
-                    // candidate is nonleading and we found a second pipe
-                    } else if !candidate.leading && lexeme.next() == "|" {
-                        candidate.destination = Some(lexeme.text());
-                        tokens.push(Token::Anchor(candidate.clone()));
-                        state.context.inline = InlineContext::None;
-                        iterator.next();
-                    // candidate is nonleading and we found whitespace
-                    } else if lexeme.is_next_whitespace() {
-                        candidate.destination = Some(lexeme.text());
-                        let token = Token::Anchor(candidate.clone());
-                        tokens.push(token);
-                        state.context.inline = InlineContext::None;
-                    // candidate is nonleading and we haven't found whitespace
-                    } else {
-                        buffer.destination.push_str(&lexeme.text());
-                    }
+            Inline::Anchor => {
+                if context::anchor::parse(
+                    lexeme,
+                    &mut iterator,
+                    &mut state,
+                    &mut tokens,
+                ) {
                     continue;
                 }
             },
@@ -182,48 +130,16 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
         }
     }
 
-    close(&state, &mut tokens);
+    context::close(&state, &mut tokens);
     tokens
 }
 
-fn close(state: &State, tokens: &mut Vec<Token>) {
-    match state.context.block {
-        BlockContext::PreFormat => {
-            tokens.push(Token::PreFormat(PreFormat::new(false)));
-        },
-        BlockContext::Paragraph => {
-            tokens.push(Token::Paragraph(Paragraph::new(false)));
-        },
-        BlockContext::Header(_) => panic!("End of file with open header"),
-        BlockContext::None => (),
-    }
-}
-
-enum BlockContext {
-    Paragraph,
-    Header(u8),
-    PreFormat,
-    None,
-}
-
-enum InlineContext {
-    Anchor,
-    Code,
-    Oblique,
-    None,
-}
-
-struct State {
+pub struct State {
     context: Context,
     dom_ids: HashMap<String, Vec<String>>,
     buffers: Buffers,
 }
 
-struct Context {
-    block: BlockContext,
-    inline: InlineContext,
-}
-
 struct Buffers {
     anchor: AnchorBuffer,
 }
@@ -247,8 +163,8 @@ impl State {
     fn new() -> State {
         State {
             context: Context {
-                inline: InlineContext::None,
-                block: BlockContext::None,
+                inline: Inline::None,
+                block: Block::None,
             },
             dom_ids: HashMap::new(),
             buffers: Buffers {
@@ -325,6 +241,14 @@ mod tests {
         );
     }
 
+    #[test]
+    fn anchor_to_node_s() {
+        assert_eq!(
+            read_noconfig("The |letter s|s|'s node: |s|!"),
+            r#"<p>The <a href="/node/s">letter s</a>'s node: <a href="/node/s">s</a>!</p>"#
+        );
+    }
+
     #[test]
     fn clear_anchor_buffer() {
         assert_eq!(
@@ -356,27 +280,27 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "End of file with open header")]
+    #[should_panic(expected = "End of input with open header")]
     fn end_with_open_header() {
         let default_state = State::new();
         let state = State {
             context: Context {
-                block: BlockContext::Header(1),
+                block: Block::Header(1),
                 ..default_state.context
             },
             ..default_state
         };
 
-        close(&state, &mut vec![]);
+        context::close(&state, &mut vec![]);
     }
 
     #[test]
     fn end_with_open_preformat() {
         let mut state = State::new();
-        state.context.block = BlockContext::PreFormat;
+        state.context.block = Block::PreFormat;
 
         let mut vec: Vec<Token> = vec![];
-        close(&state, &mut vec);
+        context::close(&state, &mut vec);
         assert_eq!(vec, vec![Token::PreFormat(PreFormat::new(false))]);
     }
 
diff --git a/src/syntax/content/parser/context.rs b/src/syntax/content/parser/context.rs
new file mode 100644
index 0000000..99d4f1c
--- /dev/null
+++ b/src/syntax/content/parser/context.rs
@@ -0,0 +1,40 @@
+use crate::syntax::content::parser::{
+    token::{Token, paragraph::Paragraph, preformat::PreFormat},
+    State,
+};
+
+pub mod anchor;
+
+pub struct Context {
+    pub block: Block, // open block-level construct (`Block::None` if none)
+    pub inline: Inline, // open inline construct (`Inline::None` if none)
+}
+
+pub enum Block {
+    Paragraph, // opened by Paragraph::probe; closed on "\n" or by close()
+    Header(u8), // payload is the level from Header::level(); closed on "\n"
+    PreFormat, // toggled by PreFormat::probe; lexemes inside become Literals
+    None, // no block open (initial state)
+}
+
+pub enum Inline {
+    Anchor, // anchor in progress; lexemes are routed to `anchor::parse`
+    Code, // between an opening and a closing Code::probe match
+    Oblique, // between an opening and a closing Oblique::probe match
+    None, // no inline construct open (initial state)
+}
+
+/// # Panics
+/// Panics if `state.context.block` is still `Block::Header` at end of input.
+pub fn close(state: &State, tokens: &mut Vec<Token>) {
+    match state.context.block {
+        Block::PreFormat => {
+            tokens.push(Token::PreFormat(PreFormat::new(false)));
+        },
+        Block::Paragraph => {
+            tokens.push(Token::Paragraph(Paragraph::new(false)));
+        },
+        Block::Header(_) => panic!("End of input with open header"),
+        Block::None => (), // nothing open: no token is pushed
+    }
+}
diff --git a/src/syntax/content/parser/context/anchor.rs b/src/syntax/content/parser/context/anchor.rs
new file mode 100644
index 0000000..fc47246
--- /dev/null
+++ b/src/syntax/content/parser/context/anchor.rs
@@ -0,0 +1,74 @@
+use std::{iter::Peekable, slice::Iter};
+
+use crate::syntax::content::parser::{
+    State, context::Inline, lexeme::Lexeme, token::Token,
+};
+
+pub fn parse(
+    lexeme: &Lexeme,
+    iterator: &mut Peekable<Iter<'_, Lexeme>>,
+    state: &mut State,
+    tokens: &mut Vec<Token>,
+) -> bool {
+    let buffer = &mut state.buffers.anchor;
+    let candidate = &mut buffer.candidate;
+    if candidate.text.is_empty() {
+        if lexeme.next() == "|" {
+            buffer.text.push_str(&lexeme.text());
+            candidate.text.clone_from(&buffer.text); // text is complete
+        } else {
+            buffer.text.push_str(&lexeme.text());
+        }
+        return true; // handled: the caller `continue`s
+    } else if candidate.destination.is_none() {
+        // candidate is leading and this lexeme is the second pipe
+        if candidate.leading && lexeme.text() == "|" {
+            // third pipe right after the second forces a flanking anchor
+            if lexeme.match_next_first_char('|') {
+                candidate.destination = Some(candidate.text.clone());
+                let token = Token::Anchor(candidate.clone());
+                tokens.push(token);
+                state.context.inline = Inline::None;
+                iterator.next(); // skip the third pipe
+                return true;
+                // below: whitespace/punctuation after pipe => flanking anchor
+            } else if lexeme.is_next_whitespace()
+                || lexeme.is_next_punctuation()
+            {
+                candidate.destination = Some(candidate.text.clone());
+                let token = Token::Anchor(candidate.clone());
+                tokens.push(token);
+                state.context.inline = Inline::None;
+                // below: non-whitespace after pipe is the destination
+            } else {
+                candidate.destination = Some(lexeme.next().clone());
+                let token = Token::Anchor(candidate.clone());
+                tokens.push(token);
+                state.context.inline = Inline::None;
+                // skip the next lexeme, and a trailing pipe if one follows it
+                if let Some(next) = iterator.next()
+                    && next.next() == "|"
+                {
+                    iterator.next();
+                }
+            }
+            // below: candidate is nonleading and a pipe follows this lexeme
+        } else if !candidate.leading && lexeme.next() == "|" {
+            candidate.destination = Some(lexeme.text());
+            tokens.push(Token::Anchor(candidate.clone()));
+            state.context.inline = Inline::None;
+            iterator.next(); // skip the pipe after the destination
+            // below: whitespace follows this lexeme (`leading` is not tested)
+        } else if lexeme.is_next_whitespace() {
+            candidate.destination = Some(lexeme.text());
+            let token = Token::Anchor(candidate.clone());
+            tokens.push(token);
+            state.context.inline = Inline::None;
+            // below: no terminator found yet, keep buffering the destination
+        } else {
+            buffer.destination.push_str(&lexeme.text());
+        } // NOTE(review): nothing in this function reads buffer.destination
+        return true;
+    }
+    false // destination already set: this lexeme is not handled here
+}