Simplify Anchor context parser

2026-01-02 21:25:41 -03:00 · 2026-01-02 21:25:41 -03:00 · cbefcdcad7
commit cbefcdcad7
parent 48765de3b6
3 changed files with 90 additions and 50 deletions
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@ -1,6 +1,6 @@
 use std::collections::{HashMap};
-use crate::types::Config;
+use crate::{prelude::*,types::Config};
 use super::{Parseable as _, Token, LexMap};
 use token::{
    anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -28,6 +28,8 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
    let segments = segment::segment(text);
    let lexemes = Lexeme::collect(&segments);
    log!("Lexing segments: {segments:?}");
    let mut iterator = lexemes.iter().peekable();
    while let Some(lexeme) = iterator.next() {
        match state.context.block {
@ -81,13 +83,22 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                    tokens.push(Token::Code(Code::new(true)));
                    continue;
                } else if Anchor::probe(lexeme) {
                    log!("Positively probed anchor: {lexeme:?}");
                    state.context.inline = Inline::Anchor;
                    state.buffers.anchor.clear();
-                    if lexeme.match_first_char('|') {
+                    if lexeme.match_as_char('|') {
                        log!("{:#?} matches as a pipe char", lexeme.text());
                        state.buffers.anchor.candidate.leading = true;
                    } else {
                        log!(
                            "{:#?} not a pipe: assuming it's the anchor text",
                            lexeme.text(),
                        );
                        state.buffers.anchor.candidate.text = lexeme.text();
                        // because we probed positively and this lexeme is not a pipe,
                        // the next lexeme must be the pipe, so consume it here
                        iterator.next();
                    }
                    continue;
                } else if Oblique::probe(lexeme) {
@ -294,7 +305,7 @@ mod tests {
    }
    #[test]
-    fn http_external_anchor_leading_no_third_then_punctuation_then_eof() {
+    fn http_external_anchor_leading_no_third_then_punctuation_then_eoi() {
        assert_eq!(
            read_noconfig("|Rust toolchain|https://rustup.rs/,"),
            r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
@ -302,7 +313,7 @@ mod tests {
    }
    #[test]
-    fn http_external_anchor_leading_no_third_then_eof() {
+    fn http_external_anchor_leading_no_third_then_eoi() {
        assert_eq!(
            read_noconfig("|Rust toolchain|https://rustup.rs/"),
            r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
@ -331,7 +342,7 @@ mod tests {
    }
    #[test]
-    fn eof_pre() {
+    fn eoi_pre() {
        let payload = "Jp8INpWzsQmk20jpIhBFCfMUXOztxv0w";
        assert_eq!(
            read_noconfig(&format!("`\n{payload}\n`")),
--- a/src/syntax/content/parser/context/anchor.rs
+++ b/src/syntax/content/parser/context/anchor.rs
@ -1,74 +1,100 @@
 use std::{iter::Peekable, slice::Iter};
-use crate::syntax::content::parser::{
+use crate::{
-    State, context::Inline, lexeme::Lexeme, token::Token,
+    prelude::*,
    syntax::content::parser::{
        State, context::Inline, lexeme::Lexeme, token::Token,
    },
 };
 /// Handles open anchor contexts until an anchor token is fully parsed.
 ///
 /// This function is only called if the current inline context is Anchor.
 ///
 /// A return value of true will trigger a continue in the outer parser,
 /// skipping any further parsing of the current lexeme.
 ///
 /// # Panics
 /// This function will panic if it can't determine the destination of an anchor.
 pub fn parse(
    lexeme: &Lexeme,
    iterator: &mut Peekable<Iter<'_, Lexeme>>,
    state: &mut State,
    tokens: &mut Vec<Token>,
 ) -> bool {
    log!("Resolving open context: {:#?}", state.clone().buffers.anchor);
    let buffer = &mut state.buffers.anchor;
    let candidate = &mut buffer.candidate;
    // This is only true if the anchor is leading, otherwise the outer parser
    // would already have set its text to the word before the first pipe
    if candidate.text.is_empty() {
        log!("Seeking text at {:#?} -> {:#?}", lexeme.text(), lexeme.next());
        if lexeme.next() == "|" {
            buffer.text.push_str(&lexeme.text());
            candidate.text.clone_from(&buffer.text);
            log!("End: {:#?}", lexeme.text());
            return true;
        } else {
            log!("Pushing non-terminal {:#?} into buffer {:#?}",
                lexeme.text(), buffer.text);
            buffer.text.push_str(&lexeme.text());
            return true;
        }
-        return true;
+    }
-    } else if candidate.destination.is_none() {
+
-        // candidate is leading and we found the second pipe
+    if candidate.destination.is_none() {
-        if candidate.leading && lexeme.text() == "|" {
+
-            // third pipe immediately after second: forcing flanking
+        log!("Seeking destination at {:#?} -> {:#?}",
-            if lexeme.match_next_first_char('|') {
+            lexeme.text(), lexeme.next());
        // Conditions to this decision tree should match the destination end
        if lexeme.last(){
            log!("End: no more input");
            candidate.destination = Some(candidate.text.clone());
        } else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
            if buffer.destination.is_empty() {
                candidate.destination = Some(candidate.text.clone());
                let token = Token::Anchor(candidate.clone());
                tokens.push(token);
                state.context.inline = Inline::None;
                iterator.next();
                return true;
                // whitespace or punctuation after pipe: flanking anchor
            } else if lexeme.is_next_whitespace()
                || lexeme.is_next_punctuation()
            {
                candidate.destination = Some(candidate.text.clone());
                let token = Token::Anchor(candidate.clone());
                tokens.push(token);
                state.context.inline = Inline::None;
                // non-whitespace after pipe is the destination
            } else {
-                candidate.destination = Some(lexeme.next().clone());
+                candidate.destination = Some(buffer.destination.clone());
-                let token = Token::Anchor(candidate.clone());
+                return true
                tokens.push(token);
                state.context.inline = Inline::None;
                // if there is a trailing pipe, consume it
                if let Some(next) = iterator.next()
                    && next.next() == "|"
                {
                    iterator.next();
                }
            }
-            // candidate is nonleading and we found a second pipe
+
-        } else if !candidate.leading && lexeme.next() == "|" {
+        } else if lexeme.match_as_char('|') {
-            candidate.destination = Some(lexeme.text());
+            log!("Found a pipe, but no boundary: Destination likely follows");
            return true;
        } else if lexeme.is_punctuation() && lexeme.is_next_whitespace() {
            log!("Found puncutation followed by whitespace");
            candidate.destination = Some(buffer.destination.clone());
            tokens.push(Token::Anchor(candidate.clone()));
            state.context.inline = Inline::None;
-            iterator.next();
+            return false;
-            // candidate is nonleading and we found whitespace
+        } else if lexeme.is_whitespace() {
-        } else if lexeme.is_next_whitespace() {
+            log!("End: Whitespace");
-            candidate.destination = Some(lexeme.text());
+            candidate.destination = Some(buffer.destination.clone());
-            let token = Token::Anchor(candidate.clone());
+
-            tokens.push(token);
+        // This else branch is the 'no end found yet' state and will keep
-            state.context.inline = Inline::None;
+        // pushing lexemes into the buffer until an end is found above
            // candidate is nonleading and we haven't found whitespace
        } else {
            log!(
                "Pushing non-terminal {:#?} into buffer {:#?}",
                lexeme.text(), buffer.destination,
            );
            buffer.destination.push_str(&lexeme.text());
            return true
        }
        return true;
    }
    // This point should never be reached with a still None destination,
    // which would mean there is some case where the end of the destination
    // was never found and we kept filling the buffer endlessly,
    // causing the program to panic anyway when rendering anchors
    assert!(candidate.destination.is_some(),
        "Anchor context parsing done but no destination found: {:#?}",
        state.buffers.anchor
    );
    tokens.push(Token::Anchor(candidate.clone()));
    state.context.inline = Inline::None;
    false
 }
--- a/src/syntax/content/parser/token/anchor.rs
+++ b/src/syntax/content/parser/token/anchor.rs
@ -5,6 +5,7 @@ pub struct Anchor {
    pub text: String,
    pub destination: Option<String>,
    pub leading: bool,
    pub balanced: bool,
    pub external: bool,
 }
@ -45,12 +46,14 @@ impl Anchor {
        destination: &str,
        leading: bool,
        external: bool,
        balanced: bool,
    ) -> Anchor {
        Anchor {
            text: text.to_owned(),
            destination: Some(Anchor::resolve_destination(destination)),
            leading,
            external,
            balanced,
        }
    }
@ -70,7 +73,7 @@ mod tests {
    #[test]
    fn render_anchor() {
-        let anchor = Anchor::new("AnchorText", "AnchorDest", true, false);
+        let anchor = Anchor::new("AnchorText", "AnchorDest", true, false, false);
        assert_eq!(
            anchor.render(),
            r#"<a href="/node/AnchorDest">AnchorText</a>"#