Handle several anchor edge cases

2026-01-03 16:02:11 -03:00 · 2026-01-03 16:02:11 -03:00 · 9f04a4606c
commit 9f04a4606c
parent f9bff6acab
3 changed files with 164 additions and 61 deletions
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@ -1,6 +1,6 @@
 use std::collections::{HashMap};
-use crate::{prelude::*,types::Config};
+use crate::{prelude::*, types::Config};
 use super::{Parseable as _, Token, LexMap};
 use token::{
    anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -15,10 +15,12 @@ pub mod segment;
 pub mod context;
 const LEXMAP: LexMap = &[
-    (LineBreak::probe, |word| {
+    (LineBreak::probe, |lexeme| {
-        Token::LineBreak(LineBreak::lex(word))
+        Token::LineBreak(LineBreak::lex(lexeme))
    }),
    (Literal::probe, |lexeme| {
        Token::Literal(Literal::lex(lexeme))
    }),
    (Literal::probe, |word| Token::Literal(Literal::lex(word))),
 ];
 fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
@ -49,6 +51,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                    tokens.push(Token::Header(header));
                    continue;
                } else if Paragraph::probe(lexeme) {
                    log!(
                        "Probed {lexeme:#?} from Block::None -> Block::Paragraph"
                    );
                    state.context.block = Block::Paragraph;
                    tokens.push(Token::Paragraph(Paragraph::new(true)));
                }
@ -63,7 +68,12 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                continue;
            },
            Block::Paragraph => {
-                if lexeme.text() == "\n" {
+                if lexeme.text() == "\n"
                    && matches!(state.context.inline, Inline::None)
                {
                    log!(
                        "Probed {lexeme:#?} from Block::Paragraph -> Block::None"
                    );
                    tokens.push(Token::Paragraph(Paragraph::new(false)));
                    state.context.block = Block::None;
                }
@ -122,12 +132,7 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                }
            },
            Inline::Anchor => {
-                if context::anchor::parse(
+                if context::anchor::parse(lexeme, &mut state, &mut tokens) {
                    lexeme,
                    &mut iterator,
                    &mut state,
                    &mut tokens,
                ) {
                    continue;
                }
            },
@ -135,7 +140,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
        for &(ref probe, lex) in map {
            if probe(lexeme) {
-                tokens.push(lex(lexeme));
+                let token = lex(lexeme);
                log!("Lexmap lexed {lexeme:?} into {token:?}");
                tokens.push(token);
                break;
            }
        }
@ -223,23 +230,31 @@ mod tests {
    }
    #[test]
-    fn force_flanking() {
+    fn flanking_with_trailing_comma() {
        assert_eq!(
-            read_noconfig("|Node||"),
+            read_noconfig("|Node|,"),
            r#"<p><a href="/node/Node">Node</a>,</p>"#
        );
    }
    #[test]
    fn flanking_with_trailing_comma_and_space() {
        assert_eq!(
            read_noconfig("|Node|, at"),
            r#"<p><a href="/node/Node">Node</a>, at</p>"#
        );
    }
    #[test]
    fn flanking_at_eoi() {
        assert_eq!(
            read_noconfig("|Node|"),
            r#"<p><a href="/node/Node">Node</a></p>"#
        );
    }
    #[test]
-    fn force_flanking_with_trailing_letter() {
+    fn needless_three_pipe_anchor() {
        assert_eq!(
            read_noconfig("|Node||s"),
            r#"<p><a href="/node/Node">Node</a>s</p>"#
        );
    }
    #[test]
    fn flanking_with_trailing_pipe() {
        assert_eq!(
            read_noconfig("|Node|Destination|"),
            r#"<p><a href="/node/Destination">Node</a></p>"#
@ -278,6 +293,22 @@ mod tests {
        );
    }
    #[test]
    fn nonleading_plural_anchor_at_eoi() {
        assert_eq!(
            read_noconfig("element|s"),
            r#"<p><a href="/node/element">elements</a></p>"#
        );
    }
    #[test]
    fn leading_plural_anchor_at_eoi() {
        assert_eq!(
            read_noconfig("|element|s"),
            r#"<p><a href="/node/element">elements</a></p>"#
        );
    }
    #[test]
    fn http_external_anchor() {
        assert_eq!(
@ -289,26 +320,26 @@ mod tests {
    }
    #[test]
-    fn http_external_anchor_leading_no_third() {
+    fn http_external_anchor_leading_no_third_then_newline() {
        assert_eq!(
-            read_noconfig("|Rust toolchain|https://rustup.rs/ "),
+            read_noconfig(concat!(
-            r#"<p><a href="https://rustup.rs/">Rust toolchain</a> </p>"#
+                "|Rust toolchain|https://rustup.rs/",
                "\n",
                "at rustup.rs",
            )),
            concat!(
                r#"<p><a href="https://rustup.rs/">Rust toolchain</a>"#,
                "\n",
                "at rustup.rs</p>",
            )
        );
    }
    #[test]
-    fn http_external_anchor_leading_no_third_then_punctuation_then_space() {
+    fn http_external_anchor_leading_no_third_then_space() {
        assert_eq!(
-            read_noconfig("|Rust toolchain|https://rustup.rs/, "),
+            read_noconfig("|Rust toolchain|https://rustup.rs/ at rustup.rs"),
-            r#"<p><a href="https://rustup.rs/">Rust toolchain</a>, </p>"#
+            r#"<p><a href="https://rustup.rs/">Rust toolchain</a> at rustup.rs</p>"#
        );
    }
    #[test]
    fn http_external_anchor_leading_no_third_then_punctuation_then_eoi() {
        assert_eq!(
            read_noconfig("|Rust toolchain|https://rustup.rs/,"),
            r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
        );
    }
@ -321,13 +352,40 @@ mod tests {
    }
    #[test]
-    fn clear_anchor_buffer() {
+    fn newline_wrapped_anchor() {
        assert_eq!(
-            read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|"),
+            read_noconfig("\n|SomeAnchor|\n"),
            concat!(
                "\n",
                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
                "\n"
            ),
        );
    }
    #[test]
    fn newline_separated_anchors() {
        assert_eq!(
            read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|\n"),
            concat!(
                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
                "\n",
-                r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#
+                r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
                "\n"
            )
        );
    }
    #[test]
    fn empty_line_separated_anchors() {
        assert_eq!(
            read_noconfig("|SomeAnchor|\n\n|SomeOtherAnchor|\n"),
            concat!(
                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
                "\n",
                "\n",
                r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
                "\n",
            ),
        );
    }
--- a/src/syntax/content/parser/context/anchor.rs
+++ b/src/syntax/content/parser/context/anchor.rs
@ -1,5 +1,3 @@
 use std::{iter::Peekable, slice::Iter};
 use crate::{
    prelude::*,
    syntax::content::parser::{
@ -11,61 +9,89 @@ use crate::{
 ///
 /// This function is only called if the current inline context is Anchor.
 ///
-/// A return kind of true will trigger a continue in the outer parser,
+/// A return of `true` will trigger a continue in the outer parser,
 /// skipping any further parsing of the current lexeme.
 ///
 /// # Panics
 /// This function will panic if it can't determine the destination of an anchor.
 pub fn parse(
    lexeme: &Lexeme,
    iterator: &mut Peekable<Iter<'_, Lexeme>>,
    state: &mut State,
    tokens: &mut Vec<Token>,
 ) -> bool {
-    log!("Resolving open context: {:#?}", state.clone().buffers.anchor);
+    log!(
        "Resolving open context: {:#?}",
        state.clone().buffers.anchor
    );
    let buffer = &mut state.buffers.anchor;
    let candidate = &mut buffer.candidate;
    // This is only true if the anchor is leading, otherwise the outer parser
    // would already have set its text to the word before the first pipe
    if candidate.text.is_empty() {
-        log!("Seeking text at {:#?} -> {:#?}", lexeme.text(), lexeme.next());
+        log!(
            "Seeking end of text at {:#?} -> {:#?}",
            lexeme.text(),
            lexeme.next()
        );
        if lexeme.next() == "|" {
            log!("End: Next lexeme is a pipe");
            buffer.text.push_str(&lexeme.text());
            candidate.text.clone_from(&buffer.text);
            log!("End: {:#?}", lexeme.text());
            return true;
        } else {
-            log!("Pushing non-terminal {:#?} into buffer {:#?}",
+            log!(
-                lexeme.text(), buffer.text);
+                "Pushing non-terminal {:#?} into buffer {:#?}",
                lexeme.text(),
                buffer.text
            );
            buffer.text.push_str(&lexeme.text());
            return true;
        }
        return true;
    }
    if candidate.destination.is_none() {
        log!(
            "Seeking end of destination at {:#?} -> {:#?}",
            lexeme.text(),
            lexeme.next()
        );
-        log!("Seeking destination at {:#?} -> {:#?}",
+        // Conditions in this decision tree should match the destination end
-            lexeme.text(), lexeme.next());
+        if lexeme.match_as_char('s')
-
+            && lexeme.is_next_boundary()
-        // Conditions to this decision tree should match the destination end
+            && !lexeme.match_next_as_char('|')
-        if lexeme.last(){
+        {
-            log!("End: no more input");
+            log!("End: Plural anchor");
            candidate.destination = Some(candidate.text.clone());
            candidate.text.push('s');
            if lexeme.last() {
                tokens.push(Token::Anchor(candidate.clone()));
                state.context.inline = Inline::None;
            }
            return true;
        } else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
-
+            log!("End: Pipe followed by boundary");
            if buffer.destination.is_empty() {
                candidate.destination = Some(candidate.text.clone());
            } else {
                candidate.destination = Some(buffer.destination.clone());
                return true
            }
-
+            tokens.push(Token::Anchor(candidate.clone()));
-        } else if lexeme.match_as_char('|') {
+            state.context.inline = Inline::None;
            log!("Found a pipe, but no boundary: Destination likely follows");
            return true;
-        } else if lexeme.is_punctuation() && lexeme.is_next_whitespace() {
+        } else if lexeme.match_as_char('|') && !candidate.balanced {
-            log!("Found puncutation followed by whitespace");
+            log!("Found a pipe, but no boundary: Destination likely follows");
            candidate.balanced = true;
            return true;
        } else if lexeme.match_as_char('|') {
            log!("End: Explicit end-of-destination pipe");
            candidate.destination = Some(buffer.destination.clone());
            return true;
        } else if !candidate.external
            && lexeme.is_punctuation()
            && lexeme.is_next_whitespace()
        {
            log!("End: Punctuation followed by whitespace");
            candidate.destination = Some(buffer.destination.clone());
            tokens.push(Token::Anchor(candidate.clone()));
            state.context.inline = Inline::None;
@ -73,16 +99,28 @@ pub fn parse(
        } else if lexeme.is_whitespace() {
            log!("End: Whitespace");
            candidate.destination = Some(buffer.destination.clone());
            tokens.push(Token::Anchor(candidate.clone()));
            state.context.inline = Inline::None;
            return false;
        // This else branch is the 'no end found yet' state and will keep
        // pushing lexemes into the buffer until an end is found above
        } else {
            log!(
                "Pushing non-terminal {:#?} into buffer {:#?}",
-                lexeme.text(), buffer.destination,
+                lexeme.text(),
                buffer.destination,
            );
            if lexeme.match_as_char(':') {
                candidate.external = true;
            }
            buffer.destination.push_str(&lexeme.text());
-            return true
+            if lexeme.last() {
                candidate.destination = Some(buffer.destination.clone());
                tokens.push(Token::Anchor(candidate.clone()));
                state.context.inline = Inline::None;
            }
            return true;
        }
    }
@ -90,7 +128,8 @@ pub fn parse(
    // which would mean there is some case where the end of the destination
    // was never found and we kept filling the buffer endlessly,
    // causing the program to panic anyway when rendering anchors
-    assert!(candidate.destination.is_some(),
+    assert!(
        candidate.destination.is_some(),
        "Anchor context parsing done but no destination found: {:#?}",
        state.buffers.anchor
    );
--- a/src/syntax/content/parser/lexeme.rs
+++ b/src/syntax/content/parser/lexeme.rs
@ -55,6 +55,10 @@ impl Lexeme {
        self.as_char().is_some_and(|as_char| as_char == c)
    }
    pub fn match_next_as_char(&self, c: char) -> bool {
        self.next_as_char().is_some_and(|next| next == c)
    }
    pub fn is_punctuation(&self) -> bool {
        let punctuation = Delimiters::default().punctuation;
        self.as_char().is_some_and(|c| punctuation.contains(&c))
@ -80,8 +84,10 @@ impl Lexeme {
    pub fn is_next_boundary(&self) -> bool {
        let delimiters = Delimiters::default();
-        self.next_as_char()
+        self.last
-            .is_some_and(|c| delimiters.is_boundary(c))
+            || self
                .next_as_char()
                .is_some_and(|c| delimiters.is_boundary(c))
    }
    pub fn next_first_char(&self) -> Option<char> {