Fix unbalanced anchor tags

2026-01-05 00:35:38 -03:00 · 2026-01-05 00:35:38 -03:00 · cb24837ff0
commit cb24837ff0
parent dfa835178f
6 changed files with 88 additions and 26 deletions
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@ -51,9 +51,7 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                    tokens.push(Token::Header(header));
                    continue;
                } else if Paragraph::probe(lexeme) {
-                    log!(
+                    log!("Probed block context None -> Paragraph: {lexeme:?}");
                        "Probed {lexeme:#?} from Block::None -> Block::Paragraph"
                    );
                    state.context.block = Block::Paragraph;
                    tokens.push(Token::Paragraph(Paragraph::new(true)));
                }
@ -68,12 +66,8 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
                continue;
            },
            Block::Paragraph => {
-                if lexeme.text() == "\n"
+                if Paragraph::probe_end(lexeme) {
-                    && matches!(state.context.inline, Inline::None)
+                    log!("Probed block context Paragraph -> None: {lexeme:?}");
                {
                    log!(
                        "Probed {lexeme:#?} from Block::Paragraph -> Block::None"
                    );
                    tokens.push(Token::Paragraph(Paragraph::new(false)));
                    state.context.block = Block::None;
                }
@ -357,8 +351,7 @@ mod tests {
            read_noconfig("\n|SomeAnchor|\n"),
            concat!(
                "\n",
-                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
+                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#
                "\n"
            ),
        );
    }
@ -368,10 +361,9 @@ mod tests {
        assert_eq!(
            read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|\n"),
            concat!(
-                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
+                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a>"#,
                "\n",
-                r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
+                r#"<a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#
                "\n"
            )
        );
    }
@ -384,18 +376,53 @@ mod tests {
                r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
                "\n",
                "\n",
-                r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
+                r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#
                "\n",
            ),
        );
    }
    /// A single footer line mixing three anchors (two with URL destinations,
    /// one with a node destination) is expected to come out as one paragraph
    /// with three `<a>` elements separated by bullets.
    #[test]
    fn homepage_footer() {
        let source = "made by jutty|https://jutty.dev • acknowledgments|Acknowledgments • |source code|https://codeberg.org/jutty/en";
        let expected = r#"<p>made by <a href="https://jutty.dev">jutty</a> • <a href="/node/Acknowledgments">acknowledgments</a> • <a href="https://codeberg.org/jutty/en">source code</a></p>"#;

        assert_eq!(read_noconfig(source), expected);
    }
    /// An anchor that is the last thing in the input (no trailing newline)
    /// is expected to be closed before the paragraph is closed.
    #[test]
    fn trailing_anchor() {
        let rendered = read_noconfig("see acks|acks");
        let expected = r#"<p>see <a href="/node/acks">acks</a></p>"#;

        assert_eq!(rendered, expected);
    }
    /// Same as the trailing-anchor case, but the input is wrapped in
    /// newlines: the expected output keeps the leading newline and has
    /// nothing after the closing `</p>`.
    #[test]
    fn trailing_anchor_with_newline() {
        let rendered = read_noconfig("\nsee acks|acks\n");
        let expected =
            concat!("\n", r#"<p>see <a href="/node/acks">acks</a></p>"#);

        assert_eq!(rendered, expected);
    }
    /// Emphasis that ends exactly at the end of the input is expected to be
    /// rendered as a closed `<em>` inside a closed paragraph.
    #[test]
    fn trailing_oblique() {
        let rendered = read_noconfig("see _acks_");

        assert_eq!(rendered, "<p>see <em>acks</em></p>");
    }
    /// Emphasis followed by a single final newline: the expected output is
    /// the same as without the newline.
    #[test]
    fn trailing_oblique_with_newline() {
        let rendered = read_noconfig("see _acks_\n");

        assert_eq!(rendered, "<p>see <em>acks</em></p>");
    }
    #[test]
    fn pre() {
        let payload = "D0qdJ184f3q1okbYu3Xm1d93jj6jy615";
        assert_eq!(
            read_noconfig(&format!("`\n{payload}\n`\n")),
-            format!("<pre>\n{payload}\n</pre>\n"),
+            format!("<pre>\n{payload}\n</pre>"),
        );
    }
--- a/src/syntax/content/parser/context.rs
+++ b/src/syntax/content/parser/context.rs
@ -1,6 +1,6 @@
 use crate::syntax::content::parser::{
    token::{Token, paragraph::Paragraph, preformat::PreFormat},
    State,
    token::{Token, paragraph::Paragraph, preformat::PreFormat},
 };
 pub mod anchor;
--- a/src/syntax/content/parser/context/anchor.rs
+++ b/src/syntax/content/parser/context/anchor.rs
@ -57,6 +57,7 @@ pub fn parse(
        );
        // Conditions in this decision tree should match the destination end
        // or some intermediary state necessary for finding it
        if lexeme.match_as_char('s')
            && lexeme.is_next_boundary()
            && !lexeme.match_next_as_char('|')
@ -80,7 +81,7 @@ pub fn parse(
            state.context.inline = Inline::None;
            return true;
        } else if lexeme.match_as_char('|') && !candidate.balanced {
-            log!("Found a pipe, but no boundary: Destination likely follows");
+            log!("State: Found a pipe, but no boundary: destination follows");
            candidate.balanced = true;
            return true;
        } else if lexeme.match_as_char('|') {
@ -96,12 +97,13 @@ pub fn parse(
            tokens.push(Token::Anchor(candidate.clone()));
            state.context.inline = Inline::None;
            return false;
-        } else if lexeme.is_whitespace() {
+        } else if lexeme.is_next_whitespace() {
-            log!("End: Whitespace");
+            log!("End: next is whitespace");
            buffer.destination.push_str(&lexeme.text());
            candidate.destination = Some(buffer.destination.clone());
            tokens.push(Token::Anchor(candidate.clone()));
            state.context.inline = Inline::None;
-            return false;
+            return true;
        // This else branch is the 'no end found yet' state and will keep
        // pushing lexemes into the buffer until an end is found above
@ -137,3 +139,32 @@ pub fn parse(
    state.context.inline = Inline::None;
    false
 }
 #[cfg(test)]
 mod tests {
    use crate::{syntax::content::parser, types::Graph};

    /// Renders `input` with `parser::read`, using the config taken from a
    /// graph built by `Graph::new(None)`.
    fn read_noconfig(input: &str) -> String {
        let graph = Graph::new(None);
        parser::read(input, &graph.meta.config)
    }

    /// Dropping the leading pipe of `|a|a|` must not change the output.
    #[test]
    fn indifferent_trailing_pipe() {
        let fully_piped = read_noconfig("|a|a|");
        let no_leading_pipe = read_noconfig("a|a|");

        assert_eq!(fully_piped, no_leading_pipe);
    }

    /// Dropping the trailing pipe of `|a|a|` must not change the output.
    #[test]
    fn indifferent_leading_pipe() {
        let fully_piped = read_noconfig("|a|a|");
        let no_trailing_pipe = read_noconfig("|a|a");

        assert_eq!(fully_piped, no_trailing_pipe);
    }

    /// Same as `indifferent_trailing_pipe`, with a second line following
    /// the anchor.
    #[test]
    fn indifferent_multiline_trailing_pipe() {
        let fully_piped = read_noconfig("|a|a|\nn");
        let no_leading_pipe = read_noconfig("a|a|\nn");

        assert_eq!(fully_piped, no_leading_pipe);
    }

    /// Same as `indifferent_leading_pipe`, with a second line following
    /// the anchor.
    #[test]
    fn indifferent_multiline_leading_pipe() {
        let fully_piped = read_noconfig("|a|a|\nn");
        let no_trailing_pipe = read_noconfig("|a|a\nn");

        assert_eq!(fully_piped, no_trailing_pipe);
    }
 }
--- a/src/syntax/content/parser/token/linebreak.rs
+++ b/src/syntax/content/parser/token/linebreak.rs
@ -2,12 +2,12 @@ use crate::{
    syntax::content::{Parseable, parser::lexeme::Lexeme},
 };
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Default, Debug, Clone, Eq, PartialEq)]
 pub struct LineBreak {}
 impl Parseable for LineBreak {
    fn probe(lexeme: &Lexeme) -> bool {
-        lexeme.text() == "\n"
+        lexeme.text() == "\n" && !lexeme.last()
    }
    fn lex(_lexeme: &Lexeme) -> LineBreak {
--- a/src/syntax/content/parser/token/literal.rs
+++ b/src/syntax/content/parser/token/literal.rs
@ -6,8 +6,8 @@ pub struct Literal {
 }
 impl Parseable for Literal {
-    fn probe(_lexeme: &Lexeme) -> bool {
+    fn probe(lexeme: &Lexeme) -> bool {
-        true
+        !(lexeme.last() && lexeme.is_whitespace())
    }
    fn lex(lexeme: &Lexeme) -> Literal {
--- a/src/syntax/content/parser/token/paragraph.rs
+++ b/src/syntax/content/parser/token/paragraph.rs
@ -9,6 +9,10 @@ impl Paragraph {
    /// Builds a `Paragraph` whose `open` field is `Some(open)`.
    pub fn new(open: bool) -> Paragraph {
        let open = Some(open);
        Paragraph { open }
    }
    /// Returns `true` when `lexeme` matches `'\n'` and, per
    /// `match_next_as_char`, the following one matches `'\n'` as well.
    pub fn probe_end(lexeme: &Lexeme) -> bool {
        // The second check is only evaluated when the first one holds.
        if !lexeme.match_as_char('\n') {
            return false;
        }
        lexeme.match_next_as_char('\n')
    }
 }
 impl Parseable for Paragraph {