Fix unbalanced anchor tags

This commit is contained in:
Juno Takano 2026-01-05 00:35:38 -03:00
commit cb24837ff0
6 changed files with 88 additions and 26 deletions

View file

@ -51,9 +51,7 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
tokens.push(Token::Header(header)); tokens.push(Token::Header(header));
continue; continue;
} else if Paragraph::probe(lexeme) { } else if Paragraph::probe(lexeme) {
log!( log!("Probed block context None -> Paragraph: {lexeme:?}");
"Probed {lexeme:#?} from Block::None -> Block::Paragraph"
);
state.context.block = Block::Paragraph; state.context.block = Block::Paragraph;
tokens.push(Token::Paragraph(Paragraph::new(true))); tokens.push(Token::Paragraph(Paragraph::new(true)));
} }
@ -68,12 +66,8 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
continue; continue;
}, },
Block::Paragraph => { Block::Paragraph => {
if lexeme.text() == "\n" if Paragraph::probe_end(lexeme) {
&& matches!(state.context.inline, Inline::None) log!("Probed block context Paragraph -> None: {lexeme:?}");
{
log!(
"Probed {lexeme:#?} from Block::Paragraph -> Block::None"
);
tokens.push(Token::Paragraph(Paragraph::new(false))); tokens.push(Token::Paragraph(Paragraph::new(false)));
state.context.block = Block::None; state.context.block = Block::None;
} }
@ -357,8 +351,7 @@ mod tests {
read_noconfig("\n|SomeAnchor|\n"), read_noconfig("\n|SomeAnchor|\n"),
concat!( concat!(
"\n", "\n",
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#, r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#
"\n"
), ),
); );
} }
@ -368,10 +361,9 @@ mod tests {
assert_eq!( assert_eq!(
read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|\n"), read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|\n"),
concat!( concat!(
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#, r#"<p><a href="/node/SomeAnchor">SomeAnchor</a>"#,
"\n", "\n",
r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#, r#"<a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#
"\n"
) )
); );
} }
@ -384,18 +376,53 @@ mod tests {
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#, r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
"\n", "\n",
"\n", "\n",
r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#, r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#
"\n",
), ),
); );
} }
#[test]
fn homepage_footer() {
    // A single line mixing literal text with three anchors: two whose
    // destination is a URL and one whose destination is a node name.
    let input = "made by jutty|https://jutty.dev • acknowledgments|Acknowledgments • |source code|https://codeberg.org/jutty/en";
    let expected = r#"<p>made by <a href="https://jutty.dev">jutty</a> • <a href="/node/Acknowledgments">acknowledgments</a> • <a href="https://codeberg.org/jutty/en">source code</a></p>"#;

    let rendered = read_noconfig(input);

    assert_eq!(rendered, expected);
}
#[test]
fn trailing_anchor() {
    // The anchor is the very last thing in the input: no trailing pipe,
    // whitespace or newline follows its destination.
    let rendered = read_noconfig("see acks|acks");
    let expected = r#"<p>see <a href="/node/acks">acks</a></p>"#;

    assert_eq!(rendered, expected);
}
#[test]
fn trailing_anchor_with_newline() {
    // Same trailing anchor, but the input is wrapped in newlines. The
    // expected output keeps the leading newline and has none at the end.
    let rendered = read_noconfig("\nsee acks|acks\n");
    let expected = concat!("\n", r#"<p>see <a href="/node/acks">acks</a></p>"#);

    assert_eq!(rendered, expected);
}
#[test]
fn trailing_oblique() {
    // Underscore-delimited text at the very end of the input is expected
    // to render as a closed <em> element inside the paragraph.
    let rendered = read_noconfig("see _acks_");

    assert_eq!(rendered, "<p>see <em>acks</em></p>");
}
#[test]
fn trailing_oblique_with_newline() {
    // A final newline after the closing underscore must not show up in
    // the rendered output.
    let rendered = read_noconfig("see _acks_\n");

    assert_eq!(rendered, "<p>see <em>acks</em></p>");
}
#[test] #[test]
fn pre() { fn pre() {
let payload = "D0qdJ184f3q1okbYu3Xm1d93jj6jy615"; let payload = "D0qdJ184f3q1okbYu3Xm1d93jj6jy615";
assert_eq!( assert_eq!(
read_noconfig(&format!("`\n{payload}\n`\n")), read_noconfig(&format!("`\n{payload}\n`\n")),
format!("<pre>\n{payload}\n</pre>\n"), format!("<pre>\n{payload}\n</pre>"),
); );
} }

View file

@ -1,6 +1,6 @@
use crate::syntax::content::parser::{ use crate::syntax::content::parser::{
token::{Token, paragraph::Paragraph, preformat::PreFormat},
State, State,
token::{Token, paragraph::Paragraph, preformat::PreFormat},
}; };
pub mod anchor; pub mod anchor;

View file

@ -57,6 +57,7 @@ pub fn parse(
); );
// Conditions in this decision tree should match the destination end // Conditions in this decision tree should match the destination end
// or some intermediary state necessary to finding it
if lexeme.match_as_char('s') if lexeme.match_as_char('s')
&& lexeme.is_next_boundary() && lexeme.is_next_boundary()
&& !lexeme.match_next_as_char('|') && !lexeme.match_next_as_char('|')
@ -80,7 +81,7 @@ pub fn parse(
state.context.inline = Inline::None; state.context.inline = Inline::None;
return true; return true;
} else if lexeme.match_as_char('|') && !candidate.balanced { } else if lexeme.match_as_char('|') && !candidate.balanced {
log!("Found a pipe, but no boundary: Destination likely follows"); log!("State: Found a pipe, but no boundary: destination follows");
candidate.balanced = true; candidate.balanced = true;
return true; return true;
} else if lexeme.match_as_char('|') { } else if lexeme.match_as_char('|') {
@ -96,12 +97,13 @@ pub fn parse(
tokens.push(Token::Anchor(candidate.clone())); tokens.push(Token::Anchor(candidate.clone()));
state.context.inline = Inline::None; state.context.inline = Inline::None;
return false; return false;
} else if lexeme.is_whitespace() { } else if lexeme.is_next_whitespace() {
log!("End: Whitespace"); log!("End: next is whitespace");
buffer.destination.push_str(&lexeme.text());
candidate.destination = Some(buffer.destination.clone()); candidate.destination = Some(buffer.destination.clone());
tokens.push(Token::Anchor(candidate.clone())); tokens.push(Token::Anchor(candidate.clone()));
state.context.inline = Inline::None; state.context.inline = Inline::None;
return false; return true;
// This else branch is the 'no end found yet' state and will keep // This else branch is the 'no end found yet' state and will keep
// pushing lexemes into the buffer until an end is found above // pushing lexemes into the buffer until an end is found above
@ -137,3 +139,32 @@ pub fn parse(
state.context.inline = Inline::None; state.context.inline = Inline::None;
false false
} }
#[cfg(test)]
mod tests {
    use crate::{syntax::content::parser, types::Graph};

    /// Renders `input` with the configuration of a `Graph` built from `None`.
    fn read_noconfig(input: &str) -> String {
        let graph = Graph::new(None);
        parser::read(input, &graph.meta.config)
    }

    // Each test below renders the fully piped form of an anchor and a form
    // with one of the outer pipes removed, and expects identical output.
    //
    // NOTE(review): `indifferent_trailing_pipe` drops the *leading* pipe and
    // `indifferent_leading_pipe` drops the *trailing* one; the names may be
    // swapped, or may refer to the pipe that is kept. Confirm intent.

    #[test]
    fn indifferent_trailing_pipe() {
        let fully_piped = read_noconfig("|a|a|");
        let without_leading = read_noconfig("a|a|");
        assert_eq!(fully_piped, without_leading);
    }

    #[test]
    fn indifferent_leading_pipe() {
        let fully_piped = read_noconfig("|a|a|");
        let without_trailing = read_noconfig("|a|a");
        assert_eq!(fully_piped, without_trailing);
    }

    #[test]
    fn indifferent_multiline_trailing_pipe() {
        let fully_piped = read_noconfig("|a|a|\nn");
        let without_leading = read_noconfig("a|a|\nn");
        assert_eq!(fully_piped, without_leading);
    }

    #[test]
    fn indifferent_multiline_leading_pipe() {
        let fully_piped = read_noconfig("|a|a|\nn");
        let without_trailing = read_noconfig("|a|a\nn");
        assert_eq!(fully_piped, without_trailing);
    }
}

View file

@ -2,12 +2,12 @@ use crate::{
syntax::content::{Parseable, parser::lexeme::Lexeme}, syntax::content::{Parseable, parser::lexeme::Lexeme},
}; };
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Default, Debug, Clone, Eq, PartialEq)]
pub struct LineBreak {} pub struct LineBreak {}
impl Parseable for LineBreak { impl Parseable for LineBreak {
fn probe(lexeme: &Lexeme) -> bool { fn probe(lexeme: &Lexeme) -> bool {
lexeme.text() == "\n" lexeme.text() == "\n" && !lexeme.last()
} }
fn lex(_lexeme: &Lexeme) -> LineBreak { fn lex(_lexeme: &Lexeme) -> LineBreak {

View file

@ -6,8 +6,8 @@ pub struct Literal {
} }
impl Parseable for Literal { impl Parseable for Literal {
fn probe(_lexeme: &Lexeme) -> bool { fn probe(lexeme: &Lexeme) -> bool {
true !(lexeme.last() && lexeme.is_whitespace())
} }
fn lex(lexeme: &Lexeme) -> Literal { fn lex(lexeme: &Lexeme) -> Literal {

View file

@ -9,6 +9,10 @@ impl Paragraph {
pub fn new(open: bool) -> Paragraph { pub fn new(open: bool) -> Paragraph {
Paragraph { open: Some(open) } Paragraph { open: Some(open) }
} }
/// Returns `true` when `lexeme` matches a newline character and the
/// lexeme following it matches a newline character as well.
pub fn probe_end(lexeme: &Lexeme) -> bool {
    if !lexeme.match_as_char('\n') {
        return false;
    }
    // Only look at the following lexeme once the current one is a newline.
    lexeme.match_next_as_char('\n')
}
} }
impl Parseable for Paragraph { impl Parseable for Paragraph {