Handle Anchor edge cases involving punctuation

This commit is contained in:
Juno Takano 2026-01-05 19:39:56 -03:00
commit 5a7557ba7d
7 changed files with 193 additions and 45 deletions

View file

@ -65,8 +65,8 @@ pub fn parse(
state.context.inline = Inline::None;
}
return true;
} else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
log!("End: Pipe followed by boundary");
} else if lexeme.match_as_char('|') && lexeme.is_next_delimiter() {
log!("End: Pipe followed by delimiter");
if buffer.destination.is_empty() {
candidate.destination = Some(candidate.text.clone());
} else {
@ -79,15 +79,17 @@ pub fn parse(
log!("State: Found a pipe, but no boundary: destination follows");
candidate.balanced = true;
return true;
} else if lexeme.match_as_char(':') {
log!("State: Found a colon, marking anchor as external");
candidate.external = true;
buffer.destination.push_str(&lexeme.text());
return true;
} else if lexeme.match_as_char('|') {
log!("End: Explicit end-of-destination pipe");
candidate.destination = Some(buffer.destination.clone());
return true;
} else if !candidate.external
&& lexeme.is_punctuation()
&& lexeme.is_next_whitespace()
{
log!("End: Punctuation followed by whitespace");
} else if !candidate.external && lexeme.is_delimiter() {
log!("End: Internal anchor trailed by delimiter");
candidate.destination = Some(buffer.destination.clone());
tokens.push(Token::Anchor(candidate.clone()));
state.context.inline = Inline::None;
@ -99,6 +101,13 @@ pub fn parse(
tokens.push(Token::Anchor(candidate.clone()));
state.context.inline = Inline::None;
return true;
} else if lexeme.last() {
log!("End: end of input");
buffer.destination.push_str(&lexeme.text());
candidate.destination = Some(buffer.destination.clone());
tokens.push(Token::Anchor(candidate.clone()));
state.context.inline = Inline::None;
return true;
// This else branch is the 'no end found yet' state and will keep
// pushing lexemes into the buffer until an end is found above
@ -108,9 +117,6 @@ pub fn parse(
lexeme.text(),
buffer.destination,
);
if lexeme.match_as_char(':') {
candidate.external = true;
}
buffer.destination.push_str(&lexeme.text());
if lexeme.last() {
candidate.destination = Some(buffer.destination.clone());
@ -330,4 +336,116 @@ mod tests {
fn indifferent_multiline_leading_pipe() {
assert_eq!(read("|a|a|\nn"), read("|a|a\nn"));
}
// A `'s` placed right after the closing pipe is expected to render after the
// closing `</a>`, with the link pointing at `/node/lion`.
#[test]
fn anchor_with_trailing_single_quote() {
    let rendered = read("the |lion|'s mouth");
    let expected = r#"<p>the <a href="/node/lion">lion</a>'s mouth</p>"#;
    assert_eq!(rendered, expected);
}
// A balanced anchor wrapped in double quotes: both quote characters are
// expected to stay outside the generated `<a>` element.
#[test]
fn anchor_with_trailing_double_quote() {
    let rendered = read(r#"the "|real|" motive"#);
    let expected = r#"<p>the "<a href="/node/real">real</a>" motive</p>"#;
    assert_eq!(rendered, expected);
}
// A closing parenthesis right after the closing pipe is expected to render
// after `</a>` rather than inside the link.
#[test]
fn anchor_with_trailing_parenthesis() {
    let rendered = read("this (though |true|) was questioned");
    let expected =
        r#"<p>this (though <a href="/node/true">true</a>) was questioned</p>"#;
    assert_eq!(rendered, expected);
}
// A `text|Destination` anchor enclosed in single quotes: the quotes are
// expected to surround the `<a>` element, not become part of its text or href.
#[test]
fn anchor_with_leading_single_quote() {
    let rendered = read("the 'real|Reality' motive");
    let expected = r#"<p>the '<a href="/node/Reality">real</a>' motive</p>"#;
    assert_eq!(rendered, expected);
}
// A `text|Destination` anchor enclosed in double quotes: the quotes are
// expected to surround the `<a>` element, not become part of its text or href.
#[test]
fn anchor_with_leading_double_quote() {
    let rendered = read(r#"the "real|Reality" motive"#);
    let expected = r#"<p>the "<a href="/node/Reality">real</a>" motive</p>"#;
    assert_eq!(rendered, expected);
}
// A `text|Destination` anchor enclosed in parentheses: both parentheses are
// expected to render outside the `<a>` element.
#[test]
fn anchor_with_leading_parenthesis() {
    let rendered = read("her (last|Surname) name");
    let expected = r#"<p>her (<a href="/node/Surname">last</a>) name</p>"#;
    assert_eq!(rendered, expected);
}
// An apostrophe inside pipe-delimited anchor text is expected to be kept as
// part of the link text; the destination following the second pipe is `album`.
#[test]
fn anchor_with_internal_apostrophe() {
    let rendered = read("the |lion's mouth|album was released");
    let expected =
        r#"<p>the <a href="/node/album">lion's mouth</a> was released</p>"#;
    assert_eq!(rendered, expected);
}
// An anchor without a leading pipe whose text contains an apostrophe: the
// whole word `Jane's` is expected to become the link text.
#[test]
fn nonleading_anchor_with_internal_apostrophe() {
    let rendered =
        read("they decided to stay at Jane's|YellowHouse that night");
    let expected = r#"<p>they decided to stay at <a href="/node/YellowHouse">Jane's</a> that night</p>"#;
    assert_eq!(rendered, expected);
}
// Same shape as the non-leading apostrophe case, but the destination is the
// final piece of the input, so the paragraph is expected to close right
// after `</a>`.
#[test]
fn nonleading_anchor_with_internal_apostrophe_at_eoi() {
    let rendered = read("they decided to stay at Jane's|YellowHouse");
    let expected = r#"<p>they decided to stay at <a href="/node/YellowHouse">Jane's</a></p>"#;
    assert_eq!(rendered, expected);
}
// Same shape as the non-leading apostrophe case, but the anchor text is the
// very first piece of the input, so `<a>` is expected right after `<p>`.
#[test]
fn nonleading_anchor_with_internal_apostrophe_at_soi() {
    let rendered = read("Jane's|YellowHouse that night");
    let expected =
        r#"<p><a href="/node/YellowHouse">Jane's</a> that night</p>"#;
    assert_eq!(rendered, expected);
}
// Double quotes placed between the pipes are expected to be kept inside the
// link text, with `Truth` as the destination.
#[test]
fn anchor_with_internal_double_quotes() {
    let rendered = read(r#"the |"real"|Truth motive"#);
    let expected = r#"<p>the <a href="/node/Truth">"real"</a> motive</p>"#;
    assert_eq!(rendered, expected);
}
// Quoted anchor text containing a space: the full quoted phrase is expected
// to be the link text, with `Ideology` as the destination.
#[test]
fn anchor_with_internal_double_quotes_wrapping_spaced_words() {
    let rendered = read(r#"the |"bare reality"|Ideology they believed"#);
    let expected = r#"<p>the <a href="/node/Ideology">"bare reality"</a> they believed</p>"#;
    assert_eq!(rendered, expected);
}
// Parentheses placed between the pipes are expected to be kept inside the
// link text, with `Surname` as the destination.
#[test]
fn anchor_with_internal_parenthesis() {
    let rendered = read("her |last (name)|Surname was Amad");
    let expected =
        r#"<p>her <a href="/node/Surname">last (name)</a> was Amad</p>"#;
    assert_eq!(rendered, expected);
}
// A parenthesised, multi-word aside inside the anchor text is expected to be
// kept in the link text. Note the input ends with a trailing space that does
// not appear in the expected paragraph.
#[test]
fn anchor_with_internal_parenthesis_wrapping_spaced_words() {
    let rendered =
        read("this |truth (though questionable) was fine|Absurd to them ");
    let expected = r#"<p>this <a href="/node/Absurd">truth (though questionable) was fine</a> to them</p>"#;
    assert_eq!(rendered, expected);
}
}

View file

@ -1,14 +1,17 @@
use std::{iter::Peekable, slice::Iter};
use crate::{prelude::*,syntax::content::{
Parseable as _,
parser::{
context, Inline,
lexeme::Lexeme,
state::State,
token::{Token, code::Code, anchor::Anchor},
use crate::{
prelude::*,
syntax::content::{
Parseable as _,
parser::{
context, Inline,
lexeme::Lexeme,
state::State,
token::{Token, code::Code, anchor::Anchor},
},
},
}};
};
pub fn parse(
lexeme: &Lexeme,

View file

@ -92,6 +92,19 @@ impl Lexeme {
.is_some_and(|c| delimiters.is_boundary(c))
}
/// Reports whether this lexeme is a delimiter.
///
/// Returns `true` only when `as_char()` yields a character and the default
/// [`Delimiters`] set classifies that character as a delimiter; returns
/// `false` when `as_char()` yields `None`.
pub fn is_delimiter(&self) -> bool {
    match self.as_char() {
        Some(ch) => Delimiters::default().is_delimiter(ch),
        None => false,
    }
}
/// Reports whether this lexeme is followed by a delimiter.
///
/// Returns `true` when the `last` flag is set, without inspecting the
/// following text. Otherwise returns `true` only if `next_as_char()` yields a
/// character that the default [`Delimiters`] set classifies as a delimiter.
pub fn is_next_delimiter(&self) -> bool {
    if self.last {
        return true;
    }

    match self.next_as_char() {
        Some(ch) => Delimiters::default().is_delimiter(ch),
        None => false,
    }
}
/// Returns the first character of the `next` field, or `None` when that
/// text is empty.
pub fn next_first_char(&self) -> Option<char> {
    // `Iterator::next` is the idiomatic spelling of `nth(0)`; both yield the
    // first item of a fresh iterator.
    self.next.chars().next()
}

View file

@ -1,9 +1,12 @@
use crate::syntax::content::{
Parseable as _,
parser::{
lexeme::Lexeme,
token::{Token, oblique::Oblique},
state::State,
use crate::{
prelude::*,
syntax::content::{
Parseable as _,
parser::{
lexeme::Lexeme,
token::{Token, oblique::Oblique},
state::State,
},
},
};
@ -13,6 +16,7 @@ pub fn parse(
tokens: &mut Vec<Token>,
) -> bool {
if Oblique::probe(lexeme) {
log!("Oblique probed {lexeme}");
tokens.push(Token::Oblique(Oblique::new(!state.switches.oblique)));
state.switches.oblique = !state.switches.oblique;
return true;
@ -28,6 +32,22 @@ mod tests {
parser::read(input, &Graph::new(None).meta.config)
}
// A balanced anchor wrapped in underscores is expected to render as a link
// nested inside an `<em>` element.
#[test]
fn oblique_anchor() {
    let rendered = read("w _|S|_ w");
    let expected = r#"<p>w <em><a href="/node/S">S</a></em> w</p>"#;
    assert_eq!(rendered, expected);
}
// Same as the oblique anchor case, with a comma directly after the closing
// underscore; the comma is expected to render after `</em>`.
#[test]
fn oblique_anchor_with_trailing_comma() {
    let rendered = read("w _|S|_, w");
    let expected = r#"<p>w <em><a href="/node/S">S</a></em>, w</p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn oblique() {
assert_eq!(

View file

@ -6,7 +6,6 @@ pub mod delimiter {
pub struct Delimiters {
pub atomic: Vec<char>,
pub boundary: Vec<char>,
pub flanking: Vec<char>,
pub punctuation: Vec<char>,
pub whitespace: Vec<char>,
@ -14,21 +13,11 @@ pub mod delimiter {
impl Default for Delimiters {
fn default() -> Self {
let atomic = vec!['`', '|'];
let flanking = vec!['_', '*'];
let punctuation = vec![',', '.', ';', ':', '?', '!'];
let whitespace = vec!['\n', ' '];
let boundary =
[atomic.clone(), punctuation.clone(), whitespace.clone()]
.concat();
Delimiters {
atomic,
boundary,
flanking,
punctuation,
whitespace,
atomic: vec!['`', '|'],
flanking: vec!['_', '*', '(', ')', '\'', '"'],
punctuation: vec![',', '.', ';', ':', '?', '!'],
whitespace: vec!['\n', ' '],
}
}
}
@ -44,12 +33,16 @@ pub mod delimiter {
.contains(&c)
}
/// Reports whether `c` counts as a delimiter: either `is_boundary(c)` holds
/// or `c` is listed in `flanking`.
pub fn is_delimiter(&self, c: char) -> bool {
    // Boundary check first; the flanking list is only consulted when it fails.
    if self.is_boundary(c) {
        return true;
    }
    self.flanking.iter().any(|&mark| mark == c)
}
fn is_str_delimiter(&self, s: &str) -> bool {
if s.chars().count() > 1 {
return false;
}
if let Some(c) = s.chars().nth(0) {
self.boundary.contains(&c) || self.flanking.contains(&c)
self.is_delimiter(c)
} else {
false
}

View file

@ -53,7 +53,7 @@ impl std::fmt::Display for Token {
Token::Span(ref d) => format!("{d}"),
};
write!(f, "T*{data}")
write!(f, "Tk:{data}")
}
}

View file

@ -12,7 +12,8 @@ pub struct Anchor {
impl Parseable for Anchor {
fn probe(lexeme: &Lexeme) -> bool {
lexeme.text() == "|"
|| (!lexeme.is_whitespace() && lexeme.next() == "|")
|| ((!lexeme.is_whitespace() && !lexeme.is_delimiter())
&& lexeme.next() == "|")
}
fn lex(_lexeme: &Lexeme) -> Anchor {
@ -58,13 +59,13 @@ impl std::fmt::Display for Anchor {
let mut tail = String::default();
if self.leading {
tail.push_str(" [Leading]");
tail.push_str(" +Leading");
}
if self.balanced {
tail.push_str(" [Balanced]");
tail.push_str(" +Balanced");
}
if self.external {
tail.push_str(" [External]");
tail.push_str(" +External");
}
write!(