diff --git a/src/syntax/content/parser/context/anchor.rs b/src/syntax/content/parser/context/anchor.rs index 94e7dd4..1ce26ff 100644 --- a/src/syntax/content/parser/context/anchor.rs +++ b/src/syntax/content/parser/context/anchor.rs @@ -65,8 +65,8 @@ pub fn parse( state.context.inline = Inline::None; } return true; - } else if lexeme.match_as_char('|') && lexeme.is_next_boundary() { - log!("End: Pipe followed by boundary"); + } else if lexeme.match_as_char('|') && lexeme.is_next_delimiter() { + log!("End: Pipe followed by delimiter"); if buffer.destination.is_empty() { candidate.destination = Some(candidate.text.clone()); } else { @@ -79,15 +79,17 @@ pub fn parse( log!("State: Found a pipe, but no boundary: destination follows"); candidate.balanced = true; return true; + } else if lexeme.match_as_char(':') { + log!("State: Found a colon, marking anchor as external"); + candidate.external = true; + buffer.destination.push_str(&lexeme.text()); + return true; } else if lexeme.match_as_char('|') { log!("End: Explicit end-of-destination pipe"); candidate.destination = Some(buffer.destination.clone()); return true; - } else if !candidate.external - && lexeme.is_punctuation() - && lexeme.is_next_whitespace() - { - log!("End: Punctuation followed by whitespace"); + } else if !candidate.external && lexeme.is_delimiter() { + log!("End: Internal anchor trailed by delimiter"); candidate.destination = Some(buffer.destination.clone()); tokens.push(Token::Anchor(candidate.clone())); state.context.inline = Inline::None; @@ -99,6 +101,13 @@ pub fn parse( tokens.push(Token::Anchor(candidate.clone())); state.context.inline = Inline::None; return true; + } else if lexeme.last() { + log!("End: end of input"); + buffer.destination.push_str(&lexeme.text()); + candidate.destination = Some(buffer.destination.clone()); + tokens.push(Token::Anchor(candidate.clone())); + state.context.inline = Inline::None; + return true; // This else branch is the 'no end found yet' state and will keep // pushing lexemes into the buffer until an end is found above @@ -108,9 +117,6 @@ pub fn parse( lexeme.text(), buffer.destination, ); - if lexeme.match_as_char(':') { - candidate.external = true; - } buffer.destination.push_str(&lexeme.text()); if lexeme.last() { candidate.destination = Some(buffer.destination.clone()); @@ -330,4 +336,116 @@ mod tests { fn indifferent_multiline_leading_pipe() { assert_eq!(read("|a|a|\nn"), read("|a|a\nn")); } + + #[test] + fn anchor_with_trailing_single_quote() { + assert_eq!( + read("the |lion|'s mouth"), + r#"
the lion's mouth
"#, + ); + } + + #[test] + fn anchor_with_trailing_double_quote() { + assert_eq!( + read(r#"the "|real|" motive"#), + r#"the "real" motive
"#, + ); + } + + #[test] + fn anchor_with_trailing_parenthesis() { + assert_eq!( + read("this (though |true|) was questioned"), + r#"this (though true) was questioned
"#, + ); + } + + #[test] + fn anchor_with_leading_single_quote() { + assert_eq!( + read("the 'real|Reality' motive"), + r#"the 'real' motive
"#, + ); + } + + #[test] + fn anchor_with_leading_double_quote() { + assert_eq!( + read(r#"the "real|Reality" motive"#), + r#"the "real" motive
"#, + ); + } + + #[test] + fn anchor_with_leading_parenthesis() { + assert_eq!( + read("her (last|Surname) name"), + r#"her (last) name
"#, + ); + } + + #[test] + fn anchor_with_internal_apostrophe() { + assert_eq!( + read("the |lion's mouth|album was released"), + r#"the lion's mouth was released
"# + ); + } + + #[test] + fn nonleading_anchor_with_internal_apostrophe() { + assert_eq!( + read("they decided to stay at Jane's|YellowHouse that night"), + r#"they decided to stay at Jane's that night
"# + ); + } + + #[test] + fn nonleading_anchor_with_internal_apostrophe_at_eoi() { + assert_eq!( + read("they decided to stay at Jane's|YellowHouse"), + r#"they decided to stay at Jane's
"# + ); + } + + #[test] + fn nonleading_anchor_with_internal_apostrophe_at_soi() { + assert_eq!( + read("Jane's|YellowHouse that night"), + r#"Jane's that night
"# + ); + } + + #[test] + fn anchor_with_internal_double_quotes() { + assert_eq!( + read(r#"the |"real"|Truth motive"#), + r#"the "real" motive
"#, + ); + } + + #[test] + fn anchor_with_internal_double_quotes_wrapping_spaced_words() { + assert_eq!( + read(r#"the |"bare reality"|Ideology they believed"#), + r#"the "bare reality" they believed
"#, + ); + } + + #[test] + fn anchor_with_internal_parenthesis() { + assert_eq!( + read("her |last (name)|Surname was Amad"), + r#"her last (name) was Amad
"#, + ); + } + + #[test] + fn anchor_with_internal_parenthesis_wrapping_spaced_words() { + assert_eq!( + read("this |truth (though questionable) was fine|Absurd to them "), + r#"this truth (though questionable) was fine to them
"# + ); + } } diff --git a/src/syntax/content/parser/context/inline.rs b/src/syntax/content/parser/context/inline.rs index 8ee20b1..b773e82 100644 --- a/src/syntax/content/parser/context/inline.rs +++ b/src/syntax/content/parser/context/inline.rs @@ -1,14 +1,17 @@ use std::{iter::Peekable, slice::Iter}; -use crate::{prelude::*,syntax::content::{ - Parseable as _, - parser::{ - context, Inline, - lexeme::Lexeme, - state::State, - token::{Token, code::Code, anchor::Anchor}, +use crate::{ + prelude::*, + syntax::content::{ + Parseable as _, + parser::{ + context, Inline, + lexeme::Lexeme, + state::State, + token::{Token, code::Code, anchor::Anchor}, + }, }, -}}; +}; pub fn parse( lexeme: &Lexeme, diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs index d33c646..2596a0c 100644 --- a/src/syntax/content/parser/lexeme.rs +++ b/src/syntax/content/parser/lexeme.rs @@ -92,6 +92,19 @@ impl Lexeme { .is_some_and(|c| delimiters.is_boundary(c)) } + pub fn is_delimiter(&self) -> bool { + let delimiters = Delimiters::default(); + self.as_char().is_some_and(|c| delimiters.is_delimiter(c)) + } + + pub fn is_next_delimiter(&self) -> bool { + let delimiters = Delimiters::default(); + self.last + || self + .next_as_char() + .is_some_and(|c| delimiters.is_delimiter(c)) + } + pub fn next_first_char(&self) -> Optionw S w
"# + ); + } + + #[test] + fn oblique_anchor_with_trailing_comma() { + assert_eq!( + read("w _|S|_, w"), + r#"w S, w
"# + ); + } + #[test] fn oblique() { assert_eq!( diff --git a/src/syntax/content/parser/segment.rs b/src/syntax/content/parser/segment.rs index 147c3ad..53a7837 100644 --- a/src/syntax/content/parser/segment.rs +++ b/src/syntax/content/parser/segment.rs @@ -6,7 +6,6 @@ pub mod delimiter { pub struct Delimiters { pub atomic: Vec