diff --git a/src/syntax/content/parser/context/anchor.rs b/src/syntax/content/parser/context/anchor.rs index 94e7dd4..1ce26ff 100644 --- a/src/syntax/content/parser/context/anchor.rs +++ b/src/syntax/content/parser/context/anchor.rs @@ -65,8 +65,8 @@ pub fn parse( state.context.inline = Inline::None; } return true; - } else if lexeme.match_as_char('|') && lexeme.is_next_boundary() { - log!("End: Pipe followed by boundary"); + } else if lexeme.match_as_char('|') && lexeme.is_next_delimiter() { + log!("End: Pipe followed by delimiter"); if buffer.destination.is_empty() { candidate.destination = Some(candidate.text.clone()); } else { @@ -79,15 +79,17 @@ pub fn parse( log!("State: Found a pipe, but no boundary: destination follows"); candidate.balanced = true; return true; + } else if lexeme.match_as_char(':') { + log!("State: Found a colon, marking anchor as external"); + candidate.external = true; + buffer.destination.push_str(&lexeme.text()); + return true; } else if lexeme.match_as_char('|') { log!("End: Explicit end-of-destination pipe"); candidate.destination = Some(buffer.destination.clone()); return true; - } else if !candidate.external - && lexeme.is_punctuation() - && lexeme.is_next_whitespace() - { - log!("End: Punctuation followed by whitespace"); + } else if !candidate.external && lexeme.is_delimiter() { + log!("End: Internal anchor trailed by delimiter"); candidate.destination = Some(buffer.destination.clone()); tokens.push(Token::Anchor(candidate.clone())); state.context.inline = Inline::None; @@ -99,6 +101,13 @@ pub fn parse( tokens.push(Token::Anchor(candidate.clone())); state.context.inline = Inline::None; return true; + } else if lexeme.last() { + log!("End: end of input"); + buffer.destination.push_str(&lexeme.text()); + candidate.destination = Some(buffer.destination.clone()); + tokens.push(Token::Anchor(candidate.clone())); + state.context.inline = Inline::None; + return true; // This else branch is the 'no end found yet' state and will keep // pushing lexemes into the buffer until an end is found above @@ -108,9 +117,6 @@ pub fn parse( lexeme.text(), buffer.destination, ); - if lexeme.match_as_char(':') { - candidate.external = true; - } buffer.destination.push_str(&lexeme.text()); if lexeme.last() { candidate.destination = Some(buffer.destination.clone()); @@ -330,4 +336,116 @@ mod tests { fn indifferent_multiline_leading_pipe() { assert_eq!(read("|a|a|\nn"), read("|a|a\nn")); } + + #[test] + fn anchor_with_trailing_single_quote() { + assert_eq!( + read("the |lion|'s mouth"), + r#"

the lion's mouth

"#, + ); + } + + #[test] + fn anchor_with_trailing_double_quote() { + assert_eq!( + read(r#"the "|real|" motive"#), + r#"

the "real" motive

"#, + ); + } + + #[test] + fn anchor_with_trailing_parenthesis() { + assert_eq!( + read("this (though |true|) was questioned"), + r#"

this (though true) was questioned

"#, + ); + } + + #[test] + fn anchor_with_leading_single_quote() { + assert_eq!( + read("the 'real|Reality' motive"), + r#"

the 'real' motive

"#, + ); + } + + #[test] + fn anchor_with_leading_double_quote() { + assert_eq!( + read(r#"the "real|Reality" motive"#), + r#"

the "real" motive

"#, + ); + } + + #[test] + fn anchor_with_leading_parenthesis() { + assert_eq!( + read("her (last|Surname) name"), + r#"

her (last) name

"#, + ); + } + + #[test] + fn anchor_with_internal_apostrophe() { + assert_eq!( + read("the |lion's mouth|album was released"), + r#"

the lion's mouth was released

"# + ); + } + + #[test] + fn nonleading_anchor_with_internal_apostrophe() { + assert_eq!( + read("they decided to stay at Jane's|YellowHouse that night"), + r#"

they decided to stay at Jane's that night

"# + ); + } + + #[test] + fn nonleading_anchor_with_internal_apostrophe_at_eoi() { + assert_eq!( + read("they decided to stay at Jane's|YellowHouse"), + r#"

they decided to stay at Jane's

"# + ); + } + + #[test] + fn nonleading_anchor_with_internal_apostrophe_at_soi() { + assert_eq!( + read("Jane's|YellowHouse that night"), + r#"

Jane's that night

"# + ); + } + + #[test] + fn anchor_with_internal_double_quotes() { + assert_eq!( + read(r#"the |"real"|Truth motive"#), + r#"

the "real" motive

"#, + ); + } + + #[test] + fn anchor_with_internal_double_quotes_wrapping_spaced_words() { + assert_eq!( + read(r#"the |"bare reality"|Ideology they believed"#), + r#"

the "bare reality" they believed

"#, + ); + } + + #[test] + fn anchor_with_internal_parenthesis() { + assert_eq!( + read("her |last (name)|Surname was Amad"), + r#"

her last (name) was Amad

"#, + ); + } + + #[test] + fn anchor_with_internal_parenthesis_wrapping_spaced_words() { + assert_eq!( + read("this |truth (though questionable) was fine|Absurd to them "), + r#"

this truth (though questionable) was fine to them

"# + ); + } } diff --git a/src/syntax/content/parser/context/inline.rs b/src/syntax/content/parser/context/inline.rs index 8ee20b1..b773e82 100644 --- a/src/syntax/content/parser/context/inline.rs +++ b/src/syntax/content/parser/context/inline.rs @@ -1,14 +1,17 @@ use std::{iter::Peekable, slice::Iter}; -use crate::{prelude::*,syntax::content::{ - Parseable as _, - parser::{ - context, Inline, - lexeme::Lexeme, - state::State, - token::{Token, code::Code, anchor::Anchor}, +use crate::{ + prelude::*, + syntax::content::{ + Parseable as _, + parser::{ + context, Inline, + lexeme::Lexeme, + state::State, + token::{Token, code::Code, anchor::Anchor}, + }, }, -}}; +}; pub fn parse( lexeme: &Lexeme, diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs index d33c646..2596a0c 100644 --- a/src/syntax/content/parser/lexeme.rs +++ b/src/syntax/content/parser/lexeme.rs @@ -92,6 +92,19 @@ impl Lexeme { .is_some_and(|c| delimiters.is_boundary(c)) } + pub fn is_delimiter(&self) -> bool { + let delimiters = Delimiters::default(); + self.as_char().is_some_and(|c| delimiters.is_delimiter(c)) + } + + pub fn is_next_delimiter(&self) -> bool { + let delimiters = Delimiters::default(); + self.last + || self + .next_as_char() + .is_some_and(|c| delimiters.is_delimiter(c)) + } + pub fn next_first_char(&self) -> Option { self.next.chars().nth(0) } diff --git a/src/syntax/content/parser/point.rs b/src/syntax/content/parser/point.rs index 3d78586..139c7a1 100644 --- a/src/syntax/content/parser/point.rs +++ b/src/syntax/content/parser/point.rs @@ -1,9 +1,12 @@ -use crate::syntax::content::{ - Parseable as _, - parser::{ - lexeme::Lexeme, - token::{Token, oblique::Oblique}, - state::State, +use crate::{ + prelude::*, + syntax::content::{ + Parseable as _, + parser::{ + lexeme::Lexeme, + token::{Token, oblique::Oblique}, + state::State, + }, }, }; @@ -13,6 +16,7 @@ pub fn parse( tokens: &mut Vec, ) -> bool { if Oblique::probe(lexeme) { + log!("Oblique probed {lexeme}"); tokens.push(Token::Oblique(Oblique::new(!state.switches.oblique))); state.switches.oblique = !state.switches.oblique; return true; @@ -28,6 +32,22 @@ mod tests { parser::read(input, &Graph::new(None).meta.config) } + #[test] + fn oblique_anchor() { + assert_eq!( + read("w _|S|_ w"), + r#"

w S w

"# + ); + } + + #[test] + fn oblique_anchor_with_trailing_comma() { + assert_eq!( + read("w _|S|_, w"), + r#"

w S, w

"# + ); + } + #[test] fn oblique() { assert_eq!( diff --git a/src/syntax/content/parser/segment.rs b/src/syntax/content/parser/segment.rs index 147c3ad..53a7837 100644 --- a/src/syntax/content/parser/segment.rs +++ b/src/syntax/content/parser/segment.rs @@ -6,7 +6,6 @@ pub mod delimiter { pub struct Delimiters { pub atomic: Vec, - pub boundary: Vec, pub flanking: Vec, pub punctuation: Vec, pub whitespace: Vec, @@ -14,21 +13,11 @@ pub mod delimiter { impl Default for Delimiters { fn default() -> Self { - let atomic = vec!['`', '|']; - let flanking = vec!['_', '*']; - let punctuation = vec![',', '.', ';', ':', '?', '!']; - let whitespace = vec!['\n', ' ']; - - let boundary = - [atomic.clone(), punctuation.clone(), whitespace.clone()] - .concat(); - Delimiters { - atomic, - boundary, - flanking, - punctuation, - whitespace, + atomic: vec!['`', '|'], + flanking: vec!['_', '*', '(', ')', '\'', '"'], + punctuation: vec![',', '.', ';', ':', '?', '!'], + whitespace: vec!['\n', ' '], } } } @@ -44,12 +33,16 @@ pub mod delimiter { .contains(&c) } + pub fn is_delimiter(&self, c: char) -> bool { + self.is_boundary(c) || self.flanking.contains(&c) + } + fn is_str_delimiter(&self, s: &str) -> bool { if s.chars().count() > 1 { return false; } if let Some(c) = s.chars().nth(0) { - self.boundary.contains(&c) || self.flanking.contains(&c) + self.is_delimiter(c) } else { false } diff --git a/src/syntax/content/parser/token.rs b/src/syntax/content/parser/token.rs index 5993fcf..fca3424 100644 --- a/src/syntax/content/parser/token.rs +++ b/src/syntax/content/parser/token.rs @@ -53,7 +53,7 @@ impl std::fmt::Display for Token { Token::Span(ref d) => format!("{d}"), }; - write!(f, "T*{data}") + write!(f, "Tk:{data}") } } diff --git a/src/syntax/content/parser/token/anchor.rs b/src/syntax/content/parser/token/anchor.rs index dec0ff6..e84b471 100644 --- a/src/syntax/content/parser/token/anchor.rs +++ b/src/syntax/content/parser/token/anchor.rs @@ -12,7 +12,8 @@ pub struct Anchor { impl Parseable for Anchor { fn probe(lexeme: &Lexeme) -> bool { lexeme.text() == "|" - || (!lexeme.is_whitespace() && lexeme.next() == "|") + || ((!lexeme.is_whitespace() && !lexeme.is_delimiter()) + && lexeme.next() == "|") } fn lex(_lexeme: &Lexeme) -> Anchor { @@ -58,13 +59,13 @@ impl std::fmt::Display for Anchor { let mut tail = String::default(); if self.leading { - tail.push_str(" [Leading]"); + tail.push_str(" +Leading"); } if self.balanced { - tail.push_str(" [Balanced]"); + tail.push_str(" +Balanced"); } if self.external { - tail.push_str(" [External]"); + tail.push_str(" +External"); } write!(