diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs index 712e127..5bc1639 100644 --- a/src/syntax/content/parser.rs +++ b/src/syntax/content/parser.rs @@ -80,10 +80,6 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec { state.context.inline = Inline::Code; tokens.push(Token::Code(Code::new(true))); continue; - } else if Oblique::probe(lexeme) { - state.context.inline = Inline::Oblique; - tokens.push(Token::Oblique(Oblique::new(true))); - continue; } else if Anchor::probe(lexeme) { state.context.inline = Inline::Anchor; state.buffers.anchor.clear(); @@ -94,6 +90,10 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec { state.buffers.anchor.candidate.text = lexeme.text(); } continue; + } else if Oblique::probe(lexeme) { + state.context.inline = Inline::Oblique; + tokens.push(Token::Oblique(Oblique::new(true))); + continue; } }, Inline::Code => { @@ -155,7 +155,7 @@ struct AnchorBuffer { impl AnchorBuffer { fn clear(&mut self) { - self.candidate = Anchor::empty(); + self.candidate = Anchor::default(); self.text = String::new(); self.destination = String::new(); } @@ -251,6 +251,64 @@ mod tests { ); } + #[test] + fn nonleading_plural_anchor() { + assert_eq!( + read_noconfig("The flower|s bloomed"), + r#"

The flowers bloomed

"# + ); + } + + #[test] + fn leading_plural_anchor() { + assert_eq!( + read_noconfig("Interfaces are |element|s of |system|s."), + r#"

Interfaces are elements of systems.

"# + ); + } + + #[test] + fn http_external_anchor() { + assert_eq!( + read_noconfig( + "a |false dichotomy|https://en.wikipedia.org/wiki/False_dilemma|." + ), + r#"

a false dichotomy.

"# + ); + } + + #[test] + fn http_external_anchor_leading_no_third() { + assert_eq!( + read_noconfig("|Rust toolchain|https://rustup.rs/ "), + r#"

Rust toolchain

"# + ); + } + + #[test] + fn http_external_anchor_leading_no_third_then_punctuation_then_space() { + assert_eq!( + read_noconfig("|Rust toolchain|https://rustup.rs/, "), + r#"

Rust toolchain,

"# + ); + } + + #[test] + fn http_external_anchor_leading_no_third_then_punctuation_then_eof() { + assert_eq!( + read_noconfig("|Rust toolchain|https://rustup.rs/,"), + r#"

Rust toolchain

"# + ); + } + + #[test] + fn http_external_anchor_leading_no_third_then_eof() { + assert_eq!( + read_noconfig("|Rust toolchain|https://rustup.rs/"), + r#"

Rust toolchain

"# + ); + } + #[test] fn clear_anchor_buffer() { assert_eq!( diff --git a/src/syntax/content/parser/segment.rs b/src/syntax/content/parser/segment.rs index 63aa202..147c3ad 100644 --- a/src/syntax/content/parser/segment.rs +++ b/src/syntax/content/parser/segment.rs @@ -62,31 +62,35 @@ pub mod delimiter { let mut iterator = text.chars().peekable(); while let Some(c) = iterator.next() { - // if the current char is an atomic delimiter - if delimiters.atomic.contains(&c) { + // if the current char is a boundary + if delimiters.is_boundary(c) { atomized.push(c.to_string()); + continue; // if the current char is a flanking delimiter } else if delimiters.flanking.contains(&c) { // if next char is a boundary if iterator .peek() - .is_some_and(|next| delimiters.is_boundary(*next)) + .is_none_or(|next| delimiters.is_boundary(*next)) { atomized.push(c.to_string()); + continue; // if the previous char was whitespace } else if let Some(last_string) = atomized.last() && let Some(last_char) = last_string.chars().last() - && last_char.is_whitespace() + && delimiters.whitespace.contains(&last_char) { atomized.push(c.to_string()); + continue; } + } // if there is a last atomized element - } else if let Some(last) = atomized.last_mut() { - // if the last atomized element is a delimiter - if delimiters.is_delimiter(last) { + if let Some(last) = atomized.last_mut() { + // if the last atomized element is a boundary + if delimiters.is_str_delimiter(last) { atomized.push(c.to_string()); } else { last.push(c); @@ -104,10 +108,83 @@ pub mod delimiter { mod tests { use super::*; + #[test] + fn atomize_nonflanking_underscore() { + assert_eq!(atomize("false_dichotomy"), vec!["false_dichotomy"]); + } + + #[test] + fn atomize_left_flanking_underscore() { + assert_eq!( + atomize("_false_dichotomy"), + vec!["_", "false_dichotomy"] + ); + } + + #[test] + fn atomize_right_flanking_underscore() { + assert_eq!( + atomize("false_dichotomy_"), + vec!["false_dichotomy", "_"] + ); + } + + #[test] + fn atomize_dual_flanking_underscore() { + assert_eq!( + atomize("_false_dichotomy_"), + vec!["_", "false_dichotomy", "_"] + ); + } + + #[test] + fn atomize_flankign_sentence() { + assert_eq!( + atomize( + "about_colors: the colors _amber_, _orange_ and _yellow mustard_ to `jane_bishop@mail.com`." + ), + vec![ + "about_colors", + ":", + " ", + "the", + " ", + "colors", + " ", + "_", + "amber", + "_", + ",", + " ", + "_", + "orange", + "_", + " ", + "and", + " ", + "_", + "yellow", + " ", + "mustard", + "_", + " ", + "to", + " ", + "`", + "jane_bishop@mail", + ".", + "com", + "`", + "." + ], + ); + } + #[test] fn atomize_words() { - let words = " justification for the actions of those who hold authority inevitably dwindles "; // 2 - let actual = atomize(words); + let actual = atomize( + " justification for the actions of those who hold authority inevitably dwindles ", + ); let expected = vec![ " ", " ", @@ -179,8 +256,7 @@ pub mod delimiter { #[test] fn atomize_pipes() { - let s = "every other |time| as it was perceived"; - let actual = atomize(s); + let actual = atomize("every other |time| as it was perceived"); let expected = vec![ "every", " ", @@ -203,8 +279,9 @@ pub mod delimiter { #[test] fn atomize_pipes_and_ticks() { - let s = "every other |time| as `it could or |perhaps somehow|then or now| it was` perceived"; - let actual = atomize(s); + let actual = atomize( + "every other |time| as `it could or |perhaps somehow|then or now| it was` perceived", + ); let expected = vec![ "every", " ", @@ -247,9 +324,8 @@ pub mod delimiter { #[test] fn atomize_newlines() { - let s = "a`c`adc`da \ndcdb d` cdb` dc\ndb `dc ` d ad ` bdc"; - - let actual = atomize(s); + let actual = + atomize("a`c`adc`da \ndcdb d` cdb` dc\ndb `dc ` d ad ` bdc"); let expected = vec![ "a", "`", "c", "`", "adc", "`", "da", " ", "\n", "dcdb", " ", "d", "`", " ", "cdb", "`", " ", "dc", "\n", "db", " ", "`", diff --git a/src/syntax/content/parser/token/anchor.rs b/src/syntax/content/parser/token/anchor.rs index ddb4bf8..bacf03f 100644 --- a/src/syntax/content/parser/token/anchor.rs +++ b/src/syntax/content/parser/token/anchor.rs @@ -5,6 +5,7 @@ pub struct Anchor { pub text: String, pub destination: Option, pub leading: bool, + pub external: bool, } impl Parseable for Anchor { @@ -39,11 +40,17 @@ impl Parseable for Anchor { } impl Anchor { - pub fn new(text: &str, destination: &str, spaced: bool) -> Anchor { + pub fn new( + text: &str, + destination: &str, + leading: bool, + external: bool, + ) -> Anchor { Anchor { text: text.to_owned(), destination: Some(Anchor::resolve_destination(destination)), - leading: spaced, + leading, + external, } } @@ -54,14 +61,6 @@ impl Anchor { format!("/node/{raw}") } } - - pub fn empty() -> Anchor { - Anchor { - text: String::new(), - destination: None, - leading: false, - } - } } #[cfg(test)] @@ -71,7 +70,7 @@ mod tests { #[test] fn render_anchor() { - let anchor = Anchor::new("AnchorText", "AnchorDest", true); + let anchor = Anchor::new("AnchorText", "AnchorDest", true, false); assert_eq!( anchor.render(), r#"AnchorText"# @@ -89,7 +88,7 @@ mod tests { #[test] #[should_panic(expected = "without knowing its destination")] fn unknown_destination_render() { - let anchor = Anchor::empty(); + let anchor = Anchor::default(); drop(anchor.render()); } }