Improve testing and handling of some Anchor edge cases

This commit is contained in:
Juno Takano 2026-01-02 14:34:08 -03:00
commit 21ab00b3eb
3 changed files with 166 additions and 33 deletions

View file

@ -80,10 +80,6 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
state.context.inline = Inline::Code;
tokens.push(Token::Code(Code::new(true)));
continue;
} else if Oblique::probe(lexeme) {
state.context.inline = Inline::Oblique;
tokens.push(Token::Oblique(Oblique::new(true)));
continue;
} else if Anchor::probe(lexeme) {
state.context.inline = Inline::Anchor;
state.buffers.anchor.clear();
@ -94,6 +90,10 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
state.buffers.anchor.candidate.text = lexeme.text();
}
continue;
} else if Oblique::probe(lexeme) {
state.context.inline = Inline::Oblique;
tokens.push(Token::Oblique(Oblique::new(true)));
continue;
}
},
Inline::Code => {
@ -155,7 +155,7 @@ struct AnchorBuffer {
impl AnchorBuffer {
/// Resets the buffer so it can be reused for the next anchor.
///
/// The candidate anchor is replaced with `Anchor::default()`, and both the
/// `text` and `destination` strings are replaced with fresh empty strings.
fn clear(&mut self) {
    // One assignment is enough; a preceding assignment to the same field
    // would be overwritten here without ever being read.
    self.candidate = Anchor::default();
    self.text = String::new();
    self.destination = String::new();
}
@ -251,6 +251,64 @@ mod tests {
);
}
#[test]
fn nonleading_plural_anchor() {
    // A single pipe inside a word: the part before the pipe is the link
    // destination, and the suffix after it stays inside the link text.
    let rendered = read_noconfig("The flower|s bloomed");
    let expected = r#"<p>The <a href="/node/flower">flowers</a> bloomed</p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn leading_plural_anchor() {
    // Two pipe-delimited words, each followed by a plural suffix that is
    // expected to end up inside the rendered link text.
    let rendered = read_noconfig("Interfaces are |element|s of |system|s.");
    let expected = r#"<p>Interfaces are <a href="/node/element">elements</a> of <a href="/node/system">systems</a>.</p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn http_external_anchor() {
    // Three-pipe form: link text, then an absolute URL, then a closing pipe.
    // The URL is expected to be used verbatim as the href.
    let source =
        "a |false dichotomy|https://en.wikipedia.org/wiki/False_dilemma|.";
    let expected = r#"<p>a <a href="https://en.wikipedia.org/wiki/False_dilemma">false dichotomy</a>.</p>"#;
    assert_eq!(read_noconfig(source), expected);
}
#[test]
fn http_external_anchor_leading_no_third() {
    // No closing (third) pipe: the URL is followed by a space, which is
    // expected to survive after the rendered link.
    let rendered = read_noconfig("|Rust toolchain|https://rustup.rs/ ");
    let expected = r#"<p><a href="https://rustup.rs/">Rust toolchain</a> </p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn http_external_anchor_leading_no_third_then_punctuation_then_space() {
    // No closing pipe; the URL is followed by a comma and a space, both of
    // which are expected to appear after the rendered link.
    let rendered = read_noconfig("|Rust toolchain|https://rustup.rs/, ");
    let expected = r#"<p><a href="https://rustup.rs/">Rust toolchain</a>, </p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn http_external_anchor_leading_no_third_then_punctuation_then_eof() {
    // No closing pipe; the URL is followed by a comma and then end of input.
    // NOTE(review): the expected output contains no trailing comma, whereas
    // the comma-then-space variant of this test keeps it. Confirm whether
    // dropping the comma at end of input is intended.
    let rendered = read_noconfig("|Rust toolchain|https://rustup.rs/,");
    let expected = r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn http_external_anchor_leading_no_third_then_eof() {
    // No closing pipe and nothing after the URL: input ends right at the
    // destination.
    let rendered = read_noconfig("|Rust toolchain|https://rustup.rs/");
    let expected = r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#;
    assert_eq!(rendered, expected);
}
#[test]
fn clear_anchor_buffer() {
assert_eq!(

View file

@ -62,31 +62,35 @@ pub mod delimiter {
let mut iterator = text.chars().peekable();
while let Some(c) = iterator.next() {
// if the current char is an atomic delimiter
if delimiters.atomic.contains(&c) {
// if the current char is a boundary
if delimiters.is_boundary(c) {
atomized.push(c.to_string());
continue;
// if the current char is a flanking delimiter
} else if delimiters.flanking.contains(&c) {
// if next char is a boundary
if iterator
.peek()
.is_some_and(|next| delimiters.is_boundary(*next))
.is_none_or(|next| delimiters.is_boundary(*next))
{
atomized.push(c.to_string());
continue;
// if the previous char was whitespace
} else if let Some(last_string) = atomized.last()
&& let Some(last_char) = last_string.chars().last()
&& last_char.is_whitespace()
&& delimiters.whitespace.contains(&last_char)
{
atomized.push(c.to_string());
continue;
}
}
// if there is a last atomized element
} else if let Some(last) = atomized.last_mut() {
// if the last atomized element is a delimiter
if delimiters.is_delimiter(last) {
if let Some(last) = atomized.last_mut() {
// if the last atomized element is a boundary
if delimiters.is_str_delimiter(last) {
atomized.push(c.to_string());
} else {
last.push(c);
@ -104,10 +108,83 @@ pub mod delimiter {
mod tests {
use super::*;
#[test]
fn atomize_nonflanking_underscore() {
    // An underscore in the middle of a word must not split the word.
    let actual = atomize("false_dichotomy");
    let expected = vec!["false_dichotomy"];
    assert_eq!(actual, expected);
}
#[test]
fn atomize_left_flanking_underscore() {
    // A leading underscore becomes its own atom; the inner one stays in the
    // word.
    let actual = atomize("_false_dichotomy");
    let expected = vec!["_", "false_dichotomy"];
    assert_eq!(actual, expected);
}
#[test]
fn atomize_right_flanking_underscore() {
    // A trailing underscore at end of input becomes its own atom; the inner
    // one stays in the word.
    let actual = atomize("false_dichotomy_");
    let expected = vec!["false_dichotomy", "_"];
    assert_eq!(actual, expected);
}
#[test]
fn atomize_dual_flanking_underscore() {
    // Underscores on both ends are split off; the inner one stays in the
    // word.
    let actual = atomize("_false_dichotomy_");
    let expected = vec!["_", "false_dichotomy", "_"];
    assert_eq!(actual, expected);
}
#[test]
fn atomize_flankign_sentence() {
    // A full sentence mixing non-flanking underscores (`about_colors`,
    // `jane_bishop@mail`), flanking underscores around words and phrases,
    // punctuation, spaces and backticks.
    let sentence = "about_colors: the colors _amber_, _orange_ and _yellow mustard_ to `jane_bishop@mail.com`.";
    let expected = vec![
        "about_colors", ":", " ",
        "the", " ",
        "colors", " ",
        "_", "amber", "_", ",", " ",
        "_", "orange", "_", " ",
        "and", " ",
        "_", "yellow", " ", "mustard", "_", " ",
        "to", " ",
        "`", "jane_bishop@mail", ".", "com", "`",
        ".",
    ];
    assert_eq!(atomize(sentence), expected);
}
#[test]
fn atomize_words() {
let words = " justification for the actions of those who hold authority inevitably dwindles "; // 2
let actual = atomize(words);
let actual = atomize(
" justification for the actions of those who hold authority inevitably dwindles ",
);
let expected = vec![
" ",
" ",
@ -179,8 +256,7 @@ pub mod delimiter {
#[test]
fn atomize_pipes() {
let s = "every other |time| as it was perceived";
let actual = atomize(s);
let actual = atomize("every other |time| as it was perceived");
let expected = vec![
"every",
" ",
@ -203,8 +279,9 @@ pub mod delimiter {
#[test]
fn atomize_pipes_and_ticks() {
let s = "every other |time| as `it could or |perhaps somehow|then or now| it was` perceived";
let actual = atomize(s);
let actual = atomize(
"every other |time| as `it could or |perhaps somehow|then or now| it was` perceived",
);
let expected = vec![
"every",
" ",
@ -247,9 +324,8 @@ pub mod delimiter {
#[test]
fn atomize_newlines() {
let s = "a`c`adc`da \ndcdb d` cdb` dc\ndb `dc ` d ad ` bdc";
let actual = atomize(s);
let actual =
atomize("a`c`adc`da \ndcdb d` cdb` dc\ndb `dc ` d ad ` bdc");
let expected = vec![
"a", "`", "c", "`", "adc", "`", "da", " ", "\n", "dcdb", " ",
"d", "`", " ", "cdb", "`", " ", "dc", "\n", "db", " ", "`",

View file

@ -5,6 +5,7 @@ pub struct Anchor {
pub text: String,
pub destination: Option<String>,
pub leading: bool,
pub external: bool,
}
impl Parseable for Anchor {
@ -39,11 +40,17 @@ impl Parseable for Anchor {
}
impl Anchor {
pub fn new(text: &str, destination: &str, spaced: bool) -> Anchor {
pub fn new(
text: &str,
destination: &str,
leading: bool,
external: bool,
) -> Anchor {
Anchor {
text: text.to_owned(),
destination: Some(Anchor::resolve_destination(destination)),
leading: spaced,
leading,
external,
}
}
@ -54,14 +61,6 @@ impl Anchor {
format!("/node/{raw}")
}
}
/// Builds an anchor with empty text, no destination, and both flags unset.
pub fn empty() -> Anchor {
    Anchor {
        text: String::new(),
        destination: None,
        leading: false,
        // Every field of the struct must be initialized in a struct
        // expression, including `external`.
        external: false,
    }
}
}
#[cfg(test)]
@ -71,7 +70,7 @@ mod tests {
#[test]
fn render_anchor() {
let anchor = Anchor::new("AnchorText", "AnchorDest", true);
let anchor = Anchor::new("AnchorText", "AnchorDest", true, false);
assert_eq!(
anchor.render(),
r#"<a href="/node/AnchorDest">AnchorText</a>"#
@ -89,7 +88,7 @@ mod tests {
#[test]
#[should_panic(expected = "without knowing its destination")]
fn unknown_destination_render() {
    // Rendering an anchor built with `Anchor::default()` is expected to
    // panic with the message named in the attribute above.
    let anchor = Anchor::default();
    drop(anchor.render());
}
}