Fix an anchor clustering corner case

This commit is contained in:
Juno Takano 2025-12-21 14:51:49 -03:00
commit 6b34bb4531

View file

@ -24,11 +24,34 @@ pub fn cluster(text: &str) -> Vec<String> {
} }
let Some(delimiter) = delimiter::match_delimiter(&word) else { let Some(delimiter) = delimiter::match_delimiter(&word) else {
log!("Skip: {word:?} does not start with a delimiter"); log!("Skip: {word:?} does not have a delimiter");
clusters.push(word); clusters.push(word);
continue; continue;
}; };
if !delimiter.leading && !word.starts_with(delimiter.char) {
clusters.push(word);
continue;
}
if (!delimiter.greedy
&& !delimiter.triple
&& word.matches(delimiter.char).count() == 2)
|| (delimiter.triple
&& (2..=3).contains(&word.matches(delimiter.char).count()))
{
log!("Skip: {word:?} is almost atomic, but must be split");
match word.rsplit_once(delimiter.char) {
Some((head, tail)) => {
log!("Pushing head {head:?}, tail {tail:?} into clusters");
clusters.push(format!("{head}{}", delimiter.char));
clusters.push(tail.to_string());
continue;
},
None => unreachable!(),
}
}
if let Some(next) = iterator.peek() if let Some(next) = iterator.peek()
&& next == "\n" && next == "\n"
&& delimiter.greedy && delimiter.greedy
@ -46,23 +69,6 @@ pub fn cluster(text: &str) -> Vec<String> {
continue; continue;
} }
if (!delimiter.greedy
&& !delimiter.triple
&& word.matches(delimiter.char).count() == 2)
|| (delimiter.triple && word.matches(delimiter.char).count() == 3)
{
log!("Skip: {word:?} is almost atomic, but must be split");
match word.rsplit_once(delimiter.char) {
Some((head, tail)) => {
log!("Pushing head {head:?}, tail {tail:?} into clusters");
clusters.push(format!("{head}{}", delimiter.char));
clusters.push(tail.to_string());
continue;
},
None => unreachable!(),
}
}
log!("Found cluster from {delimiter:?} in {word:?}"); log!("Found cluster from {delimiter:?} in {word:?}");
let mut parts: Vec<String> = vec![word.clone()]; let mut parts: Vec<String> = vec![word.clone()];
log!("Seeking from a base of {parts:?}"); log!("Seeking from a base of {parts:?}");
@ -138,30 +144,49 @@ mod delimiter {
pub string: String, pub string: String,
pub greedy: bool, pub greedy: bool,
pub triple: bool, pub triple: bool,
pub leading: bool,
} }
/// Builds the table of known delimiters and partitions it by the `leading` flag.
///
/// Returns a tuple `(leading, nonleading)`: the first vector holds every
/// delimiter whose `leading` field is `true`, the second holds the rest. Both
/// vectors keep the order in which the delimiters are declared below.
///
/// NOTE(review): this span is a side-by-side diff rendering, so most lines show
/// the old text followed by the new text. These comments describe the new
/// version; the old one returned a single `Vec<Delimiter>`.
///
/// NOTE(review): every call allocates fresh `String`s for the table and then
/// clones each entry into one of the two result vectors.
fn make_delimiters() -> Vec<Delimiter> { fn make_delimiters() -> (Vec<Delimiter>, Vec<Delimiter>) {
vec![ let delimiters = [
// `|`: greedy and triple, not leading.
Delimiter { Delimiter {
char: '|', char: '|',
string: "|".to_string(), string: "|".to_string(),
greedy: true, greedy: true,
triple: true, triple: true,
leading: false,
}, },
// Backtick: neither greedy nor triple, but leading.
Delimiter { Delimiter {
char: '`', char: '`',
string: "`".to_string(), string: "`".to_string(),
greedy: false, greedy: false,
triple: false, triple: false,
leading: true,
}, },
] ];
// Split the table in two passes: leading delimiters first, then the others.
(
delimiters.iter().filter(|d| d.leading).cloned().collect(),
delimiters.iter().filter(|d| !d.leading).cloned().collect(),
)
} }
/// Finds the delimiter that applies to `word`, if any.
///
/// Returns `None` when `word` is empty. Otherwise, a delimiter flagged
/// `leading` matches only when its character is the first character of `word`.
/// If no leading delimiter matches, the first non-leading delimiter whose
/// character occurs anywhere in `word` is returned. `None` is returned when
/// neither search finds a match.
///
/// NOTE(review): this span is a side-by-side diff rendering, so several lines
/// show the old text followed by the new text. These comments describe the new
/// version; the old one only compared the first character against every
/// delimiter.
pub fn match_delimiter(word: &str) -> Option<Delimiter> { pub fn match_delimiter(word: &str) -> Option<Delimiter> {
// The delimiter table is built before the empty-word check on the next line.
let (leading, nonleading) = make_delimiters();
// An empty word has no first character, so `?` returns `None` here.
let first_char = word.chars().next()?; let first_char = word.chars().next()?;
make_delimiters()
// Leading delimiters are compared against the first character only.
.iter() if let Some(leading_match) =
.find(|d| d.char == first_char) leading.iter().find(|d| d.char == first_char).cloned()
.cloned() {
Some(leading_match)
} else {
// Non-leading delimiters may occur anywhere in the word; the first hit wins.
for delimiter in nonleading {
if word.contains(delimiter.char) {
return Some(delimiter);
}
}
None
}
} }
} }