Fix an anchor clustering corner case
This commit is contained in:
parent
76b836f0a0
commit
6b34bb4531
1 changed file with 47 additions and 22 deletions
|
|
@ -24,11 +24,34 @@ pub fn cluster(text: &str) -> Vec<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let Some(delimiter) = delimiter::match_delimiter(&word) else {
|
let Some(delimiter) = delimiter::match_delimiter(&word) else {
|
||||||
log!("Skip: {word:?} does not start with a delimiter");
|
log!("Skip: {word:?} does not have a delimiter");
|
||||||
clusters.push(word);
|
clusters.push(word);
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if !delimiter.leading && !word.starts_with(delimiter.char) {
|
||||||
|
clusters.push(word);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!delimiter.greedy
|
||||||
|
&& !delimiter.triple
|
||||||
|
&& word.matches(delimiter.char).count() == 2)
|
||||||
|
|| (delimiter.triple
|
||||||
|
&& (2..=3).contains(&word.matches(delimiter.char).count()))
|
||||||
|
{
|
||||||
|
log!("Skip: {word:?} is almost atomic, but must be split");
|
||||||
|
match word.rsplit_once(delimiter.char) {
|
||||||
|
Some((head, tail)) => {
|
||||||
|
log!("Pushing head {head:?}, tail {tail:?} into clusters");
|
||||||
|
clusters.push(format!("{head}{}", delimiter.char));
|
||||||
|
clusters.push(tail.to_string());
|
||||||
|
continue;
|
||||||
|
},
|
||||||
|
None => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(next) = iterator.peek()
|
if let Some(next) = iterator.peek()
|
||||||
&& next == "\n"
|
&& next == "\n"
|
||||||
&& delimiter.greedy
|
&& delimiter.greedy
|
||||||
|
|
@ -46,23 +69,6 @@ pub fn cluster(text: &str) -> Vec<String> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!delimiter.greedy
|
|
||||||
&& !delimiter.triple
|
|
||||||
&& word.matches(delimiter.char).count() == 2)
|
|
||||||
|| (delimiter.triple && word.matches(delimiter.char).count() == 3)
|
|
||||||
{
|
|
||||||
log!("Skip: {word:?} is almost atomic, but must be split");
|
|
||||||
match word.rsplit_once(delimiter.char) {
|
|
||||||
Some((head, tail)) => {
|
|
||||||
log!("Pushing head {head:?}, tail {tail:?} into clusters");
|
|
||||||
clusters.push(format!("{head}{}", delimiter.char));
|
|
||||||
clusters.push(tail.to_string());
|
|
||||||
continue;
|
|
||||||
},
|
|
||||||
None => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log!("Found cluster from {delimiter:?} in {word:?}");
|
log!("Found cluster from {delimiter:?} in {word:?}");
|
||||||
let mut parts: Vec<String> = vec![word.clone()];
|
let mut parts: Vec<String> = vec![word.clone()];
|
||||||
log!("Seeking from a base of {parts:?}");
|
log!("Seeking from a base of {parts:?}");
|
||||||
|
|
@ -138,30 +144,49 @@ mod delimiter {
|
||||||
pub string: String,
|
pub string: String,
|
||||||
pub greedy: bool,
|
pub greedy: bool,
|
||||||
pub triple: bool,
|
pub triple: bool,
|
||||||
|
pub leading: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn make_delimiters() -> Vec<Delimiter> {
|
fn make_delimiters() -> (Vec<Delimiter>, Vec<Delimiter>) {
|
||||||
vec![
|
let delimiters = [
|
||||||
Delimiter {
|
Delimiter {
|
||||||
char: '|',
|
char: '|',
|
||||||
string: "|".to_string(),
|
string: "|".to_string(),
|
||||||
greedy: true,
|
greedy: true,
|
||||||
triple: true,
|
triple: true,
|
||||||
|
leading: false,
|
||||||
},
|
},
|
||||||
Delimiter {
|
Delimiter {
|
||||||
char: '`',
|
char: '`',
|
||||||
string: "`".to_string(),
|
string: "`".to_string(),
|
||||||
greedy: false,
|
greedy: false,
|
||||||
triple: false,
|
triple: false,
|
||||||
|
leading: true,
|
||||||
},
|
},
|
||||||
]
|
];
|
||||||
|
|
||||||
|
(
|
||||||
|
delimiters.iter().filter(|d| d.leading).cloned().collect(),
|
||||||
|
delimiters.iter().filter(|d| !d.leading).cloned().collect(),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_delimiter(word: &str) -> Option<Delimiter> {
|
pub fn match_delimiter(word: &str) -> Option<Delimiter> {
|
||||||
|
let (leading, nonleading) = make_delimiters();
|
||||||
|
|
||||||
let first_char = word.chars().next()?;
|
let first_char = word.chars().next()?;
|
||||||
make_delimiters()
|
|
||||||
.iter()
|
if let Some(leading_match) =
|
||||||
.find(|d| d.char == first_char)
|
leading.iter().find(|d| d.char == first_char).cloned()
|
||||||
.cloned()
|
{
|
||||||
|
Some(leading_match)
|
||||||
|
} else {
|
||||||
|
for delimiter in nonleading {
|
||||||
|
if word.contains(delimiter.char) {
|
||||||
|
return Some(delimiter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue