Handle Anchor edge cases involving punctuation
This commit is contained in:
parent
b745b74c76
commit
5a7557ba7d
7 changed files with 193 additions and 45 deletions
|
|
@ -65,8 +65,8 @@ pub fn parse(
|
|||
state.context.inline = Inline::None;
|
||||
}
|
||||
return true;
|
||||
} else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
|
||||
log!("End: Pipe followed by boundary");
|
||||
} else if lexeme.match_as_char('|') && lexeme.is_next_delimiter() {
|
||||
log!("End: Pipe followed by delimiter");
|
||||
if buffer.destination.is_empty() {
|
||||
candidate.destination = Some(candidate.text.clone());
|
||||
} else {
|
||||
|
|
@ -79,15 +79,17 @@ pub fn parse(
|
|||
log!("State: Found a pipe, but no boundary: destination follows");
|
||||
candidate.balanced = true;
|
||||
return true;
|
||||
} else if lexeme.match_as_char(':') {
|
||||
log!("State: Found a colon, marking anchor as external");
|
||||
candidate.external = true;
|
||||
buffer.destination.push_str(&lexeme.text());
|
||||
return true;
|
||||
} else if lexeme.match_as_char('|') {
|
||||
log!("End: Explicit end-of-destination pipe");
|
||||
candidate.destination = Some(buffer.destination.clone());
|
||||
return true;
|
||||
} else if !candidate.external
|
||||
&& lexeme.is_punctuation()
|
||||
&& lexeme.is_next_whitespace()
|
||||
{
|
||||
log!("End: Punctuation followed by whitespace");
|
||||
} else if !candidate.external && lexeme.is_delimiter() {
|
||||
log!("End: Internal anchor trailed by delimiter");
|
||||
candidate.destination = Some(buffer.destination.clone());
|
||||
tokens.push(Token::Anchor(candidate.clone()));
|
||||
state.context.inline = Inline::None;
|
||||
|
|
@ -99,6 +101,13 @@ pub fn parse(
|
|||
tokens.push(Token::Anchor(candidate.clone()));
|
||||
state.context.inline = Inline::None;
|
||||
return true;
|
||||
} else if lexeme.last() {
|
||||
log!("End: end of input");
|
||||
buffer.destination.push_str(&lexeme.text());
|
||||
candidate.destination = Some(buffer.destination.clone());
|
||||
tokens.push(Token::Anchor(candidate.clone()));
|
||||
state.context.inline = Inline::None;
|
||||
return true;
|
||||
|
||||
// This else branch is the 'no end found yet' state and will keep
|
||||
// pushing lexemes into the buffer until an end is found above
|
||||
|
|
@ -108,9 +117,6 @@ pub fn parse(
|
|||
lexeme.text(),
|
||||
buffer.destination,
|
||||
);
|
||||
if lexeme.match_as_char(':') {
|
||||
candidate.external = true;
|
||||
}
|
||||
buffer.destination.push_str(&lexeme.text());
|
||||
if lexeme.last() {
|
||||
candidate.destination = Some(buffer.destination.clone());
|
||||
|
|
@ -330,4 +336,116 @@ mod tests {
|
|||
fn indifferent_multiline_leading_pipe() {
|
||||
assert_eq!(read("|a|a|\nn"), read("|a|a\nn"));
|
||||
}
|
||||
|
||||
/// A `'s` directly after the closing pipe is rendered after the link.
#[test]
fn anchor_with_trailing_single_quote() {
    let input = "the |lion|'s mouth";
    let expected = r#"<p>the <a href="/node/lion">lion</a>'s mouth</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Double quotes wrapped around a piped anchor stay outside the link.
#[test]
fn anchor_with_trailing_double_quote() {
    let input = r#"the "|real|" motive"#;
    let expected = r#"<p>the "<a href="/node/real">real</a>" motive</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// A closing parenthesis right after the closing pipe is rendered after the link.
#[test]
fn anchor_with_trailing_parenthesis() {
    let input = "this (though |true|) was questioned";
    let expected = r#"<p>this (though <a href="/node/true">true</a>) was questioned</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Single quotes around a `text|Destination` anchor stay outside the link.
#[test]
fn anchor_with_leading_single_quote() {
    let input = "the 'real|Reality' motive";
    let expected = r#"<p>the '<a href="/node/Reality">real</a>' motive</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Double quotes around a `text|Destination` anchor stay outside the link.
#[test]
fn anchor_with_leading_double_quote() {
    let input = r#"the "real|Reality" motive"#;
    let expected = r#"<p>the "<a href="/node/Reality">real</a>" motive</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Parentheses around a `text|Destination` anchor stay outside the link.
#[test]
fn anchor_with_leading_parenthesis() {
    let input = "her (last|Surname) name";
    let expected = r#"<p>her (<a href="/node/Surname">last</a>) name</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// An apostrophe inside the piped anchor text is kept as part of the link text.
#[test]
fn anchor_with_internal_apostrophe() {
    let input = "the |lion's mouth|album was released";
    let expected = r#"<p>the <a href="/node/album">lion's mouth</a> was released</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Anchor text without a leading pipe may contain an apostrophe (mid-sentence case).
#[test]
fn nonleading_anchor_with_internal_apostrophe() {
    let input = "they decided to stay at Jane's|YellowHouse that night";
    let expected =
        r#"<p>they decided to stay at <a href="/node/YellowHouse">Jane's</a> that night</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Same as the mid-sentence apostrophe case, with the destination ending the input.
#[test]
fn nonleading_anchor_with_internal_apostrophe_at_eoi() {
    let input = "they decided to stay at Jane's|YellowHouse";
    let expected = r#"<p>they decided to stay at <a href="/node/YellowHouse">Jane's</a></p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Same as the mid-sentence apostrophe case, with the anchor text starting the input.
#[test]
fn nonleading_anchor_with_internal_apostrophe_at_soi() {
    let input = "Jane's|YellowHouse that night";
    let expected = r#"<p><a href="/node/YellowHouse">Jane's</a> that night</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Double quotes inside the piped anchor text are kept as part of the link text.
#[test]
fn anchor_with_internal_double_quotes() {
    let input = r#"the |"real"|Truth motive"#;
    let expected = r#"<p>the <a href="/node/Truth">"real"</a> motive</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Quoted anchor text containing a space is kept whole inside the link.
#[test]
fn anchor_with_internal_double_quotes_wrapping_spaced_words() {
    let input = r#"the |"bare reality"|Ideology they believed"#;
    let expected = r#"<p>the <a href="/node/Ideology">"bare reality"</a> they believed</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// Parentheses inside the piped anchor text are kept as part of the link text.
#[test]
fn anchor_with_internal_parenthesis() {
    let input = "her |last (name)|Surname was Amad";
    let expected = r#"<p>her <a href="/node/Surname">last (name)</a> was Amad</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// A parenthesised, multi-word aside inside the piped anchor text stays in the link.
/// The input's trailing space is deliberate; the expected output has none.
#[test]
fn anchor_with_internal_parenthesis_wrapping_spaced_words() {
    let input = "this |truth (though questionable) was fine|Absurd to them ";
    let expected =
        r#"<p>this <a href="/node/Absurd">truth (though questionable) was fine</a> to them</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,17 @@
|
|||
use std::{iter::Peekable, slice::Iter};
|
||||
|
||||
use crate::{prelude::*,syntax::content::{
|
||||
Parseable as _,
|
||||
parser::{
|
||||
context, Inline,
|
||||
lexeme::Lexeme,
|
||||
state::State,
|
||||
token::{Token, code::Code, anchor::Anchor},
|
||||
use crate::{
|
||||
prelude::*,
|
||||
syntax::content::{
|
||||
Parseable as _,
|
||||
parser::{
|
||||
context, Inline,
|
||||
lexeme::Lexeme,
|
||||
state::State,
|
||||
token::{Token, code::Code, anchor::Anchor},
|
||||
},
|
||||
},
|
||||
}};
|
||||
};
|
||||
|
||||
pub fn parse(
|
||||
lexeme: &Lexeme,
|
||||
|
|
|
|||
|
|
@ -92,6 +92,19 @@ impl Lexeme {
|
|||
.is_some_and(|c| delimiters.is_boundary(c))
|
||||
}
|
||||
|
||||
pub fn is_delimiter(&self) -> bool {
|
||||
let delimiters = Delimiters::default();
|
||||
self.as_char().is_some_and(|c| delimiters.is_delimiter(c))
|
||||
}
|
||||
|
||||
pub fn is_next_delimiter(&self) -> bool {
|
||||
let delimiters = Delimiters::default();
|
||||
self.last
|
||||
|| self
|
||||
.next_as_char()
|
||||
.is_some_and(|c| delimiters.is_delimiter(c))
|
||||
}
|
||||
|
||||
pub fn next_first_char(&self) -> Option<char> {
|
||||
self.next.chars().nth(0)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
use crate::syntax::content::{
|
||||
Parseable as _,
|
||||
parser::{
|
||||
lexeme::Lexeme,
|
||||
token::{Token, oblique::Oblique},
|
||||
state::State,
|
||||
use crate::{
|
||||
prelude::*,
|
||||
syntax::content::{
|
||||
Parseable as _,
|
||||
parser::{
|
||||
lexeme::Lexeme,
|
||||
token::{Token, oblique::Oblique},
|
||||
state::State,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -13,6 +16,7 @@ pub fn parse(
|
|||
tokens: &mut Vec<Token>,
|
||||
) -> bool {
|
||||
if Oblique::probe(lexeme) {
|
||||
log!("Oblique probed {lexeme}");
|
||||
tokens.push(Token::Oblique(Oblique::new(!state.switches.oblique)));
|
||||
state.switches.oblique = !state.switches.oblique;
|
||||
return true;
|
||||
|
|
@ -28,6 +32,22 @@ mod tests {
|
|||
parser::read(input, &Graph::new(None).meta.config)
|
||||
}
|
||||
|
||||
/// A piped anchor wrapped in underscores renders as a link inside `<em>`.
#[test]
fn oblique_anchor() {
    let input = "w _|S|_ w";
    let expected = r#"<p>w <em><a href="/node/S">S</a></em> w</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
/// A comma after the closing underscore is rendered after the `</em>`.
#[test]
fn oblique_anchor_with_trailing_comma() {
    let input = "w _|S|_, w";
    let expected = r#"<p>w <em><a href="/node/S">S</a></em>, w</p>"#;

    assert_eq!(read(input), expected);
}
|
||||
|
||||
#[test]
|
||||
fn oblique() {
|
||||
assert_eq!(
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ pub mod delimiter {
|
|||
|
||||
pub struct Delimiters {
|
||||
pub atomic: Vec<char>,
|
||||
pub boundary: Vec<char>,
|
||||
pub flanking: Vec<char>,
|
||||
pub punctuation: Vec<char>,
|
||||
pub whitespace: Vec<char>,
|
||||
|
|
@ -14,21 +13,11 @@ pub mod delimiter {
|
|||
|
||||
impl Default for Delimiters {
|
||||
fn default() -> Self {
|
||||
let atomic = vec!['`', '|'];
|
||||
let flanking = vec!['_', '*'];
|
||||
let punctuation = vec![',', '.', ';', ':', '?', '!'];
|
||||
let whitespace = vec!['\n', ' '];
|
||||
|
||||
let boundary =
|
||||
[atomic.clone(), punctuation.clone(), whitespace.clone()]
|
||||
.concat();
|
||||
|
||||
Delimiters {
|
||||
atomic,
|
||||
boundary,
|
||||
flanking,
|
||||
punctuation,
|
||||
whitespace,
|
||||
atomic: vec!['`', '|'],
|
||||
flanking: vec!['_', '*', '(', ')', '\'', '"'],
|
||||
punctuation: vec![',', '.', ';', ':', '?', '!'],
|
||||
whitespace: vec!['\n', ' '],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -44,12 +33,16 @@ pub mod delimiter {
|
|||
.contains(&c)
|
||||
}
|
||||
|
||||
pub fn is_delimiter(&self, c: char) -> bool {
|
||||
self.is_boundary(c) || self.flanking.contains(&c)
|
||||
}
|
||||
|
||||
fn is_str_delimiter(&self, s: &str) -> bool {
|
||||
if s.chars().count() > 1 {
|
||||
return false;
|
||||
}
|
||||
if let Some(c) = s.chars().nth(0) {
|
||||
self.boundary.contains(&c) || self.flanking.contains(&c)
|
||||
self.is_delimiter(c)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ impl std::fmt::Display for Token {
|
|||
Token::Span(ref d) => format!("{d}"),
|
||||
};
|
||||
|
||||
write!(f, "T*{data}")
|
||||
write!(f, "Tk:{data}")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@ pub struct Anchor {
|
|||
impl Parseable for Anchor {
|
||||
fn probe(lexeme: &Lexeme) -> bool {
|
||||
lexeme.text() == "|"
|
||||
|| (!lexeme.is_whitespace() && lexeme.next() == "|")
|
||||
|| ((!lexeme.is_whitespace() && !lexeme.is_delimiter())
|
||||
&& lexeme.next() == "|")
|
||||
}
|
||||
|
||||
fn lex(_lexeme: &Lexeme) -> Anchor {
|
||||
|
|
@ -58,13 +59,13 @@ impl std::fmt::Display for Anchor {
|
|||
let mut tail = String::default();
|
||||
|
||||
if self.leading {
|
||||
tail.push_str(" [Leading]");
|
||||
tail.push_str(" +Leading");
|
||||
}
|
||||
if self.balanced {
|
||||
tail.push_str(" [Balanced]");
|
||||
tail.push_str(" +Balanced");
|
||||
}
|
||||
if self.external {
|
||||
tail.push_str(" [External]");
|
||||
tail.push_str(" +External");
|
||||
}
|
||||
|
||||
write!(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue