Handle several anchor edge cases
This commit is contained in:
parent
f9bff6acab
commit
9f04a4606c
3 changed files with 164 additions and 61 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
use std::collections::{HashMap};
|
use std::collections::{HashMap};
|
||||||
|
|
||||||
use crate::{prelude::*,types::Config};
|
use crate::{prelude::*, types::Config};
|
||||||
use super::{Parseable as _, Token, LexMap};
|
use super::{Parseable as _, Token, LexMap};
|
||||||
use token::{
|
use token::{
|
||||||
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
|
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
|
||||||
|
|
@ -15,10 +15,12 @@ pub mod segment;
|
||||||
pub mod context;
|
pub mod context;
|
||||||
|
|
||||||
const LEXMAP: LexMap = &[
|
const LEXMAP: LexMap = &[
|
||||||
(LineBreak::probe, |word| {
|
(LineBreak::probe, |lexeme| {
|
||||||
Token::LineBreak(LineBreak::lex(word))
|
Token::LineBreak(LineBreak::lex(lexeme))
|
||||||
|
}),
|
||||||
|
(Literal::probe, |lexeme| {
|
||||||
|
Token::Literal(Literal::lex(lexeme))
|
||||||
}),
|
}),
|
||||||
(Literal::probe, |word| Token::Literal(Literal::lex(word))),
|
|
||||||
];
|
];
|
||||||
|
|
||||||
fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
|
|
@ -49,6 +51,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
tokens.push(Token::Header(header));
|
tokens.push(Token::Header(header));
|
||||||
continue;
|
continue;
|
||||||
} else if Paragraph::probe(lexeme) {
|
} else if Paragraph::probe(lexeme) {
|
||||||
|
log!(
|
||||||
|
"Probed {lexeme:#?} from Block::None -> Block::Paragraph"
|
||||||
|
);
|
||||||
state.context.block = Block::Paragraph;
|
state.context.block = Block::Paragraph;
|
||||||
tokens.push(Token::Paragraph(Paragraph::new(true)));
|
tokens.push(Token::Paragraph(Paragraph::new(true)));
|
||||||
}
|
}
|
||||||
|
|
@ -63,7 +68,12 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
continue;
|
continue;
|
||||||
},
|
},
|
||||||
Block::Paragraph => {
|
Block::Paragraph => {
|
||||||
if lexeme.text() == "\n" {
|
if lexeme.text() == "\n"
|
||||||
|
&& matches!(state.context.inline, Inline::None)
|
||||||
|
{
|
||||||
|
log!(
|
||||||
|
"Probed {lexeme:#?} from Block::Paragraph -> Block::None"
|
||||||
|
);
|
||||||
tokens.push(Token::Paragraph(Paragraph::new(false)));
|
tokens.push(Token::Paragraph(Paragraph::new(false)));
|
||||||
state.context.block = Block::None;
|
state.context.block = Block::None;
|
||||||
}
|
}
|
||||||
|
|
@ -122,12 +132,7 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Inline::Anchor => {
|
Inline::Anchor => {
|
||||||
if context::anchor::parse(
|
if context::anchor::parse(lexeme, &mut state, &mut tokens) {
|
||||||
lexeme,
|
|
||||||
&mut iterator,
|
|
||||||
&mut state,
|
|
||||||
&mut tokens,
|
|
||||||
) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -135,7 +140,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
|
|
||||||
for &(ref probe, lex) in map {
|
for &(ref probe, lex) in map {
|
||||||
if probe(lexeme) {
|
if probe(lexeme) {
|
||||||
tokens.push(lex(lexeme));
|
let token = lex(lexeme);
|
||||||
|
log!("Lexmap lexed {lexeme:?} into {token:?}");
|
||||||
|
tokens.push(token);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -223,23 +230,31 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn force_flanking() {
|
fn flanking_with_trailing_comma() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|Node||"),
|
read_noconfig("|Node|,"),
|
||||||
|
r#"<p><a href="/node/Node">Node</a>,</p>"#
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn flanking_with_trailing_comma_and_space() {
|
||||||
|
assert_eq!(
|
||||||
|
read_noconfig("|Node|, at"),
|
||||||
|
r#"<p><a href="/node/Node">Node</a>, at</p>"#
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn flanking_at_eoi() {
|
||||||
|
assert_eq!(
|
||||||
|
read_noconfig("|Node|"),
|
||||||
r#"<p><a href="/node/Node">Node</a></p>"#
|
r#"<p><a href="/node/Node">Node</a></p>"#
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn force_flanking_with_trailing_letter() {
|
fn needless_three_pipe_anchor() {
|
||||||
assert_eq!(
|
|
||||||
read_noconfig("|Node||s"),
|
|
||||||
r#"<p><a href="/node/Node">Node</a>s</p>"#
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn flanking_with_trailing_pipe() {
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|Node|Destination|"),
|
read_noconfig("|Node|Destination|"),
|
||||||
r#"<p><a href="/node/Destination">Node</a></p>"#
|
r#"<p><a href="/node/Destination">Node</a></p>"#
|
||||||
|
|
@ -278,6 +293,22 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn nonleading_plural_anchor_at_eoi() {
|
||||||
|
assert_eq!(
|
||||||
|
read_noconfig("element|s"),
|
||||||
|
r#"<p><a href="/node/element">elements</a></p>"#
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn leading_plural_anchor_at_eoi() {
|
||||||
|
assert_eq!(
|
||||||
|
read_noconfig("|element|s"),
|
||||||
|
r#"<p><a href="/node/element">elements</a></p>"#
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn http_external_anchor() {
|
fn http_external_anchor() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
@ -289,26 +320,26 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn http_external_anchor_leading_no_third() {
|
fn http_external_anchor_leading_no_third_then_newline() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|Rust toolchain|https://rustup.rs/ "),
|
read_noconfig(concat!(
|
||||||
r#"<p><a href="https://rustup.rs/">Rust toolchain</a> </p>"#
|
"|Rust toolchain|https://rustup.rs/",
|
||||||
|
"\n",
|
||||||
|
"at rustup.rs",
|
||||||
|
)),
|
||||||
|
concat!(
|
||||||
|
r#"<p><a href="https://rustup.rs/">Rust toolchain</a>"#,
|
||||||
|
"\n",
|
||||||
|
"at rustup.rs</p>",
|
||||||
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn http_external_anchor_leading_no_third_then_punctuation_then_space() {
|
fn http_external_anchor_leading_no_third_then_space() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|Rust toolchain|https://rustup.rs/, "),
|
read_noconfig("|Rust toolchain|https://rustup.rs/ at rustup.rs"),
|
||||||
r#"<p><a href="https://rustup.rs/">Rust toolchain</a>, </p>"#
|
r#"<p><a href="https://rustup.rs/">Rust toolchain</a> at rustup.rs</p>"#
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn http_external_anchor_leading_no_third_then_punctuation_then_eoi() {
|
|
||||||
assert_eq!(
|
|
||||||
read_noconfig("|Rust toolchain|https://rustup.rs/,"),
|
|
||||||
r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -321,13 +352,40 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn clear_anchor_buffer() {
|
fn newline_wrapped_anchor() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|"),
|
read_noconfig("\n|SomeAnchor|\n"),
|
||||||
|
concat!(
|
||||||
|
"\n",
|
||||||
|
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
|
||||||
|
"\n"
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn newline_separated_anchors() {
|
||||||
|
assert_eq!(
|
||||||
|
read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|\n"),
|
||||||
concat!(
|
concat!(
|
||||||
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
|
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
|
||||||
"\n",
|
"\n",
|
||||||
r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#
|
r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
|
||||||
|
"\n"
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty_line_separated_anchors() {
|
||||||
|
assert_eq!(
|
||||||
|
read_noconfig("|SomeAnchor|\n\n|SomeOtherAnchor|\n"),
|
||||||
|
concat!(
|
||||||
|
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
|
||||||
|
"\n",
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,3 @@
|
||||||
use std::{iter::Peekable, slice::Iter};
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
prelude::*,
|
prelude::*,
|
||||||
syntax::content::parser::{
|
syntax::content::parser::{
|
||||||
|
|
@ -11,61 +9,89 @@ use crate::{
|
||||||
///
|
///
|
||||||
/// This function is only called if the current inline context is Anchor.
|
/// This function is only called if the current inline context is Anchor.
|
||||||
///
|
///
|
||||||
/// A return kind of true will trigger a continue in the outer parser,
|
/// A return of `true` will trigger a continue in the outer parser,
|
||||||
/// skipping any further parsing of the current lexeme.
|
/// skipping any further parsing of the current lexeme.
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
/// This function will panic if it can't determine the destination of an anchor.
|
/// This function will panic if it can't determine the destination of an anchor.
|
||||||
pub fn parse(
|
pub fn parse(
|
||||||
lexeme: &Lexeme,
|
lexeme: &Lexeme,
|
||||||
iterator: &mut Peekable<Iter<'_, Lexeme>>,
|
|
||||||
state: &mut State,
|
state: &mut State,
|
||||||
tokens: &mut Vec<Token>,
|
tokens: &mut Vec<Token>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
log!("Resolving open context: {:#?}", state.clone().buffers.anchor);
|
log!(
|
||||||
|
"Resolving open context: {:#?}",
|
||||||
|
state.clone().buffers.anchor
|
||||||
|
);
|
||||||
let buffer = &mut state.buffers.anchor;
|
let buffer = &mut state.buffers.anchor;
|
||||||
let candidate = &mut buffer.candidate;
|
let candidate = &mut buffer.candidate;
|
||||||
|
|
||||||
// This is only true if the anchor is leading; otherwise the outer parser
|
// This is only true if the anchor is leading; otherwise the outer parser
|
||||||
// would already have set its text to the word before the first pipe
|
// would already have set its text to the word before the first pipe
|
||||||
if candidate.text.is_empty() {
|
if candidate.text.is_empty() {
|
||||||
log!("Seeking text at {:#?} -> {:#?}", lexeme.text(), lexeme.next());
|
log!(
|
||||||
|
"Seeking end of text at {:#?} -> {:#?}",
|
||||||
|
lexeme.text(),
|
||||||
|
lexeme.next()
|
||||||
|
);
|
||||||
if lexeme.next() == "|" {
|
if lexeme.next() == "|" {
|
||||||
|
log!("End: Next lexeme is a pipe");
|
||||||
buffer.text.push_str(&lexeme.text());
|
buffer.text.push_str(&lexeme.text());
|
||||||
candidate.text.clone_from(&buffer.text);
|
candidate.text.clone_from(&buffer.text);
|
||||||
log!("End: {:#?}", lexeme.text());
|
|
||||||
return true;
|
|
||||||
} else {
|
} else {
|
||||||
log!("Pushing non-terminal {:#?} into buffer {:#?}",
|
log!(
|
||||||
lexeme.text(), buffer.text);
|
"Pushing non-terminal {:#?} into buffer {:#?}",
|
||||||
|
lexeme.text(),
|
||||||
|
buffer.text
|
||||||
|
);
|
||||||
buffer.text.push_str(&lexeme.text());
|
buffer.text.push_str(&lexeme.text());
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if candidate.destination.is_none() {
|
if candidate.destination.is_none() {
|
||||||
|
log!(
|
||||||
|
"Seeking end of destination at {:#?} -> {:#?}",
|
||||||
|
lexeme.text(),
|
||||||
|
lexeme.next()
|
||||||
|
);
|
||||||
|
|
||||||
log!("Seeking destination at {:#?} -> {:#?}",
|
// Conditions in this decision tree should match the destination end
|
||||||
lexeme.text(), lexeme.next());
|
if lexeme.match_as_char('s')
|
||||||
|
&& lexeme.is_next_boundary()
|
||||||
// Conditions to this decision tree should match the destination end
|
&& !lexeme.match_next_as_char('|')
|
||||||
if lexeme.last(){
|
{
|
||||||
log!("End: no more input");
|
log!("End: Plural anchor");
|
||||||
candidate.destination = Some(candidate.text.clone());
|
candidate.destination = Some(candidate.text.clone());
|
||||||
|
candidate.text.push('s');
|
||||||
|
if lexeme.last() {
|
||||||
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
|
state.context.inline = Inline::None;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
} else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
|
} else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
|
||||||
|
log!("End: Pipe followed by boundary");
|
||||||
if buffer.destination.is_empty() {
|
if buffer.destination.is_empty() {
|
||||||
candidate.destination = Some(candidate.text.clone());
|
candidate.destination = Some(candidate.text.clone());
|
||||||
} else {
|
} else {
|
||||||
candidate.destination = Some(buffer.destination.clone());
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
} else if lexeme.match_as_char('|') {
|
state.context.inline = Inline::None;
|
||||||
log!("Found a pipe, but no boundary: Destination likely follows");
|
|
||||||
return true;
|
return true;
|
||||||
} else if lexeme.is_punctuation() && lexeme.is_next_whitespace() {
|
} else if lexeme.match_as_char('|') && !candidate.balanced {
|
||||||
log!("Found puncutation followed by whitespace");
|
log!("Found a pipe, but no boundary: Destination likely follows");
|
||||||
|
candidate.balanced = true;
|
||||||
|
return true;
|
||||||
|
} else if lexeme.match_as_char('|') {
|
||||||
|
log!("End: Explicit end-of-destination pipe");
|
||||||
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
|
return true;
|
||||||
|
} else if !candidate.external
|
||||||
|
&& lexeme.is_punctuation()
|
||||||
|
&& lexeme.is_next_whitespace()
|
||||||
|
{
|
||||||
|
log!("End: Punctuation followed by whitespace");
|
||||||
candidate.destination = Some(buffer.destination.clone());
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
tokens.push(Token::Anchor(candidate.clone()));
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
state.context.inline = Inline::None;
|
state.context.inline = Inline::None;
|
||||||
|
|
@ -73,16 +99,28 @@ pub fn parse(
|
||||||
} else if lexeme.is_whitespace() {
|
} else if lexeme.is_whitespace() {
|
||||||
log!("End: Whitespace");
|
log!("End: Whitespace");
|
||||||
candidate.destination = Some(buffer.destination.clone());
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
|
state.context.inline = Inline::None;
|
||||||
|
return false;
|
||||||
|
|
||||||
// This else branch is the 'no end found yet' state and will keep
|
// This else branch is the 'no end found yet' state and will keep
|
||||||
// pushing lexemes into the buffer until an end is found above
|
// pushing lexemes into the buffer until an end is found above
|
||||||
} else {
|
} else {
|
||||||
log!(
|
log!(
|
||||||
"Pushing non-terminal {:#?} into buffer {:#?}",
|
"Pushing non-terminal {:#?} into buffer {:#?}",
|
||||||
lexeme.text(), buffer.destination,
|
lexeme.text(),
|
||||||
|
buffer.destination,
|
||||||
);
|
);
|
||||||
|
if lexeme.match_as_char(':') {
|
||||||
|
candidate.external = true;
|
||||||
|
}
|
||||||
buffer.destination.push_str(&lexeme.text());
|
buffer.destination.push_str(&lexeme.text());
|
||||||
return true
|
if lexeme.last() {
|
||||||
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
|
state.context.inline = Inline::None;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -90,7 +128,8 @@ pub fn parse(
|
||||||
// which would mean there is some case where the end of the destination
|
// which would mean there is some case where the end of the destination
|
||||||
// was never found and we kept filling the buffer endlessly,
|
// was never found and we kept filling the buffer endlessly,
|
||||||
// causing the program to panic anyway when rendering anchors
|
// causing the program to panic anyway when rendering anchors
|
||||||
assert!(candidate.destination.is_some(),
|
assert!(
|
||||||
|
candidate.destination.is_some(),
|
||||||
"Anchor context parsing done but no destination found: {:#?}",
|
"Anchor context parsing done but no destination found: {:#?}",
|
||||||
state.buffers.anchor
|
state.buffers.anchor
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,10 @@ impl Lexeme {
|
||||||
self.as_char().is_some_and(|as_char| as_char == c)
|
self.as_char().is_some_and(|as_char| as_char == c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn match_next_as_char(&self, c: char) -> bool {
|
||||||
|
self.next_as_char().is_some_and(|next| next == c)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_punctuation(&self) -> bool {
|
pub fn is_punctuation(&self) -> bool {
|
||||||
let punctuation = Delimiters::default().punctuation;
|
let punctuation = Delimiters::default().punctuation;
|
||||||
self.as_char().is_some_and(|c| punctuation.contains(&c))
|
self.as_char().is_some_and(|c| punctuation.contains(&c))
|
||||||
|
|
@ -80,8 +84,10 @@ impl Lexeme {
|
||||||
|
|
||||||
pub fn is_next_boundary(&self) -> bool {
|
pub fn is_next_boundary(&self) -> bool {
|
||||||
let delimiters = Delimiters::default();
|
let delimiters = Delimiters::default();
|
||||||
self.next_as_char()
|
self.last
|
||||||
.is_some_and(|c| delimiters.is_boundary(c))
|
|| self
|
||||||
|
.next_as_char()
|
||||||
|
.is_some_and(|c| delimiters.is_boundary(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next_first_char(&self) -> Option<char> {
|
pub fn next_first_char(&self) -> Option<char> {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue