Simplify Anchor context parser
This commit is contained in:
parent
48765de3b6
commit
cbefcdcad7
3 changed files with 90 additions and 50 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
use std::collections::{HashMap};
|
use std::collections::{HashMap};
|
||||||
|
|
||||||
use crate::types::Config;
|
use crate::{prelude::*,types::Config};
|
||||||
use super::{Parseable as _, Token, LexMap};
|
use super::{Parseable as _, Token, LexMap};
|
||||||
use token::{
|
use token::{
|
||||||
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
|
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
|
||||||
|
|
@ -28,6 +28,8 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
let segments = segment::segment(text);
|
let segments = segment::segment(text);
|
||||||
let lexemes = Lexeme::collect(&segments);
|
let lexemes = Lexeme::collect(&segments);
|
||||||
|
|
||||||
|
log!("Lexing segments: {segments:?}");
|
||||||
|
|
||||||
let mut iterator = lexemes.iter().peekable();
|
let mut iterator = lexemes.iter().peekable();
|
||||||
while let Some(lexeme) = iterator.next() {
|
while let Some(lexeme) = iterator.next() {
|
||||||
match state.context.block {
|
match state.context.block {
|
||||||
|
|
@ -81,13 +83,22 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
tokens.push(Token::Code(Code::new(true)));
|
tokens.push(Token::Code(Code::new(true)));
|
||||||
continue;
|
continue;
|
||||||
} else if Anchor::probe(lexeme) {
|
} else if Anchor::probe(lexeme) {
|
||||||
|
log!("Positively probed anchor: {lexeme:?}");
|
||||||
state.context.inline = Inline::Anchor;
|
state.context.inline = Inline::Anchor;
|
||||||
state.buffers.anchor.clear();
|
state.buffers.anchor.clear();
|
||||||
|
|
||||||
if lexeme.match_first_char('|') {
|
if lexeme.match_as_char('|') {
|
||||||
|
log!("{:#?} matches as a pipe char", lexeme.text());
|
||||||
state.buffers.anchor.candidate.leading = true;
|
state.buffers.anchor.candidate.leading = true;
|
||||||
} else {
|
} else {
|
||||||
|
log!(
|
||||||
|
"{:#?} not a pipe: assuming it's the anchor text",
|
||||||
|
lexeme.text(),
|
||||||
|
);
|
||||||
state.buffers.anchor.candidate.text = lexeme.text();
|
state.buffers.anchor.candidate.text = lexeme.text();
|
||||||
|
// because we probed positively and this is not a pipe,
|
||||||
|
// the next lexeme must be the pipe, so we skip it as already parsed
|
||||||
|
iterator.next();
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
} else if Oblique::probe(lexeme) {
|
} else if Oblique::probe(lexeme) {
|
||||||
|
|
@ -294,7 +305,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn http_external_anchor_leading_no_third_then_punctuation_then_eof() {
|
fn http_external_anchor_leading_no_third_then_punctuation_then_eoi() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|Rust toolchain|https://rustup.rs/,"),
|
read_noconfig("|Rust toolchain|https://rustup.rs/,"),
|
||||||
r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
|
r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
|
||||||
|
|
@ -302,7 +313,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn http_external_anchor_leading_no_third_then_eof() {
|
fn http_external_anchor_leading_no_third_then_eoi() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig("|Rust toolchain|https://rustup.rs/"),
|
read_noconfig("|Rust toolchain|https://rustup.rs/"),
|
||||||
r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
|
r#"<p><a href="https://rustup.rs/">Rust toolchain</a></p>"#
|
||||||
|
|
@ -331,7 +342,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn eof_pre() {
|
fn eoi_pre() {
|
||||||
let payload = "Jp8INpWzsQmk20jpIhBFCfMUXOztxv0w";
|
let payload = "Jp8INpWzsQmk20jpIhBFCfMUXOztxv0w";
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
read_noconfig(&format!("`\n{payload}\n`")),
|
read_noconfig(&format!("`\n{payload}\n`")),
|
||||||
|
|
|
||||||
|
|
@ -1,74 +1,100 @@
|
||||||
use std::{iter::Peekable, slice::Iter};
|
use std::{iter::Peekable, slice::Iter};
|
||||||
|
|
||||||
use crate::syntax::content::parser::{
|
use crate::{
|
||||||
State, context::Inline, lexeme::Lexeme, token::Token,
|
prelude::*,
|
||||||
|
syntax::content::parser::{
|
||||||
|
State, context::Inline, lexeme::Lexeme, token::Token,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Handles open anchor contexts until an anchor token is fully parsed.
|
||||||
|
///
|
||||||
|
/// This function is only called if the current inline context is Anchor.
|
||||||
|
///
|
||||||
|
/// A return value of `true` will trigger a `continue` in the outer parser,
|
||||||
|
/// skipping any further parsing of the current lexeme.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// This function will panic if it can't determine the destination of an anchor.
|
||||||
pub fn parse(
|
pub fn parse(
|
||||||
lexeme: &Lexeme,
|
lexeme: &Lexeme,
|
||||||
iterator: &mut Peekable<Iter<'_, Lexeme>>,
|
iterator: &mut Peekable<Iter<'_, Lexeme>>,
|
||||||
state: &mut State,
|
state: &mut State,
|
||||||
tokens: &mut Vec<Token>,
|
tokens: &mut Vec<Token>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
|
log!("Resolving open context: {:#?}", state.clone().buffers.anchor);
|
||||||
let buffer = &mut state.buffers.anchor;
|
let buffer = &mut state.buffers.anchor;
|
||||||
let candidate = &mut buffer.candidate;
|
let candidate = &mut buffer.candidate;
|
||||||
|
|
||||||
|
// This is only true if the anchor is leading, otherwise the outer parser
|
||||||
|
// would already have set its text to the word before the first pipe
|
||||||
if candidate.text.is_empty() {
|
if candidate.text.is_empty() {
|
||||||
|
log!("Seeking text at {:#?} -> {:#?}", lexeme.text(), lexeme.next());
|
||||||
if lexeme.next() == "|" {
|
if lexeme.next() == "|" {
|
||||||
buffer.text.push_str(&lexeme.text());
|
buffer.text.push_str(&lexeme.text());
|
||||||
candidate.text.clone_from(&buffer.text);
|
candidate.text.clone_from(&buffer.text);
|
||||||
|
log!("End: {:#?}", lexeme.text());
|
||||||
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
log!("Pushing non-terminal {:#?} into buffer {:#?}",
|
||||||
|
lexeme.text(), buffer.text);
|
||||||
buffer.text.push_str(&lexeme.text());
|
buffer.text.push_str(&lexeme.text());
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
return true;
|
}
|
||||||
} else if candidate.destination.is_none() {
|
|
||||||
// candidate is leading and we found the second pipe
|
if candidate.destination.is_none() {
|
||||||
if candidate.leading && lexeme.text() == "|" {
|
|
||||||
// third pipe immediately after second: forcing flanking
|
log!("Seeking destination at {:#?} -> {:#?}",
|
||||||
if lexeme.match_next_first_char('|') {
|
lexeme.text(), lexeme.next());
|
||||||
|
|
||||||
|
// Conditions to this decision tree should match the destination end
|
||||||
|
if lexeme.last(){
|
||||||
|
log!("End: no more input");
|
||||||
|
candidate.destination = Some(candidate.text.clone());
|
||||||
|
} else if lexeme.match_as_char('|') && lexeme.is_next_boundary() {
|
||||||
|
|
||||||
|
if buffer.destination.is_empty() {
|
||||||
candidate.destination = Some(candidate.text.clone());
|
candidate.destination = Some(candidate.text.clone());
|
||||||
let token = Token::Anchor(candidate.clone());
|
|
||||||
tokens.push(token);
|
|
||||||
state.context.inline = Inline::None;
|
|
||||||
iterator.next();
|
|
||||||
return true;
|
|
||||||
// whitespace or punctuation after pipe: flanking anchor
|
|
||||||
} else if lexeme.is_next_whitespace()
|
|
||||||
|| lexeme.is_next_punctuation()
|
|
||||||
{
|
|
||||||
candidate.destination = Some(candidate.text.clone());
|
|
||||||
let token = Token::Anchor(candidate.clone());
|
|
||||||
tokens.push(token);
|
|
||||||
state.context.inline = Inline::None;
|
|
||||||
// non-whitespace after pipe is the destination
|
|
||||||
} else {
|
} else {
|
||||||
candidate.destination = Some(lexeme.next().clone());
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
let token = Token::Anchor(candidate.clone());
|
return true
|
||||||
tokens.push(token);
|
|
||||||
state.context.inline = Inline::None;
|
|
||||||
// if there is a trailing pipe, consume it
|
|
||||||
if let Some(next) = iterator.next()
|
|
||||||
&& next.next() == "|"
|
|
||||||
{
|
|
||||||
iterator.next();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// candidate is nonleading and we found a second pipe
|
|
||||||
} else if !candidate.leading && lexeme.next() == "|" {
|
} else if lexeme.match_as_char('|') {
|
||||||
candidate.destination = Some(lexeme.text());
|
log!("Found a pipe, but no boundary: Destination likely follows");
|
||||||
|
return true;
|
||||||
|
} else if lexeme.is_punctuation() && lexeme.is_next_whitespace() {
|
||||||
|
log!("Found puncutation followed by whitespace");
|
||||||
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
tokens.push(Token::Anchor(candidate.clone()));
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
state.context.inline = Inline::None;
|
state.context.inline = Inline::None;
|
||||||
iterator.next();
|
return false;
|
||||||
// candidate is nonleading and we found whitespace
|
} else if lexeme.is_whitespace() {
|
||||||
} else if lexeme.is_next_whitespace() {
|
log!("End: Whitespace");
|
||||||
candidate.destination = Some(lexeme.text());
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
let token = Token::Anchor(candidate.clone());
|
|
||||||
tokens.push(token);
|
// This else branch is the 'no end found yet' state and will keep
|
||||||
state.context.inline = Inline::None;
|
// pushing lexemes into the buffer until an end is found above
|
||||||
// candidate is nonleading and we haven't found whitespace
|
|
||||||
} else {
|
} else {
|
||||||
|
log!(
|
||||||
|
"Pushing non-terminal {:#?} into buffer {:#?}",
|
||||||
|
lexeme.text(), buffer.destination,
|
||||||
|
);
|
||||||
buffer.destination.push_str(&lexeme.text());
|
buffer.destination.push_str(&lexeme.text());
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This point should never be reached with a still None destination,
|
||||||
|
// which would mean there is some case where the end of the destination
|
||||||
|
// was never found and we kept filling the buffer endlessly,
|
||||||
|
// causing the program to panic anyways when rendering anchors
|
||||||
|
assert!(candidate.destination.is_some(),
|
||||||
|
"Anchor context parsing done but no destination found: {:#?}",
|
||||||
|
state.buffers.anchor
|
||||||
|
);
|
||||||
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
|
state.context.inline = Inline::None;
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ pub struct Anchor {
|
||||||
pub text: String,
|
pub text: String,
|
||||||
pub destination: Option<String>,
|
pub destination: Option<String>,
|
||||||
pub leading: bool,
|
pub leading: bool,
|
||||||
|
pub balanced: bool,
|
||||||
pub external: bool,
|
pub external: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -45,12 +46,14 @@ impl Anchor {
|
||||||
destination: &str,
|
destination: &str,
|
||||||
leading: bool,
|
leading: bool,
|
||||||
external: bool,
|
external: bool,
|
||||||
|
balanced: bool,
|
||||||
) -> Anchor {
|
) -> Anchor {
|
||||||
Anchor {
|
Anchor {
|
||||||
text: text.to_owned(),
|
text: text.to_owned(),
|
||||||
destination: Some(Anchor::resolve_destination(destination)),
|
destination: Some(Anchor::resolve_destination(destination)),
|
||||||
leading,
|
leading,
|
||||||
external,
|
external,
|
||||||
|
balanced,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -70,7 +73,7 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn render_anchor() {
|
fn render_anchor() {
|
||||||
let anchor = Anchor::new("AnchorText", "AnchorDest", true, false);
|
let anchor = Anchor::new("AnchorText", "AnchorDest", true, false, false);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
anchor.render(),
|
anchor.render(),
|
||||||
r#"<a href="/node/AnchorDest">AnchorText</a>"#
|
r#"<a href="/node/AnchorDest">AnchorText</a>"#
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue