Extract context-specific parsing to a separate module
This commit is contained in:
parent
5c0f786686
commit
5ed2036e36
3 changed files with 158 additions and 120 deletions
|
|
@ -7,10 +7,12 @@ use token::{
|
|||
preformat::PreFormat, literal::Literal, code::Code, oblique::Oblique,
|
||||
};
|
||||
use lexeme::Lexeme;
|
||||
use context::{Context, Block, Inline};
|
||||
|
||||
pub mod token;
|
||||
pub mod lexeme;
|
||||
pub mod segment;
|
||||
pub mod context;
|
||||
|
||||
const LEXMAP: LexMap = &[
|
||||
(LineBreak::probe, |word| {
|
||||
|
|
@ -29,9 +31,9 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
|||
let mut iterator = lexemes.iter().peekable();
|
||||
while let Some(lexeme) = iterator.next() {
|
||||
match state.context.block {
|
||||
BlockContext::None => {
|
||||
Block::None => {
|
||||
if PreFormat::probe(lexeme) {
|
||||
state.context.block = BlockContext::PreFormat;
|
||||
state.context.block = Block::PreFormat;
|
||||
tokens.push(Token::PreFormat(PreFormat::new(true)));
|
||||
continue;
|
||||
} else if Header::probe(lexeme) {
|
||||
|
|
@ -41,49 +43,49 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
|||
iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
|
||||
&mut state.dom_ids,
|
||||
));
|
||||
state.context.block = BlockContext::Header(header.level());
|
||||
state.context.block = Block::Header(header.level());
|
||||
tokens.push(Token::Header(header));
|
||||
continue;
|
||||
} else if Paragraph::probe(lexeme) {
|
||||
state.context.block = BlockContext::Paragraph;
|
||||
state.context.block = Block::Paragraph;
|
||||
tokens.push(Token::Paragraph(Paragraph::new(true)));
|
||||
}
|
||||
},
|
||||
BlockContext::PreFormat => {
|
||||
Block::PreFormat => {
|
||||
if PreFormat::probe(lexeme) {
|
||||
tokens.push(Token::PreFormat(PreFormat::new(false)));
|
||||
state.context.block = BlockContext::None;
|
||||
state.context.block = Block::None;
|
||||
} else {
|
||||
tokens.push(Token::Literal(Literal::lex(lexeme)));
|
||||
}
|
||||
continue;
|
||||
},
|
||||
BlockContext::Paragraph => {
|
||||
Block::Paragraph => {
|
||||
if lexeme.text() == "\n" {
|
||||
tokens.push(Token::Paragraph(Paragraph::new(false)));
|
||||
state.context.block = BlockContext::None;
|
||||
state.context.block = Block::None;
|
||||
}
|
||||
},
|
||||
BlockContext::Header(n) => {
|
||||
Block::Header(n) => {
|
||||
if lexeme.text() == "\n" {
|
||||
tokens.push(Token::Header(Header::from_u8(n, false, None)));
|
||||
state.context.block = BlockContext::None;
|
||||
state.context.block = Block::None;
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
match state.context.inline {
|
||||
InlineContext::None => {
|
||||
Inline::None => {
|
||||
if Code::probe(lexeme) {
|
||||
state.context.inline = InlineContext::Code;
|
||||
state.context.inline = Inline::Code;
|
||||
tokens.push(Token::Code(Code::new(true)));
|
||||
continue;
|
||||
} else if Oblique::probe(lexeme) {
|
||||
state.context.inline = InlineContext::Oblique;
|
||||
state.context.inline = Inline::Oblique;
|
||||
tokens.push(Token::Oblique(Oblique::new(true)));
|
||||
continue;
|
||||
} else if Anchor::probe(lexeme) {
|
||||
state.context.inline = InlineContext::Anchor;
|
||||
state.context.inline = Inline::Anchor;
|
||||
state.buffers.anchor.clear();
|
||||
|
||||
if lexeme.match_first_char('|') {
|
||||
|
|
@ -94,81 +96,27 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
|||
continue;
|
||||
}
|
||||
},
|
||||
InlineContext::Code => {
|
||||
Inline::Code => {
|
||||
if Code::probe(lexeme) {
|
||||
state.context.inline = InlineContext::None;
|
||||
state.context.inline = Inline::None;
|
||||
tokens.push(Token::Code(Code::new(false)));
|
||||
continue;
|
||||
}
|
||||
},
|
||||
InlineContext::Oblique => {
|
||||
Inline::Oblique => {
|
||||
if Oblique::probe(lexeme) {
|
||||
state.context.inline = InlineContext::None;
|
||||
state.context.inline = Inline::None;
|
||||
tokens.push(Token::Oblique(Oblique::new(false)));
|
||||
continue;
|
||||
}
|
||||
},
|
||||
InlineContext::Anchor => {
|
||||
let buffer = &mut state.buffers.anchor;
|
||||
let candidate = &mut buffer.candidate;
|
||||
if candidate.text.is_empty() {
|
||||
if lexeme.next() == "|" {
|
||||
buffer.text.push_str(&lexeme.text());
|
||||
candidate.text.clone_from(&buffer.text);
|
||||
} else {
|
||||
buffer.text.push_str(&lexeme.text());
|
||||
}
|
||||
continue;
|
||||
} else if candidate.destination.is_none() {
|
||||
// candidate is leading and we found the second pipe
|
||||
if candidate.leading && lexeme.text() == "|" {
|
||||
// third pipe immediately after second: forcing flanking
|
||||
if lexeme.match_next_first_char('|') {
|
||||
candidate.destination =
|
||||
Some(candidate.text.clone());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = InlineContext::None;
|
||||
iterator.next();
|
||||
continue;
|
||||
// whitespace or punctuation after pipe: flanking anchor
|
||||
} else if lexeme.is_next_whitespace()
|
||||
|| lexeme.is_next_punctuation()
|
||||
{
|
||||
candidate.destination =
|
||||
Some(candidate.text.clone());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = InlineContext::None;
|
||||
// non-whitespace after pipe is the destination
|
||||
} else {
|
||||
candidate.destination = Some(lexeme.next().clone());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = InlineContext::None;
|
||||
// if there is a trailing pipe, consume it
|
||||
if let Some(next) = iterator.next()
|
||||
&& next.next() == "|"
|
||||
{
|
||||
iterator.next();
|
||||
}
|
||||
}
|
||||
// candidate is nonleading and we found a second pipe
|
||||
} else if !candidate.leading && lexeme.next() == "|" {
|
||||
candidate.destination = Some(lexeme.text());
|
||||
tokens.push(Token::Anchor(candidate.clone()));
|
||||
state.context.inline = InlineContext::None;
|
||||
iterator.next();
|
||||
// candidate is nonleading and we found whitespace
|
||||
} else if lexeme.is_next_whitespace() {
|
||||
candidate.destination = Some(lexeme.text());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = InlineContext::None;
|
||||
// candidate is nonleading and we haven't found whitespace
|
||||
} else {
|
||||
buffer.destination.push_str(&lexeme.text());
|
||||
}
|
||||
Inline::Anchor => {
|
||||
if context::anchor::parse(
|
||||
lexeme,
|
||||
&mut iterator,
|
||||
&mut state,
|
||||
&mut tokens,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
},
|
||||
|
|
@ -182,48 +130,16 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
|||
}
|
||||
}
|
||||
|
||||
close(&state, &mut tokens);
|
||||
context::close(&state, &mut tokens);
|
||||
tokens
|
||||
}
|
||||
|
||||
fn close(state: &State, tokens: &mut Vec<Token>) {
|
||||
match state.context.block {
|
||||
BlockContext::PreFormat => {
|
||||
tokens.push(Token::PreFormat(PreFormat::new(false)));
|
||||
},
|
||||
BlockContext::Paragraph => {
|
||||
tokens.push(Token::Paragraph(Paragraph::new(false)));
|
||||
},
|
||||
BlockContext::Header(_) => panic!("End of file with open header"),
|
||||
BlockContext::None => (),
|
||||
}
|
||||
}
|
||||
|
||||
enum BlockContext {
|
||||
Paragraph,
|
||||
Header(u8),
|
||||
PreFormat,
|
||||
None,
|
||||
}
|
||||
|
||||
enum InlineContext {
|
||||
Anchor,
|
||||
Code,
|
||||
Oblique,
|
||||
None,
|
||||
}
|
||||
|
||||
struct State {
|
||||
pub struct State {
|
||||
context: Context,
|
||||
dom_ids: HashMap<String, Vec<String>>,
|
||||
buffers: Buffers,
|
||||
}
|
||||
|
||||
struct Context {
|
||||
block: BlockContext,
|
||||
inline: InlineContext,
|
||||
}
|
||||
|
||||
struct Buffers {
|
||||
anchor: AnchorBuffer,
|
||||
}
|
||||
|
|
@ -247,8 +163,8 @@ impl State {
|
|||
fn new() -> State {
|
||||
State {
|
||||
context: Context {
|
||||
inline: InlineContext::None,
|
||||
block: BlockContext::None,
|
||||
inline: Inline::None,
|
||||
block: Block::None,
|
||||
},
|
||||
dom_ids: HashMap::new(),
|
||||
buffers: Buffers {
|
||||
|
|
@ -325,6 +241,14 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn anchor_to_node_s() {
    // One sentence carrying two anchors that must both resolve to node "s":
    // `|letter s|s|` (text plus destination) and the bare `|s|`.
    let rendered = read_noconfig("The |letter s|s|'s node: |s|!");
    let expected =
        r#"<p>The <a href="/node/s">letter s</a>'s node: <a href="/node/s">s</a>!</p>"#;

    assert_eq!(rendered, expected);
}
|
||||
|
||||
#[test]
|
||||
fn clear_anchor_buffer() {
|
||||
assert_eq!(
|
||||
|
|
@ -356,27 +280,27 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "End of file with open header")]
|
||||
#[should_panic(expected = "End of input with open header")]
|
||||
fn end_with_open_header() {
|
||||
let default_state = State::new();
|
||||
let state = State {
|
||||
context: Context {
|
||||
block: BlockContext::Header(1),
|
||||
block: Block::Header(1),
|
||||
..default_state.context
|
||||
},
|
||||
..default_state
|
||||
};
|
||||
|
||||
close(&state, &mut vec![]);
|
||||
context::close(&state, &mut vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn end_with_open_preformat() {
|
||||
let mut state = State::new();
|
||||
state.context.block = BlockContext::PreFormat;
|
||||
state.context.block = Block::PreFormat;
|
||||
|
||||
let mut vec: Vec<Token> = vec![];
|
||||
close(&state, &mut vec);
|
||||
context::close(&state, &mut vec);
|
||||
assert_eq!(vec, vec![Token::PreFormat(PreFormat::new(false))]);
|
||||
}
|
||||
|
||||
|
|
|
|||
40
src/syntax/content/parser/context.rs
Normal file
40
src/syntax/content/parser/context.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::syntax::content::parser::{
|
||||
token::{Token, paragraph::Paragraph, preformat::PreFormat},
|
||||
State,
|
||||
};
|
||||
|
||||
pub mod anchor;
|
||||
|
||||
pub struct Context {
|
||||
pub block: Block,
|
||||
pub inline: Inline,
|
||||
}
|
||||
|
||||
/// Block-level construct the parser is currently inside.
///
/// The type is a plain tag (plus a `u8` for headers), so it derives the
/// common comparison/copy/debug traits; this lets callers and tests compare
/// and print contexts directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Block {
    /// Inside a paragraph.
    Paragraph,
    /// Inside a header; the payload is the header level.
    Header(u8),
    /// Inside a preformatted block.
    PreFormat,
    /// No block is open.
    None,
}
|
||||
|
||||
/// Inline-level construct the parser is currently inside.
///
/// A field-less tag type, so it derives the common comparison/copy/debug
/// traits; this lets callers and tests compare and print contexts directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Inline {
    /// Inside an anchor (link) that is still being assembled.
    Anchor,
    /// Inside an inline code span.
    Code,
    /// Inside an oblique span.
    Oblique,
    /// No inline construct is open.
    None,
}
|
||||
|
||||
/// # Panics
|
||||
/// Panics if there is an open header at end of input.
|
||||
pub fn close(state: &State, tokens: &mut Vec<Token>) {
|
||||
match state.context.block {
|
||||
Block::PreFormat => {
|
||||
tokens.push(Token::PreFormat(PreFormat::new(false)));
|
||||
},
|
||||
Block::Paragraph => {
|
||||
tokens.push(Token::Paragraph(Paragraph::new(false)));
|
||||
},
|
||||
Block::Header(_) => panic!("End of input with open header"),
|
||||
Block::None => (),
|
||||
}
|
||||
}
|
||||
74
src/syntax/content/parser/context/anchor.rs
Normal file
74
src/syntax/content/parser/context/anchor.rs
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
use std::{iter::Peekable, slice::Iter};
|
||||
|
||||
use crate::syntax::content::parser::{
|
||||
State, context::Inline, lexeme::Lexeme, token::Token,
|
||||
};
|
||||
|
||||
pub fn parse(
|
||||
lexeme: &Lexeme,
|
||||
iterator: &mut Peekable<Iter<'_, Lexeme>>,
|
||||
state: &mut State,
|
||||
tokens: &mut Vec<Token>,
|
||||
) -> bool {
|
||||
let buffer = &mut state.buffers.anchor;
|
||||
let candidate = &mut buffer.candidate;
|
||||
if candidate.text.is_empty() {
|
||||
if lexeme.next() == "|" {
|
||||
buffer.text.push_str(&lexeme.text());
|
||||
candidate.text.clone_from(&buffer.text);
|
||||
} else {
|
||||
buffer.text.push_str(&lexeme.text());
|
||||
}
|
||||
return true;
|
||||
} else if candidate.destination.is_none() {
|
||||
// candidate is leading and we found the second pipe
|
||||
if candidate.leading && lexeme.text() == "|" {
|
||||
// third pipe immediately after second: forcing flanking
|
||||
if lexeme.match_next_first_char('|') {
|
||||
candidate.destination = Some(candidate.text.clone());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = Inline::None;
|
||||
iterator.next();
|
||||
return true;
|
||||
// whitespace or punctuation after pipe: flanking anchor
|
||||
} else if lexeme.is_next_whitespace()
|
||||
|| lexeme.is_next_punctuation()
|
||||
{
|
||||
candidate.destination = Some(candidate.text.clone());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = Inline::None;
|
||||
// non-whitespace after pipe is the destination
|
||||
} else {
|
||||
candidate.destination = Some(lexeme.next().clone());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = Inline::None;
|
||||
// if there is a trailing pipe, consume it
|
||||
if let Some(next) = iterator.next()
|
||||
&& next.next() == "|"
|
||||
{
|
||||
iterator.next();
|
||||
}
|
||||
}
|
||||
// candidate is nonleading and we found a second pipe
|
||||
} else if !candidate.leading && lexeme.next() == "|" {
|
||||
candidate.destination = Some(lexeme.text());
|
||||
tokens.push(Token::Anchor(candidate.clone()));
|
||||
state.context.inline = Inline::None;
|
||||
iterator.next();
|
||||
// candidate is nonleading and we found whitespace
|
||||
} else if lexeme.is_next_whitespace() {
|
||||
candidate.destination = Some(lexeme.text());
|
||||
let token = Token::Anchor(candidate.clone());
|
||||
tokens.push(token);
|
||||
state.context.inline = Inline::None;
|
||||
// candidate is nonleading and we haven't found whitespace
|
||||
} else {
|
||||
buffer.destination.push_str(&lexeme.text());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue