diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs index 0498582..0a3c0f0 100644 --- a/src/syntax/content/parser.rs +++ b/src/syntax/content/parser.rs @@ -1,86 +1,18 @@ -use crate::{prelude::*, graph::Graph}; -use super::{TokenOutput, Parseable as _, LexMap}; -use token::{LineBreak, Literal}; +use crate::{prelude::*, graph::Graph, syntax::content::TokenOutput}; use context::{Block, Inline}; +use lexer::{LEXMAP, lex}; pub use {lexeme::Lexeme, token::Token, state::State}; pub mod token; +pub mod lexer; pub mod lexeme; pub mod segment; pub mod context; pub mod point; pub mod state; -const LEXMAP: LexMap = &[ - (LineBreak::probe, |lexeme| { - Token::LineBreak(LineBreak::lex(lexeme)) - }), - (Literal::probe, |lexeme| { - Token::Literal(Literal::lex(lexeme)) - }), -]; - -fn lex(text: &str, map: LexMap, graph: &Graph, blocking: bool) -> TokenOutput { - let mut tokens: Vec = Vec::default(); - let mut state = State::default(); - - let segments = segment::segment(text); - let lexemes = Lexeme::collect(&segments); - - log!(VERBOSE, "Segments: {segments:?}"); - - let mut iterator = lexemes.iter().peekable(); - while let Some(lexeme) = iterator.next() { - if lexeme.match_char('\\') { - if let Some(next) = iterator.next() { - tokens.push(Token::Literal(Literal::lex(next))); - } - continue; - } - - if blocking { - if context::block::parse( - lexeme, - &mut state, - &mut tokens, - &mut iterator, - graph, - ) { - continue; - } - } - - if point::parse(lexeme, &mut state, &mut tokens, &mut iterator) { - continue; - } - - if context::inline::parse( - lexeme, - &mut state, - &mut tokens, - &mut iterator, - graph, - ) { - continue; - } - - for (probe, lex) in map { - if probe(lexeme) { - let token = lex(lexeme); - log!(VERBOSE, "Lexmap lexed {lexeme} into {token}"); - tokens.push(token); - break; - } - } - } - - context::close(&state, &mut tokens); - - TokenOutput { - tokens, - format_tokens: state.format_tokens, - text: None, - } +fn parse(tokens: &[Token]) -> String { + tokens.iter().map(Token::render).collect::() } pub(super) fn read(input: &str, graph: &Graph) -> String { @@ -112,10 +44,6 @@ pub fn flatten(input: &str, graph: &Graph) -> String { flat } -fn parse(tokens: &[Token]) -> String { - tokens.iter().map(Token::render).collect::() -} - #[cfg(test)] mod tests { use crate::{ diff --git a/src/syntax/content/parser/lexer.rs b/src/syntax/content/parser/lexer.rs new file mode 100644 index 0000000..b87d868 --- /dev/null +++ b/src/syntax/content/parser/lexer.rs @@ -0,0 +1,90 @@ +use crate::{ + prelude::*, + graph::Graph, + syntax::content::{ + TokenOutput, Parseable as _, LexMap, + parser::{ + lexeme::Lexeme, + token::{Token, LineBreak, Literal}, + state::State, + segment, context, point, + }, + }, +}; + +pub(super) const LEXMAP: LexMap = &[ + (LineBreak::probe, |lexeme| { + Token::LineBreak(LineBreak::lex(lexeme)) + }), + (Literal::probe, |lexeme| { + Token::Literal(Literal::lex(lexeme)) + }), +]; + +pub(super) fn lex( + text: &str, + map: LexMap, + graph: &Graph, + blocking: bool, +) -> TokenOutput { + let mut tokens: Vec = Vec::default(); + let mut state = State::default(); + + let segments = segment::segment(text); + let lexemes = Lexeme::collect(&segments); + + log!(VERBOSE, "Segments: {segments:?}"); + + let mut iterator = lexemes.iter().peekable(); + while let Some(lexeme) = iterator.next() { + if lexeme.match_char('\\') { + if let Some(next) = iterator.next() { + tokens.push(Token::Literal(Literal::lex(next))); + } + continue; + } + + if blocking { + if context::block::parse( + lexeme, + &mut state, + &mut tokens, + &mut iterator, + graph, + ) { + continue; + } + } + + if point::parse(lexeme, &mut state, &mut tokens, &mut iterator) { + continue; + } + + if context::inline::parse( + lexeme, + &mut state, + &mut tokens, + &mut iterator, + graph, + ) { + continue; + } + + for (probe, lex) in map { + if probe(lexeme) { + let token = lex(lexeme); + log!(VERBOSE, "Lexmap lexed {lexeme} into {token}"); + tokens.push(token); + break; + } + } + } + + context::close(&state, &mut tokens); + + TokenOutput { + tokens, + format_tokens: state.format_tokens, + text: None, + } +}