Extract lexer to its own module
This commit is contained in:
parent
39c7373e7e
commit
834949939a
2 changed files with 95 additions and 77 deletions
|
|
@ -1,86 +1,18 @@
|
|||
use crate::{prelude::*, graph::Graph};
|
||||
use super::{TokenOutput, Parseable as _, LexMap};
|
||||
use token::{LineBreak, Literal};
|
||||
use crate::{prelude::*, graph::Graph, syntax::content::TokenOutput};
|
||||
use context::{Block, Inline};
|
||||
use lexer::{LEXMAP, lex};
|
||||
pub use {lexeme::Lexeme, token::Token, state::State};
|
||||
|
||||
pub mod token;
|
||||
pub mod lexer;
|
||||
pub mod lexeme;
|
||||
pub mod segment;
|
||||
pub mod context;
|
||||
pub mod point;
|
||||
pub mod state;
|
||||
|
||||
const LEXMAP: LexMap = &[
|
||||
(LineBreak::probe, |lexeme| {
|
||||
Token::LineBreak(LineBreak::lex(lexeme))
|
||||
}),
|
||||
(Literal::probe, |lexeme| {
|
||||
Token::Literal(Literal::lex(lexeme))
|
||||
}),
|
||||
];
|
||||
|
||||
fn lex(text: &str, map: LexMap, graph: &Graph, blocking: bool) -> TokenOutput {
|
||||
let mut tokens: Vec<Token> = Vec::default();
|
||||
let mut state = State::default();
|
||||
|
||||
let segments = segment::segment(text);
|
||||
let lexemes = Lexeme::collect(&segments);
|
||||
|
||||
log!(VERBOSE, "Segments: {segments:?}");
|
||||
|
||||
let mut iterator = lexemes.iter().peekable();
|
||||
while let Some(lexeme) = iterator.next() {
|
||||
if lexeme.match_char('\\') {
|
||||
if let Some(next) = iterator.next() {
|
||||
tokens.push(Token::Literal(Literal::lex(next)));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if blocking {
|
||||
if context::block::parse(
|
||||
lexeme,
|
||||
&mut state,
|
||||
&mut tokens,
|
||||
&mut iterator,
|
||||
graph,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if point::parse(lexeme, &mut state, &mut tokens, &mut iterator) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if context::inline::parse(
|
||||
lexeme,
|
||||
&mut state,
|
||||
&mut tokens,
|
||||
&mut iterator,
|
||||
graph,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (probe, lex) in map {
|
||||
if probe(lexeme) {
|
||||
let token = lex(lexeme);
|
||||
log!(VERBOSE, "Lexmap lexed {lexeme} into {token}");
|
||||
tokens.push(token);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
context::close(&state, &mut tokens);
|
||||
|
||||
TokenOutput {
|
||||
tokens,
|
||||
format_tokens: state.format_tokens,
|
||||
text: None,
|
||||
}
|
||||
fn parse(tokens: &[Token]) -> String {
|
||||
tokens.iter().map(Token::render).collect::<String>()
|
||||
}
|
||||
|
||||
pub(super) fn read(input: &str, graph: &Graph) -> String {
|
||||
|
|
@ -112,10 +44,6 @@ pub fn flatten(input: &str, graph: &Graph) -> String {
|
|||
flat
|
||||
}
|
||||
|
||||
fn parse(tokens: &[Token]) -> String {
|
||||
tokens.iter().map(Token::render).collect::<String>()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
|
|
|
|||
90
src/syntax/content/parser/lexer.rs
Normal file
90
src/syntax/content/parser/lexer.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
use crate::{
|
||||
prelude::*,
|
||||
graph::Graph,
|
||||
syntax::content::{
|
||||
TokenOutput, Parseable as _, LexMap,
|
||||
parser::{
|
||||
lexeme::Lexeme,
|
||||
token::{Token, LineBreak, Literal},
|
||||
state::State,
|
||||
segment, context, point,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
pub(super) const LEXMAP: LexMap = &[
|
||||
(LineBreak::probe, |lexeme| {
|
||||
Token::LineBreak(LineBreak::lex(lexeme))
|
||||
}),
|
||||
(Literal::probe, |lexeme| {
|
||||
Token::Literal(Literal::lex(lexeme))
|
||||
}),
|
||||
];
|
||||
|
||||
pub(super) fn lex(
|
||||
text: &str,
|
||||
map: LexMap,
|
||||
graph: &Graph,
|
||||
blocking: bool,
|
||||
) -> TokenOutput {
|
||||
let mut tokens: Vec<Token> = Vec::default();
|
||||
let mut state = State::default();
|
||||
|
||||
let segments = segment::segment(text);
|
||||
let lexemes = Lexeme::collect(&segments);
|
||||
|
||||
log!(VERBOSE, "Segments: {segments:?}");
|
||||
|
||||
let mut iterator = lexemes.iter().peekable();
|
||||
while let Some(lexeme) = iterator.next() {
|
||||
if lexeme.match_char('\\') {
|
||||
if let Some(next) = iterator.next() {
|
||||
tokens.push(Token::Literal(Literal::lex(next)));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if blocking {
|
||||
if context::block::parse(
|
||||
lexeme,
|
||||
&mut state,
|
||||
&mut tokens,
|
||||
&mut iterator,
|
||||
graph,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if point::parse(lexeme, &mut state, &mut tokens, &mut iterator) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if context::inline::parse(
|
||||
lexeme,
|
||||
&mut state,
|
||||
&mut tokens,
|
||||
&mut iterator,
|
||||
graph,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (probe, lex) in map {
|
||||
if probe(lexeme) {
|
||||
let token = lex(lexeme);
|
||||
log!(VERBOSE, "Lexmap lexed {lexeme} into {token}");
|
||||
tokens.push(token);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
context::close(&state, &mut tokens);
|
||||
|
||||
TokenOutput {
|
||||
tokens,
|
||||
format_tokens: state.format_tokens,
|
||||
text: None,
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue