Extract lexer to its own module

This commit is contained in:
Juno Takano 2026-02-09 20:39:21 -03:00
commit 834949939a
2 changed files with 95 additions and 77 deletions

View file

@ -1,86 +1,18 @@
use crate::{prelude::*, graph::Graph};
use super::{TokenOutput, Parseable as _, LexMap};
use token::{LineBreak, Literal};
use crate::{prelude::*, graph::Graph, syntax::content::TokenOutput};
use context::{Block, Inline};
use lexer::{LEXMAP, lex};
pub use {lexeme::Lexeme, token::Token, state::State};
pub mod token;
pub mod lexer;
pub mod lexeme;
pub mod segment;
pub mod context;
pub mod point;
pub mod state;
/// Ordered table pairing a probe with the constructor that turns an accepted
/// lexeme into a [`Token`].
///
/// Entries are listed in priority order: `LineBreak` is probed before
/// `Literal`.
const LEXMAP: LexMap = &[
    // Line breaks.
    (LineBreak::probe, |piece| Token::LineBreak(LineBreak::lex(piece))),
    // Literals.
    (Literal::probe, |piece| Token::Literal(Literal::lex(piece))),
];
fn lex(text: &str, map: LexMap, graph: &Graph, blocking: bool) -> TokenOutput {
let mut tokens: Vec<Token> = Vec::default();
let mut state = State::default();
let segments = segment::segment(text);
let lexemes = Lexeme::collect(&segments);
log!(VERBOSE, "Segments: {segments:?}");
let mut iterator = lexemes.iter().peekable();
while let Some(lexeme) = iterator.next() {
if lexeme.match_char('\\') {
if let Some(next) = iterator.next() {
tokens.push(Token::Literal(Literal::lex(next)));
}
continue;
}
if blocking {
if context::block::parse(
lexeme,
&mut state,
&mut tokens,
&mut iterator,
graph,
) {
continue;
}
}
if point::parse(lexeme, &mut state, &mut tokens, &mut iterator) {
continue;
}
if context::inline::parse(
lexeme,
&mut state,
&mut tokens,
&mut iterator,
graph,
) {
continue;
}
for (probe, lex) in map {
if probe(lexeme) {
let token = lex(lexeme);
log!(VERBOSE, "Lexmap lexed {lexeme} into {token}");
tokens.push(token);
break;
}
}
}
context::close(&state, &mut tokens);
TokenOutput {
tokens,
format_tokens: state.format_tokens,
text: None,
}
fn parse(tokens: &[Token]) -> String {
tokens.iter().map(Token::render).collect::<String>()
}
pub(super) fn read(input: &str, graph: &Graph) -> String {
@ -112,10 +44,6 @@ pub fn flatten(input: &str, graph: &Graph) -> String {
flat
}
fn parse(tokens: &[Token]) -> String {
tokens.iter().map(Token::render).collect::<String>()
}
#[cfg(test)]
mod tests {
use crate::{

View file

@ -0,0 +1,90 @@
use crate::{
prelude::*,
graph::Graph,
syntax::content::{
TokenOutput, Parseable as _, LexMap,
parser::{
lexeme::Lexeme,
token::{Token, LineBreak, Literal},
state::State,
segment, context, point,
},
},
};
/// Ordered table pairing a probe with the constructor that turns an accepted
/// lexeme into a [`Token`].
///
/// Entries are listed in priority order: `LineBreak` is probed before
/// `Literal`. [`lex`] walks a table of this shape from the top and stops at
/// the first probe that returns `true`.
pub(super) const LEXMAP: LexMap = &[
    // Line breaks.
    (LineBreak::probe, |piece| Token::LineBreak(LineBreak::lex(piece))),
    // Literals.
    (Literal::probe, |piece| Token::Literal(Literal::lex(piece))),
];
/// Converts raw `text` into a [`TokenOutput`].
///
/// The text is split with `segment::segment`, the segments are gathered into
/// lexemes with `Lexeme::collect`, and each lexeme is offered to the handlers
/// below in order. The first one that accepts it ends that iteration:
///
/// 1. A lexeme matching `'\\'` emits nothing itself; the lexeme that follows
///    it is consumed and pushed as a `Token::Literal`. If nothing follows,
///    the backslash is simply dropped.
/// 2. `context::block::parse`, consulted only when `blocking` is `true`.
/// 3. `point::parse`.
/// 4. `context::inline::parse`.
/// 5. The first entry of `map` whose probe returns `true`.
///
/// A lexeme accepted by none of these produces no token. The handlers in
/// steps 2-4 receive the token list and the lexeme iterator mutably, so they
/// may push tokens or consume further lexemes themselves.
///
/// # Arguments
///
/// * `text` - the input to tokenize.
/// * `map` - probe/constructor pairs tried in order as the last resort.
/// * `graph` - forwarded to the block and inline context parsers.
/// * `blocking` - whether the block context parser is consulted at all.
///
/// # Returns
///
/// A `TokenOutput` holding the collected tokens and the `format_tokens`
/// taken from the final lexer state, with `text` set to `None`.
pub(super) fn lex(
    text: &str,
    map: LexMap,
    graph: &Graph,
    blocking: bool,
) -> TokenOutput {
    let mut collected: Vec<Token> = Vec::new();
    let mut state = State::default();

    let segments = segment::segment(text);
    let lexemes = Lexeme::collect(&segments);
    log!(VERBOSE, "Segments: {segments:?}");

    // A manual `while let` is required: the handlers borrow the iterator
    // mutably inside the loop body, which a `for` loop would not allow.
    let mut stream = lexemes.iter().peekable();
    while let Some(lexeme) = stream.next() {
        // Backslash escape: the next lexeme is taken verbatim as a literal.
        if lexeme.match_char('\\') {
            if let Some(escaped) = stream.next() {
                collected.push(Token::Literal(Literal::lex(escaped)));
            }
            continue;
        }

        // Short-circuit chain: handlers run in this order and evaluation
        // stops at the first one that reports the lexeme as handled.
        let handled = (blocking
            && context::block::parse(
                lexeme,
                &mut state,
                &mut collected,
                &mut stream,
                graph,
            ))
            || point::parse(lexeme, &mut state, &mut collected, &mut stream)
            || context::inline::parse(
                lexeme,
                &mut state,
                &mut collected,
                &mut stream,
                graph,
            );
        if handled {
            continue;
        }

        // Fallback: the first map entry whose probe accepts the lexeme wins.
        if let Some((_, build)) = map.iter().find(|(probe, _)| probe(lexeme)) {
            let token = build(lexeme);
            log!(VERBOSE, "Lexmap lexed {lexeme} into {token}");
            collected.push(token);
        }
    }

    context::close(&state, &mut collected);

    TokenOutput {
        tokens: collected,
        format_tokens: state.format_tokens,
        text: None,
    }
}