From d51b9a135e2b1b83bc2ce0ade06f4c3620759442 Mon Sep 17 00:00:00 2001 From: jutty Date: Wed, 17 Dec 2025 00:16:44 -0300 Subject: [PATCH] Scaffold a second content parser pass --- src/handlers/graph.rs | 7 ++-- src/handlers/navigation.rs | 17 +++++++-- src/syntax/content.rs | 30 +++++++++------ src/syntax/content/lexeme.rs | 24 ++++++++++++ src/syntax/content/parser.rs | 31 --------------- src/syntax/content/parsers.rs | 2 + src/syntax/content/parsers/line.rs | 33 ++++++++++++++++ .../content/{ => parsers/line}/elements.rs | 0 .../{ => parsers/line}/elements/header.rs | 22 ++++++----- .../{ => parsers/line}/elements/paragraph.rs | 10 ++--- .../{ => parsers/line}/elements/span.rs | 10 ++--- src/syntax/content/parsers/line/parser.rs | 38 +++++++++++++++++++ src/syntax/content/parsers/word.rs | 15 ++++++++ src/syntax/content/parsers/word/elements.rs | 2 + .../content/parsers/word/elements/anchor.rs | 33 ++++++++++++++++ .../content/parsers/word/elements/literal.rs | 28 ++++++++++++++ src/syntax/content/parsers/word/parser.rs | 37 ++++++++++++++++++ src/syntax/content/{units.rs => token.rs} | 29 ++++++-------- src/types.rs | 4 +- 19 files changed, 282 insertions(+), 90 deletions(-) create mode 100644 src/syntax/content/lexeme.rs delete mode 100644 src/syntax/content/parser.rs create mode 100644 src/syntax/content/parsers.rs create mode 100644 src/syntax/content/parsers/line.rs rename src/syntax/content/{ => parsers/line}/elements.rs (100%) rename src/syntax/content/{ => parsers/line}/elements/header.rs (70%) rename src/syntax/content/{ => parsers/line}/elements/paragraph.rs (64%) rename src/syntax/content/{ => parsers/line}/elements/span.rs (65%) create mode 100644 src/syntax/content/parsers/line/parser.rs create mode 100644 src/syntax/content/parsers/word.rs create mode 100644 src/syntax/content/parsers/word/elements.rs create mode 100644 src/syntax/content/parsers/word/elements/anchor.rs create mode 100644 src/syntax/content/parsers/word/elements/literal.rs create mode 100644 src/syntax/content/parsers/word/parser.rs rename src/syntax/content/{units.rs => token.rs} (53%) diff --git a/src/handlers/graph.rs b/src/handlers/graph.rs index d919afb..34013da 100644 --- a/src/handlers/graph.rs +++ b/src/handlers/graph.rs @@ -1,7 +1,8 @@ use axum::{body::Body, extract::Path, http::Response}; -use crate::syntax::content::elements::paragraph::Paragraph; -use crate::syntax::content::parser; +use crate::syntax::content::parsers::line::elements::paragraph::Paragraph; +use crate::syntax::content; +use crate::syntax::content::parsers::word::elements::literal::Literal; use crate::{formats::populate_graph, handlers, types::Node}; pub async fn node(Path(id): Path) -> Response { @@ -16,7 +17,7 @@ pub async fn node(Path(id): Path) -> Response { context.insert("incoming", &graph.incoming.get(&id)); context.insert("config", &graph.meta.config); - let out_text = parser::read::(&node.text); + let out_text = content::parse::(&node.text); context.insert("text", &out_text); let not_found = *node == empty_node; diff --git a/src/handlers/navigation.rs b/src/handlers/navigation.rs index bd61f31..a9e0751 100644 --- a/src/handlers/navigation.rs +++ b/src/handlers/navigation.rs @@ -8,9 +8,14 @@ use axum::{ use crate::{ formats::populate_graph, handlers, - syntax::content::parser, + syntax::content::{ + self, + parsers::{ + line::elements::{paragraph::Paragraph, span::Span}, + word::elements::literal::Literal, + }, + }, types::{Config, Node}, - syntax::content::elements::{span::Span, paragraph::Paragraph}, }; #[expect(clippy::unused_async)] @@ -24,8 +29,12 @@ pub async fn page(template: &str) -> Response { context.insert("root_node", &root_node); let text_parsed_config = Config { - footer_text: parser::read::(&graph.meta.config.footer_text), - about_text: parser::read::(&graph.meta.config.about_text), + footer_text: content::parse::( + &graph.meta.config.footer_text, + ), + about_text: content::parse::( + &graph.meta.config.about_text, + ), ..graph.meta.config }; diff --git a/src/syntax/content.rs b/src/syntax/content.rs index aab6405..0235c45 100644 --- a/src/syntax/content.rs +++ b/src/syntax/content.rs @@ -1,9 +1,10 @@ -use elements::{header::Header}; -use units::{Token, Lexeme}; +use token::{Token}; +use parsers::{line::Line, word::Word}; +use lexeme::Lexeme; -mod units; -pub mod elements; -pub mod parser; +mod token; +pub mod lexeme; +pub mod parsers; pub trait Parseable: Into { fn probe(lexeme: &Lexeme) -> bool; @@ -13,14 +14,10 @@ pub trait Parseable: Into { type Probe = fn(&Lexeme) -> bool; type Lexer = fn(&Lexeme) -> Token; -type LexEntry = (Probe, Lexer); -type LexMap<'lm> = &'lm [LexEntry]; +type LexMap<'lm> = &'lm [(Probe, Lexer)]; -const LEXMAP: LexMap = - &[(Header::probe, |lexeme| Token::Header(Header::lex(lexeme)))]; - -fn make_lexmap() -> Vec { - let mut vector: Vec<(Probe, Lexer)> = LEXMAP.to_vec(); +fn make_lexmap(base: LexMap) -> Vec<(Probe, Lexer)> { + let mut vector: Vec<(Probe, Lexer)> = base.to_vec(); fn adapter(lex: &Lexeme) -> Token { D::lex(lex).into() @@ -29,3 +26,12 @@ fn make_lexmap() -> Vec { vector.push((DefaultToken::probe, adapter::)); vector } + +pub fn parse( + text: &str, +) -> String { + let escaped_text = tera::escape_html(text); + parsers::line::parser::read::( + &parsers::word::parser::read::(&escaped_text), + ) +} diff --git a/src/syntax/content/lexeme.rs b/src/syntax/content/lexeme.rs new file mode 100644 index 0000000..398c606 --- /dev/null +++ b/src/syntax/content/lexeme.rs @@ -0,0 +1,24 @@ +use super::parsers::{line::Line, word::Word}; + +#[derive(Clone)] +pub enum Lexeme { + Line(Line), + Word(Word), +} + +impl Lexeme { + pub fn to_raw(&self) -> String { + match *self { + Lexeme::Line(ref d) => d.raw.clone(), + Lexeme::Word(ref d) => d.raw.clone(), + } + } + + pub fn to_vec(self) -> Vec { + self.to_raw().split(' ').map(str::to_string).collect() + } + + pub fn first(self) -> Option { + self.to_vec().first().map(String::to_owned) + } +} diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs deleted file mode 100644 index ebbb83d..0000000 --- a/src/syntax/content/parser.rs +++ /dev/null @@ -1,31 +0,0 @@ -use crate::syntax::content::{Parseable, Token, Lexeme, make_lexmap}; - -pub fn read(text: &str) -> String { - let escaped_text = tera::escape_html(text); - parse(&lex(&escaped_text, &make_lexmap::())) -} - -fn lex(text: &str, map: super::LexMap) -> Vec { - let mut tokens: Vec = Vec::new(); - - for line in text.lines().filter(|x| !x.trim().is_empty()) { - let lexeme = Lexeme::new(line); - - for &(ref matcher, lexer) in map { - if matcher(&lexeme) { - tokens.push(lexer(&lexeme)); - break; - } - } - } - - tokens -} - -fn parse(tokens: &[Token]) -> String { - tokens - .iter() - .map(Token::render) - .collect::>() - .join("\n") -} diff --git a/src/syntax/content/parsers.rs b/src/syntax/content/parsers.rs new file mode 100644 index 0000000..06cc152 --- /dev/null +++ b/src/syntax/content/parsers.rs @@ -0,0 +1,2 @@ +pub mod line; +pub mod word; diff --git a/src/syntax/content/parsers/line.rs b/src/syntax/content/parsers/line.rs new file mode 100644 index 0000000..e8c7c1b --- /dev/null +++ b/src/syntax/content/parsers/line.rs @@ -0,0 +1,33 @@ +use crate::syntax::content::lexeme::Lexeme; + +pub mod parser; +pub mod elements; + +#[derive(Clone)] +pub struct Line { + pub raw: String, + pub first: String, +} + +impl Line { + pub fn new(text: &str) -> Line { + let vec: Vec<&str> = text.split(" ").collect(); + + Line { + raw: text.to_owned(), + first: vec.first().unwrap_or_else(|| unreachable!()).to_string(), + } + } +} + +impl From for Line { + fn from(lexeme: Lexeme) -> Line { + match lexeme { + Lexeme::Word(w) => Line { + raw: w.raw.clone(), + first: w.raw.split(' ').next().unwrap_or_default().to_owned(), + }, + Lexeme::Line(l) => l, + } + } +} diff --git a/src/syntax/content/elements.rs b/src/syntax/content/parsers/line/elements.rs similarity index 100% rename from src/syntax/content/elements.rs rename to src/syntax/content/parsers/line/elements.rs diff --git a/src/syntax/content/elements/header.rs b/src/syntax/content/parsers/line/elements/header.rs similarity index 70% rename from src/syntax/content/elements/header.rs rename to src/syntax/content/parsers/line/elements/header.rs index fe01163..a26b638 100644 --- a/src/syntax/content/elements/header.rs +++ b/src/syntax/content/parsers/line/elements/header.rs @@ -32,8 +32,8 @@ pub struct Header { } impl Header { - fn new(level: usize, text: &str) -> Self { - Self { + fn new(level: usize, text: &str) -> Header { + Header { level: match level { 1 => Level::One, 2 => Level::Two, @@ -52,18 +52,20 @@ impl Header { impl Parseable for Header { fn probe(lexeme: &Lexeme) -> bool { - !lexeme.first.trim().is_empty() - && lexeme.first.replace("#", "").is_empty() - && lexeme.first.len() <= 6 + let first = lexeme.clone().first().unwrap_or_default(); + !first.trim().is_empty() + && first.replace("#", "").is_empty() + && first.len() <= 6 } - fn lex(lexeme: &Lexeme) -> Self { - let header_level = lexeme.first.len(); - log(&Self::lex, &format!("Header level is {header_level}")); + fn lex(lexeme: &Lexeme) -> Header { + let first = lexeme.clone().first().unwrap_or_else(|| unreachable!()); + let header_level = &first.len(); + log(&Header::lex, &format!("Header level is {header_level}")); - let header_text = lexeme.raw.replace(lexeme.first, ""); + let header_text = lexeme.to_raw().replace(&first, ""); - Self::new(header_level, &header_text) + Header::new(*header_level, &header_text) } fn render(&self) -> String { diff --git a/src/syntax/content/elements/paragraph.rs b/src/syntax/content/parsers/line/elements/paragraph.rs similarity index 64% rename from src/syntax/content/elements/paragraph.rs rename to src/syntax/content/parsers/line/elements/paragraph.rs index 5d802ce..31ee35c 100644 --- a/src/syntax/content/elements/paragraph.rs +++ b/src/syntax/content/parsers/line/elements/paragraph.rs @@ -1,5 +1,5 @@ use std::fmt::Display; -use crate::syntax::content::{Parseable, Lexeme}; +use crate::syntax::content::{Parseable, lexeme::Lexeme}; pub struct Paragraph { text: String, @@ -7,12 +7,12 @@ pub struct Paragraph { impl Parseable for Paragraph { fn probe(lexeme: &Lexeme) -> bool { - !lexeme.raw.trim().is_empty() + !lexeme.to_raw().trim().is_empty() } - fn lex(lexeme: &Lexeme) -> Self { - Self { - text: lexeme.raw.trim().to_owned(), + fn lex(lexeme: &Lexeme) -> Paragraph { + Paragraph { + text: lexeme.to_raw().trim().to_owned(), } } diff --git a/src/syntax/content/elements/span.rs b/src/syntax/content/parsers/line/elements/span.rs similarity index 65% rename from src/syntax/content/elements/span.rs rename to src/syntax/content/parsers/line/elements/span.rs index c375f8d..92ee07d 100644 --- a/src/syntax/content/elements/span.rs +++ b/src/syntax/content/parsers/line/elements/span.rs @@ -1,5 +1,5 @@ use std::fmt::Display; -use crate::syntax::content::{Parseable, Lexeme}; +use crate::syntax::content::{Parseable, lexeme::Lexeme}; pub struct Span { text: String, @@ -7,12 +7,12 @@ pub struct Span { impl Parseable for Span { fn probe(lexeme: &Lexeme) -> bool { - !lexeme.raw.trim().is_empty() + !lexeme.to_raw().trim().is_empty() } - fn lex(lexeme: &Lexeme) -> Self { - Self { - text: lexeme.raw.trim().to_owned(), + fn lex(lexeme: &Lexeme) -> Span { + Span { + text: lexeme.to_raw().trim().to_owned(), } } diff --git a/src/syntax/content/parsers/line/parser.rs b/src/syntax/content/parsers/line/parser.rs new file mode 100644 index 0000000..258dbc9 --- /dev/null +++ b/src/syntax/content/parsers/line/parser.rs @@ -0,0 +1,38 @@ +use crate::syntax::content::{ + LexMap, Line, Parseable, Token, parsers::line::elements::header::Header, + make_lexmap, Lexeme, +}; + +const LEXMAP: LexMap = + &[(Header::probe, |line| Token::Header(Header::lex(line)))]; + +pub(in crate::syntax::content) fn read( + text: &str, +) -> String { + parse(&lex(text, &make_lexmap::(LEXMAP))) +} + +fn lex(text: &str, map: LexMap) -> Vec { + let mut tokens: Vec = Vec::new(); + + for raw_line in text.lines() { + let line = Lexeme::Line(Line::new(raw_line)); + + for &(ref probe, lex) in map { + if probe(&line) { + tokens.push(lex(&line)); + break; + } + } + } + + tokens +} + +fn parse(tokens: &[Token]) -> String { + tokens + .iter() + .map(Token::render) + .collect::>() + .join("\n") +} diff --git a/src/syntax/content/parsers/word.rs b/src/syntax/content/parsers/word.rs new file mode 100644 index 0000000..36ff23a --- /dev/null +++ b/src/syntax/content/parsers/word.rs @@ -0,0 +1,15 @@ +pub mod parser; +pub mod elements; + +#[derive(Clone)] +pub struct Word { + pub raw: String, +} + +impl Word { + pub fn new(text: &str) -> Word { + Word { + raw: text.to_owned(), + } + } +} diff --git a/src/syntax/content/parsers/word/elements.rs b/src/syntax/content/parsers/word/elements.rs new file mode 100644 index 0000000..ff86880 --- /dev/null +++ b/src/syntax/content/parsers/word/elements.rs @@ -0,0 +1,2 @@ +pub mod literal; +pub mod anchor; diff --git a/src/syntax/content/parsers/word/elements/anchor.rs b/src/syntax/content/parsers/word/elements/anchor.rs new file mode 100644 index 0000000..8d0dc6a --- /dev/null +++ b/src/syntax/content/parsers/word/elements/anchor.rs @@ -0,0 +1,33 @@ +// use std::fmt::Display; +// use crate::syntax::content::{Parseable, Line}; +// +// pub struct Anchor { +// text: String, +// destination: String, +// } +// +// impl Parseable for Anchor { +// fn probe(line: &Line) -> bool { +// let candidate = line.raw.split(' '); +// !line.first.trim().is_empty() +// && line.first.replace("#", "").is_empty() +// && line.first.len() <= 6 +// } +// +// fn lex(line: &Line) -> Self { +// Self { +// text: line.raw.trim().to_owned(), +// destination: t +// } +// } +// +// fn render(&self) -> String { +// format!(r#"{}"#, &self.destination, &self.text) +// } +// } +// +// impl Display for Anchor { +// fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { +// write!(f, "Anchor: <{}>", &self.text) +// } +// } diff --git a/src/syntax/content/parsers/word/elements/literal.rs b/src/syntax/content/parsers/word/elements/literal.rs new file mode 100644 index 0000000..3c8ee78 --- /dev/null +++ b/src/syntax/content/parsers/word/elements/literal.rs @@ -0,0 +1,28 @@ +use std::fmt::Display; +use crate::syntax::content::{Parseable, lexeme::Lexeme}; + +pub struct Literal { + text: String, +} + +impl Parseable for Literal { + fn probe(lexeme: &Lexeme) -> bool { + !lexeme.to_raw().is_empty() + } + + fn lex(lexeme: &Lexeme) -> Literal { + Literal { + text: lexeme.to_raw().trim().to_owned(), + } + } + + fn render(&self) -> String { + self.text.clone() + } +} + +impl Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Literal: <{}>", &self.text) + } +} diff --git a/src/syntax/content/parsers/word/parser.rs b/src/syntax/content/parsers/word/parser.rs new file mode 100644 index 0000000..19a3688 --- /dev/null +++ b/src/syntax/content/parsers/word/parser.rs @@ -0,0 +1,37 @@ +use crate::syntax::content::parsers::word::elements::literal::Literal; +use crate::syntax::content::{Parseable, Token, Word, LexMap, make_lexmap}; +use crate::syntax::content::lexeme::Lexeme; + +const LEXMAP: LexMap = + &[(Literal::probe, |line| Token::Literal(Literal::lex(line)))]; + +pub(in crate::syntax::content) fn read( + text: &str, +) -> String { + parse(&lex(text, &make_lexmap::(LEXMAP))) +} + +fn lex(text: &str, map: LexMap) -> Vec { + let mut tokens: Vec = Vec::new(); + + for raw_word in text.split(" ") { + let word = Lexeme::Word(Word::new(raw_word)); + + for &(ref probe, lex) in map { + if probe(&word) { + tokens.push(lex(&word)); + break; + } + } + } + + tokens +} + +fn parse(tokens: &[Token]) -> String { + tokens + .iter() + .map(Token::render) + .collect::>() + .join(" ") +} diff --git a/src/syntax/content/units.rs b/src/syntax/content/token.rs similarity index 53% rename from src/syntax/content/units.rs rename to src/syntax/content/token.rs index 1f38058..5871fb6 100644 --- a/src/syntax/content/units.rs +++ b/src/syntax/content/token.rs @@ -1,5 +1,6 @@ -use crate::syntax::content::Parseable as _; -use crate::syntax::content::elements::{ +use super::Parseable as _; +use super::parsers::word::elements::{literal::Literal}; +use super::parsers::line::elements::{ paragraph::Paragraph, header::Header, span::Span, }; @@ -7,6 +8,7 @@ pub enum Token { Paragraph(Paragraph), Header(Header), Span(Span), + Literal(Literal), } impl Token { @@ -15,40 +17,31 @@ impl Token { Token::Paragraph(ref d) => d.render(), Token::Header(ref d) => d.render(), Token::Span(ref d) => d.render(), + Token::Literal(ref d) => d.render(), } } } impl From for Token { - fn from(d: Paragraph) -> Self { + fn from(d: Paragraph) -> Token { Token::Paragraph(d) } } impl From
for Token { - fn from(d: Header) -> Self { + fn from(d: Header) -> Token { Token::Header(d) } } impl From for Token { - fn from(d: Span) -> Self { + fn from(d: Span) -> Token { Token::Span(d) } } -pub struct Lexeme<'l> { - pub raw: &'l str, - pub first: &'l str, -} - -impl<'l> Lexeme<'l> { - pub fn new(text: &'l str) -> Lexeme<'l> { - let vec: Vec<&'l str> = text.split(" ").collect(); - - Self { - raw: text, - first: vec.first().unwrap_or_else(|| unreachable!()), - } +impl From for Token { + fn from(d: Literal) -> Token { + Token::Literal(d) } } diff --git a/src/types.rs b/src/types.rs index 35f1aeb..f828294 100644 --- a/src/types.rs +++ b/src/types.rs @@ -93,7 +93,7 @@ fn mktrue() -> bool { impl Graph { pub fn new(message: Option) -> Graph { - Self { + Graph { nodes: HashMap::new(), root_node: "VoidNode".to_string(), incoming: HashMap::new(), @@ -128,7 +128,7 @@ impl Graph { impl Node { pub fn new(message: Option) -> Node { - Self { + Node { id: "VoidNode".to_string(), title: "Pure Void".to_string(), text: match message {