From 984c8bcdcc2d564d888212af2ea199d2d0f6df7d Mon Sep 17 00:00:00 2001 From: jutty Date: Tue, 16 Dec 2025 03:48:42 -0300 Subject: [PATCH] Heavy refactor and restructuring of content parser --- Cargo.toml | 1 - src/handlers/graph.rs | 5 +- src/syntax/content.rs | 154 ++++------------------- src/syntax/content/elements.rs | 2 + src/syntax/content/elements/header.rs | 77 ++++++++++++ src/syntax/content/elements/paragraph.rs | 28 +++++ src/syntax/content/parser.rs | 38 ++++++ 7 files changed, 171 insertions(+), 134 deletions(-) create mode 100644 src/syntax/content/elements.rs create mode 100644 src/syntax/content/elements/header.rs create mode 100644 src/syntax/content/elements/paragraph.rs create mode 100644 src/syntax/content/parser.rs diff --git a/Cargo.toml b/Cargo.toml index 9d5f365..b941f2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -188,7 +188,6 @@ pattern_type_mismatch = "warn" pub_without_shorthand = "warn" redundant_test_prefix = "warn" redundant_type_annotations = "warn" -ref_patterns = "warn" renamed_function_params = "warn" rest_pat_in_fully_bound_structs = "warn" return_and_then = "warn" diff --git a/src/handlers/graph.rs b/src/handlers/graph.rs index 5f4f90e..4c8e39f 100644 --- a/src/handlers/graph.rs +++ b/src/handlers/graph.rs @@ -1,4 +1,5 @@ use axum::{body::Body, extract::Path, http::Response}; +use crate::syntax::content::parser; use crate::{formats::populate_graph, handlers, types::Node}; @@ -16,9 +17,7 @@ pub async fn node(Path(id): Path) -> Response { context.insert("incoming", &graph.incoming.get(&id)); let escaped_text = tera::escape_html(&node.text); - let out_text = crate::syntax::content::parse(&crate::syntax::content::lex( - &escaped_text, - )); + let out_text = parser::read(&escaped_text); context.insert("text", &out_text); let not_found = node.clone() == empty_node; diff --git a/src/syntax/content.rs b/src/syntax/content.rs index c218e08..2ec8f9e 100644 --- a/src/syntax/content.rs +++ b/src/syntax/content.rs @@ -1,3 +1,13 @@ +use 
elements::{paragraph::Paragraph, header::Header}; + +mod elements; +pub mod parser; + +enum Token { + Paragraph(Paragraph), + Header(Header), +} + struct Lexeme<'l> { pub raw: &'l str, pub first: &'l str, @@ -14,136 +24,20 @@ impl<'l> Lexeme<'l> { } } -pub enum Token { - Paragraph(paragraph::Paragraph), - Header(header::Header), +trait Parseable { + fn probe(lexeme: &Lexeme) -> bool; + fn lex(lexeme: &Lexeme) -> Self + where + Self: Sized; + fn render(&self) -> String; } -pub fn lex(text: &str) -> Vec<Token> { - let mut tokens = Vec::new(); +type Matcher = fn(&Lexeme) -> bool; +type Constructor = fn(&Lexeme) -> Token; - for line in text - .lines() - .filter(|x| !x.is_empty()) - .filter(|x| !x.replace(" ", "").is_empty()) - { - let lexeme = Lexeme::new(line); - if header::matches(&lexeme) { - tokens.push(Token::Header(header::lex(&lexeme))); - } else if paragraph::matches(&lexeme) { - tokens.push(Token::Paragraph(paragraph::lex(&lexeme))); - } - } - - tokens -} - -pub fn parse(tokens: &Vec<Token>) -> String { - let mut out_text: Vec<String> = Vec::new(); - for token in tokens { - out_text.push(match token { - Token::Paragraph(p) => p.to_string(), - Token::Header(h) => h.to_string(), - }); - } - - out_text.join("\n") -} - -mod paragraph { - use std::fmt::Display; - use super::Lexeme; - - pub struct Paragraph { - text: String, - } - - impl Display for Paragraph { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "<p>{}</p>", &self.text) - } - } - - pub fn matches(lexeme: &Lexeme) -> bool { - !lexeme.raw.trim().is_empty() - } - - pub fn lex(lexeme: &Lexeme) -> Paragraph { - Paragraph { - text: lexeme.raw.trim().to_owned(), - } - } -} - -mod header { - use crate::dev::log; - use std::fmt::Display; - use super::Lexeme; - - enum Level { - One, - Two, - Three, - Four, - Five, - Six, - } - - impl Display for Level { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match *self { - Level::One => write!(f, "1"), - Level::Two => write!(f, "2"), - Level::Three => write!(f, "3"), - Level::Four => write!(f, "4"), - Level::Five => write!(f, "5"), - Level::Six => write!(f, "6"), - } - } - } - - pub struct Header { - level: Level, - text: String, - } - - impl Header { - fn new(level: usize, text: &str) -> Self { - Self { - level: match level { - 1 => Level::One, - 2 => Level::Two, - 3 => Level::Three, - 4 => Level::Four, - 5 => Level::Five, - 6 => Level::Six, - _ => panic!( - "Attempted to construct a header with invalid level" - ), - }, - text: text.to_owned(), - } - } - } - - impl Display for Header { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "<h{}>{}</h{}>", &self.level, self.text, &self.level) - } - } - - pub fn matches(lexeme: &Lexeme) -> bool { - !lexeme.first.trim().is_empty() - && lexeme.first.replace("#", "").is_empty() - && lexeme.first.len() <= 6 - } - - pub fn lex(lexeme: &Lexeme) -> Header { - let header_level = lexeme.first.len(); - log(&lex, &format!("Header level is {header_level}")); - - let header_text = lexeme.raw.replace(lexeme.first, ""); - - Header::new(header_level, &header_text) - } -} +static LEXMAP: &[(Matcher, Constructor)] = &[ + (Header::probe, |lexeme| Token::Header(Header::lex(lexeme))), + (Paragraph::probe, |lexeme| { + Token::Paragraph(Paragraph::lex(lexeme)) + }), +]; diff --git a/src/syntax/content/elements.rs b/src/syntax/content/elements.rs new file mode 100644 index 0000000..c2c6381 --- /dev/null +++ 
b/src/syntax/content/elements.rs @@ -0,0 +1,2 @@ +pub(super) mod paragraph; +pub(super) mod header; diff --git a/src/syntax/content/elements/header.rs b/src/syntax/content/elements/header.rs new file mode 100644 index 0000000..9dda6ba --- /dev/null +++ b/src/syntax/content/elements/header.rs @@ -0,0 +1,77 @@ +use crate::{ + dev::log, + syntax::content::{Parseable, Lexeme}, +}; +use std::fmt::Display; + +enum Level { + One, + Two, + Three, + Four, + Five, + Six, +} + +impl Display for Level { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + Level::One => write!(f, "1"), + Level::Two => write!(f, "2"), + Level::Three => write!(f, "3"), + Level::Four => write!(f, "4"), + Level::Five => write!(f, "5"), + Level::Six => write!(f, "6"), + } + } +} + +pub(in crate::syntax::content) struct Header { + level: Level, + text: String, +} + +impl Header { + fn new(level: usize, text: &str) -> Self { + Self { + level: match level { + 1 => Level::One, + 2 => Level::Two, + 3 => Level::Three, + 4 => Level::Four, + 5 => Level::Five, + 6 => Level::Six, + _ => { + panic!("Attempted to construct a header with invalid level") + }, + }, + text: text.to_owned(), + } + } +} + +impl Parseable for Header { + fn probe(lexeme: &Lexeme) -> bool { + !lexeme.first.trim().is_empty() + && lexeme.first.replace("#", "").is_empty() + && lexeme.first.len() <= 6 + } + + fn lex(lexeme: &Lexeme) -> Self { + let header_level = lexeme.first.len(); + log(&Self::lex, &format!("Header level is {header_level}")); + + let header_text = lexeme.raw.replace(lexeme.first, ""); + + Self::new(header_level, &header_text) + } + + fn render(&self) -> String { + format!("<h{}>{}</h{0}>", &self.level, self.text) + } +} +impl Display for Header { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Level {} Header: <{}>", &self.level, self.text) + } +} diff --git a/src/syntax/content/elements/paragraph.rs b/src/syntax/content/elements/paragraph.rs new file mode 100644 index 
0000000..7367ddc --- /dev/null +++ b/src/syntax/content/elements/paragraph.rs @@ -0,0 +1,28 @@ +use std::fmt::Display; +use crate::syntax::content::{Parseable, Lexeme}; + +pub(in crate::syntax::content) struct Paragraph { + text: String, +} + +impl Parseable for Paragraph { + fn probe(lexeme: &Lexeme) -> bool { + !lexeme.raw.trim().is_empty() + } + + fn lex(lexeme: &Lexeme) -> Self { + Self { + text: lexeme.raw.trim().to_owned(), + } + } + + fn render(&self) -> String { + format!("<p>{}</p>", &self.text) + } +} + +impl Display for Paragraph { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Paragraph: <{}>", &self.text) + } +} diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs new file mode 100644 index 0000000..edab054 --- /dev/null +++ b/src/syntax/content/parser.rs @@ -0,0 +1,38 @@ +use super::{Parseable as _, Token, Lexeme, LEXMAP}; + +pub fn read(text: &str) -> String { + parse(&lex(text)) +} + +fn lex(text: &str) -> Vec<Token> { + let mut tokens = Vec::new(); + + for line in text + .lines() + .filter(|x| !x.is_empty()) + .filter(|x| !x.replace(" ", "").is_empty()) + { + let lexeme = Lexeme::new(line); + + for &(ref matcher, lexer) in LEXMAP { + if matcher(&lexeme) { + tokens.push(lexer(&lexeme)); + break; + } + } + } + + tokens +} + +fn parse(tokens: &[Token]) -> String { + let mut out_text: Vec<String> = Vec::new(); + for token in tokens { + out_text.push(match *token { + Token::Paragraph(ref d) => d.render(), + Token::Header(ref d) => d.render(), + }); + } + + out_text.join("\n") +}