Heavy refactor and restructuring of content parser

This commit is contained in:
Juno Takano 2025-12-16 03:48:42 -03:00
commit 984c8bcdcc
7 changed files with 171 additions and 134 deletions

View file

@ -188,7 +188,6 @@ pattern_type_mismatch = "warn"
pub_without_shorthand = "warn" pub_without_shorthand = "warn"
redundant_test_prefix = "warn" redundant_test_prefix = "warn"
redundant_type_annotations = "warn" redundant_type_annotations = "warn"
ref_patterns = "warn"
renamed_function_params = "warn" renamed_function_params = "warn"
rest_pat_in_fully_bound_structs = "warn" rest_pat_in_fully_bound_structs = "warn"
return_and_then = "warn" return_and_then = "warn"

View file

@ -1,4 +1,5 @@
use axum::{body::Body, extract::Path, http::Response}; use axum::{body::Body, extract::Path, http::Response};
use crate::syntax::content::parser;
use crate::{formats::populate_graph, handlers, types::Node}; use crate::{formats::populate_graph, handlers, types::Node};
@ -16,9 +17,7 @@ pub async fn node(Path(id): Path<String>) -> Response<Body> {
context.insert("incoming", &graph.incoming.get(&id)); context.insert("incoming", &graph.incoming.get(&id));
let escaped_text = tera::escape_html(&node.text); let escaped_text = tera::escape_html(&node.text);
let out_text = crate::syntax::content::parse(&crate::syntax::content::lex( let out_text = parser::read(&escaped_text);
&escaped_text,
));
context.insert("text", &out_text); context.insert("text", &out_text);
let not_found = node.clone() == empty_node; let not_found = node.clone() == empty_node;

View file

@ -1,3 +1,13 @@
use elements::{paragraph::Paragraph, header::Header};
mod elements;
pub mod parser;
/// A lexed content element; each variant wraps the element type built for one line.
enum Token {
/// A paragraph element.
Paragraph(Paragraph),
/// A header element.
Header(Header),
}
struct Lexeme<'l> { struct Lexeme<'l> {
pub raw: &'l str, pub raw: &'l str,
pub first: &'l str, pub first: &'l str,
@ -14,136 +24,20 @@ impl<'l> Lexeme<'l> {
} }
} }
pub enum Token { trait Parseable {
Paragraph(paragraph::Paragraph), fn probe(lexeme: &Lexeme) -> bool;
Header(header::Header), fn lex(lexeme: &Lexeme) -> Self
where
Self: Sized;
fn render(&self) -> String;
} }
pub fn lex(text: &str) -> Vec<Token> { type Matcher = fn(&Lexeme) -> bool;
let mut tokens = Vec::new(); type Constructor = fn(&Lexeme) -> Token;
for line in text static LEXMAP: &[(Matcher, Constructor)] = &[
.lines() (Header::probe, |lexeme| Token::Header(Header::lex(lexeme))),
.filter(|x| !x.is_empty()) (Paragraph::probe, |lexeme| {
.filter(|x| !x.replace(" ", "").is_empty()) Token::Paragraph(Paragraph::lex(lexeme))
{ }),
let lexeme = Lexeme::new(line); ];
if header::matches(&lexeme) {
tokens.push(Token::Header(header::lex(&lexeme)));
} else if paragraph::matches(&lexeme) {
tokens.push(Token::Paragraph(paragraph::lex(&lexeme)));
}
}
tokens
}
/// Renders lexed tokens into a single string, one element per line.
///
/// Every token is formatted through the `Display` implementation of the
/// element it wraps, and the pieces are joined with `\n`. An empty slice
/// yields an empty string.
///
/// Takes a slice rather than `&Vec<Token>`; existing callers that pass a
/// `&Vec<Token>` keep working through deref coercion.
pub fn parse(tokens: &[Token]) -> String {
    tokens
        .iter()
        .map(|token| match token {
            Token::Paragraph(p) => p.to_string(),
            Token::Header(h) => h.to_string(),
        })
        .collect::<Vec<String>>()
        .join("\n")
}
/// Paragraph element: any line that contains non-whitespace text.
mod paragraph {
    use std::fmt::Display;

    use super::Lexeme;

    /// A paragraph holding its text with surrounding whitespace removed.
    pub struct Paragraph {
        text: String,
    }

    impl Display for Paragraph {
        /// Writes the paragraph as a `<p>` element.
        fn fmt(&self, out: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(out, "<p>{}</p>", self.text)
        }
    }

    /// Reports whether the lexeme's raw text has at least one non-whitespace character.
    pub fn matches(lexeme: &Lexeme) -> bool {
        lexeme.raw.chars().any(|c| !c.is_whitespace())
    }

    /// Builds a paragraph from the lexeme's raw text, trimmed on both sides.
    pub fn lex(lexeme: &Lexeme) -> Paragraph {
        let text = String::from(lexeme.raw.trim());
        Paragraph { text }
    }
}
/// Header element: a line whose first token is one to six `#` characters.
mod header {
    use crate::dev::log;
    use std::fmt::Display;

    use super::Lexeme;

    /// Heading depth, one variant per HTML heading tag from `<h1>` to `<h6>`.
    enum Level {
        One,
        Two,
        Three,
        Four,
        Five,
        Six,
    }

    impl Display for Level {
        /// Writes the level as its decimal digit, `1` through `6`.
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            match *self {
                Level::One => write!(f, "1"),
                Level::Two => write!(f, "2"),
                Level::Three => write!(f, "3"),
                Level::Four => write!(f, "4"),
                Level::Five => write!(f, "5"),
                Level::Six => write!(f, "6"),
            }
        }
    }

    /// A heading with its depth and text.
    pub struct Header {
        level: Level,
        text: String,
    }

    impl Header {
        /// Builds a header of the given depth with an owned copy of `text`.
        ///
        /// # Panics
        ///
        /// Panics when `level` is outside `1..=6`.
        fn new(level: usize, text: &str) -> Self {
            Self {
                level: match level {
                    1 => Level::One,
                    2 => Level::Two,
                    3 => Level::Three,
                    4 => Level::Four,
                    5 => Level::Five,
                    6 => Level::Six,
                    _ => panic!(
                        "Attempted to construct a header with invalid level"
                    ),
                },
                text: text.to_owned(),
            }
        }
    }

    impl Display for Header {
        /// Writes the header as an `<hN>` element, where `N` is the level digit.
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(f, "<h{}>{}</h{}>", &self.level, self.text, &self.level)
        }
    }

    /// Reports whether the lexeme's first token is a header marker:
    /// between one and six `#` characters and nothing else.
    pub fn matches(lexeme: &Lexeme) -> bool {
        let marker = lexeme.first;
        (1..=6).contains(&marker.len()) && marker.bytes().all(|byte| byte == b'#')
    }

    /// Builds a header from a lexeme accepted by [`matches`].
    ///
    /// The level is the length of the first token. The text is the raw line
    /// with only the leading marker removed and surrounding whitespace
    /// trimmed, so `#` characters inside the title are preserved.
    ///
    /// # Panics
    ///
    /// Panics (through `Header::new`) when the first token's length is
    /// outside `1..=6`; [`matches`] rules that out.
    pub fn lex(lexeme: &Lexeme) -> Header {
        let header_level = lexeme.first.len();
        log(&lex, &format!("Header level is {header_level}"));
        // Strip a single occurrence: `replace` would also delete every later
        // run of the marker inside the title itself.
        // NOTE(review): assumes `first` is the leading token of `raw` — confirm in `Lexeme::new`.
        let header_text = lexeme.raw.replacen(lexeme.first, "", 1);
        Header::new(header_level, header_text.trim())
    }
}

View file

@ -0,0 +1,2 @@
pub(super) mod paragraph;
pub(super) mod header;

View file

@ -0,0 +1,77 @@
use crate::{
dev::log,
syntax::content::{Parseable, Lexeme},
};
use std::fmt::Display;
/// Heading depth, one variant per HTML heading tag from `<h1>` to `<h6>`.
enum Level {
    One,
    Two,
    Three,
    Four,
    Five,
    Six,
}

impl Display for Level {
    /// Writes the level as its decimal digit, `1` through `6`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let digit = match *self {
            Self::One => "1",
            Self::Two => "2",
            Self::Three => "3",
            Self::Four => "4",
            Self::Five => "5",
            Self::Six => "6",
        };
        f.write_str(digit)
    }
}
/// A heading element, visible only inside `crate::syntax::content`.
pub(in crate::syntax::content) struct Header {
/// Heading depth.
level: Level,
/// Heading text.
text: String,
}
impl Header {
    /// Builds a header of the given depth, storing an owned copy of `text`.
    ///
    /// # Panics
    ///
    /// Panics when `level` is outside `1..=6`.
    fn new(level: usize, text: &str) -> Self {
        let level = match level {
            1 => Level::One,
            2 => Level::Two,
            3 => Level::Three,
            4 => Level::Four,
            5 => Level::Five,
            6 => Level::Six,
            _ => panic!("Attempted to construct a header with invalid level"),
        };
        let text = String::from(text);
        Self { level, text }
    }
}
impl Parseable for Header {
    /// Reports whether the lexeme's first token is a header marker:
    /// between one and six `#` characters and nothing else.
    fn probe(lexeme: &Lexeme) -> bool {
        let marker = lexeme.first;
        (1..=6).contains(&marker.len()) && marker.bytes().all(|byte| byte == b'#')
    }

    /// Builds a header from a lexeme accepted by [`Self::probe`].
    ///
    /// The level is the length of the first token. The text is the raw line
    /// with only the leading marker removed and surrounding whitespace
    /// trimmed, so `#` characters inside the title are preserved.
    ///
    /// # Panics
    ///
    /// Panics (through `Header::new`) when the first token's length is
    /// outside `1..=6`; `probe` rules that out.
    fn lex(lexeme: &Lexeme) -> Self {
        let header_level = lexeme.first.len();
        log(&Self::lex, &format!("Header level is {header_level}"));
        // Strip a single occurrence: `replace` would also delete every later
        // run of the marker inside the title itself.
        // NOTE(review): assumes `first` is the leading token of `raw` — confirm in `Lexeme::new`.
        let header_text = lexeme.raw.replacen(lexeme.first, "", 1);
        Self::new(header_level, header_text.trim())
    }

    /// Renders the header as an `<hN>` element, where `N` is the level digit.
    fn render(&self) -> String {
        format!("<h{}>{}</h{0}>", &self.level, self.text)
    }
}
impl Display for Header {
    /// Writes a human-readable description of the header (level and text),
    /// distinct from the markup produced by `render`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self { ref level, ref text } = *self;
        write!(f, "Level {} Header: <{}>", level, text)
    }
}

View file

@ -0,0 +1,28 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, Lexeme};
/// A paragraph element, visible only inside `crate::syntax::content`.
pub(in crate::syntax::content) struct Paragraph {
/// Paragraph text.
text: String,
}
impl Parseable for Paragraph {
    /// Reports whether the lexeme's raw text has at least one non-whitespace character.
    fn probe(lexeme: &Lexeme) -> bool {
        lexeme.raw.chars().any(|c| !c.is_whitespace())
    }

    /// Builds a paragraph from the lexeme's raw text, trimmed on both sides.
    fn lex(lexeme: &Lexeme) -> Self {
        let text = String::from(lexeme.raw.trim());
        Self { text }
    }

    /// Renders the paragraph as a `<p>` element.
    fn render(&self) -> String {
        ["<p>", self.text.as_str(), "</p>"].concat()
    }
}
impl Display for Paragraph {
    /// Writes a human-readable description of the paragraph,
    /// distinct from the markup produced by `render`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self { ref text } = *self;
        write!(f, "Paragraph: <{}>", text)
    }
}

View file

@ -0,0 +1,38 @@
use super::{Parseable as _, Token, Lexeme, LEXMAP};
/// Lexes `text` into tokens and renders them, returning the resulting string.
pub fn read(text: &str) -> String {
    let tokens = lex(text);
    parse(&tokens)
}
/// Splits `text` into lines and lexes each non-blank line into a token.
///
/// Empty and whitespace-only lines are skipped before a `Lexeme` is built.
/// Every remaining line is offered to the `LEXMAP` entries in order, and the
/// first matcher that accepts it decides which constructor builds the token.
/// A line that no matcher accepts produces no token.
fn lex(text: &str) -> Vec<Token> {
    text.lines()
        // `trim` covers tabs and other whitespace as well as spaces, and
        // avoids allocating a new `String` per line just to test for blankness.
        .filter(|line| !line.trim().is_empty())
        .filter_map(|line| {
            let lexeme = Lexeme::new(line);
            LEXMAP
                .iter()
                .find(|&&(matcher, _)| matcher(&lexeme))
                .map(|&(_, lexer)| lexer(&lexeme))
        })
        .collect()
}
/// Renders each token through its element's `render` method and joins the
/// results with `\n`. An empty slice yields an empty string.
fn parse(tokens: &[Token]) -> String {
    tokens
        .iter()
        .map(|token| match *token {
            Token::Paragraph(ref paragraph) => paragraph.render(),
            Token::Header(ref header) => header.render(),
        })
        .collect::<Vec<String>>()
        .join("\n")
}