Add types to content lexer

This commit is contained in:
Juno Takano 2025-12-15 22:50:11 -03:00
commit 15a9175afb
3 changed files with 137 additions and 45 deletions

View file

@ -16,7 +16,9 @@ pub async fn node(Path(id): Path<String>) -> Response<Body> {
context.insert("incoming", &graph.incoming.get(&id));
let escaped_text = tera::escape_html(&node.text);
let out_text = crate::syntax::content::parse(&escaped_text);
let out_text = crate::syntax::content::parse(&crate::syntax::content::lex(
&escaped_text,
));
context.insert("text", &out_text);
let not_found = node.clone() == empty_node;

View file

@ -1,57 +1,149 @@
use std::fmt::Write as _;
use crate::dev::log;
/// One line of input, paired with its first space-delimited word so the
/// per-token matchers can classify the line without splitting it again.
struct Lexeme<'l> {
/// The text handed to `Lexeme::new`, unchanged.
pub raw: &'l str,
/// Everything in `raw` before the first space (all of `raw` if it has none).
pub first: &'l str,
}
pub fn parse(text: &str) -> String {
impl<'l> Lexeme<'l> {
    /// Wraps `text` in a lexeme and records its first space-delimited word.
    ///
    /// `first` is the slice of `text` that precedes the first `' '`. It is the
    /// empty string when `text` is empty or starts with a space, and the whole
    /// of `text` when it contains no space at all.
    pub fn new(text: &'l str) -> Lexeme<'l> {
        // `str::split` always yields at least one item, so only the head of
        // the iterator is needed; there is no reason to collect every word
        // into a `Vec` first. The fallback is never taken in practice.
        let first = text.split(' ').next().unwrap_or(text);
        Self { raw: text, first }
    }
}
/// A lexed unit of content: built by `lex`, turned into markup by `parse`.
pub enum Token {
/// A line lexed by the `paragraph` module.
Paragraph(paragraph::Paragraph),
/// A line lexed by the `header` module.
Header(header::Header),
}
/// Splits `text` into lines and turns each non-blank line into a [`Token`].
///
/// Lines that are empty or consist solely of spaces are skipped. Every other
/// line is offered to the header matcher first and to the paragraph matcher
/// second; a line that neither accepts produces no token.
pub fn lex(text: &str) -> Vec<Token> {
    let mut tokens = Vec::new();

    // A single predicate covers both "empty" and "only spaces" without
    // building a throwaway `String` for every line.
    for line in text.lines().filter(|line| line.chars().any(|c| c != ' ')) {
        let lexeme = Lexeme::new(line);

        if header::matches(&lexeme) {
            tokens.push(Token::Header(header::lex(&lexeme)));
        } else if paragraph::matches(&lexeme) {
            tokens.push(Token::Paragraph(paragraph::lex(&lexeme)));
        }
    }

    tokens
}
pub fn parse(tokens: &Vec<Token>) -> String {
let mut out_text: Vec<String> = Vec::new();
for line in text.lines() {
if line.is_empty() || line.replace(" ", "").is_empty() {
continue;
}
let mut out_line: String = line.to_owned();
let words: Vec<String> = line.split(" ").map(str::to_string).collect();
let first_word: &String =
words.first().unwrap_or_else(|| unreachable!());
if is_header(first_word) {
out_line = parse_header(&out_line, first_word);
}
// if not special, default to treating line as a paragraph
else {
out_line.insert_str(0, "<p>");
out_line.push_str("</p>");
}
out_text.push(out_line);
for token in tokens {
out_text.push(match token {
Token::Paragraph(p) => p.to_string(),
Token::Header(h) => h.to_string(),
});
}
out_text.join("\n")
}
fn is_header(lexeme: &str) -> bool {
!lexeme.trim().is_empty()
&& lexeme.replace("#", "").is_empty()
&& lexeme.len() <= 6
mod paragraph {
    //! Paragraph tokens: plain lines rendered as HTML `<p>` elements.
    use std::fmt::{self, Display, Formatter};

    use super::Lexeme;

    /// A paragraph's text, stored already trimmed.
    pub struct Paragraph {
        text: String,
    }

    impl Display for Paragraph {
        /// Writes the text wrapped in `<p>` and `</p>`.
        fn fmt(&self, f: &mut Formatter) -> fmt::Result {
            f.write_str("<p>")?;
            f.write_str(&self.text)?;
            f.write_str("</p>")
        }
    }

    /// Returns `true` when the lexeme's raw text has any non-whitespace
    /// character.
    pub fn matches(lexeme: &Lexeme) -> bool {
        lexeme.raw.chars().any(|c| !c.is_whitespace())
    }

    /// Builds a [`Paragraph`] from the lexeme's raw text with surrounding
    /// whitespace removed.
    pub fn lex(lexeme: &Lexeme) -> Paragraph {
        let text = String::from(lexeme.raw.trim());
        Paragraph { text }
    }
}
fn parse_header(line: &str, first_word: &str) -> String {
log(&parse_header, &format!("Parsing: {line:?}"));
mod header {
use crate::dev::log;
use std::fmt::Display;
use super::Lexeme;
let header_level = first_word.len();
log(&parse, &format!("Header level is {header_level}"));
let header_text = line.to_owned().replace(first_word, "");
let mut w = String::with_capacity(header_text.len().strict_add(9));
let alloc = w.capacity();
match write!(w, "<h{header_level}>{header_text}</h{header_level}>") {
Ok(()) => (),
Err(e) => panic!("{e:?}"),
/// Header depth, one variant per level from 1 to 6.
///
/// Its `Display` impl writes the matching digit (`One` -> "1", ..., `Six` -> "6").
enum Level {
One,
Two,
Three,
Four,
Five,
Six,
}
if alloc != w.capacity() {
log(
&parse_header,
&format!("w reallocated to {} despite prediction", w.capacity()),
);
impl Display for Level {
    /// Writes the level as its single decimal digit, "1" through "6".
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let digit = match self {
            Level::One => "1",
            Level::Two => "2",
            Level::Three => "3",
            Level::Four => "4",
            Level::Five => "5",
            Level::Six => "6",
        };
        f.write_str(digit)
    }
}
/// A header token: a depth plus the header's text.
///
/// Its `Display` impl renders it as `<hN>text</hN>`, where `N` is the level.
pub struct Header {
/// Depth of the header (1 through 6).
level: Level,
/// Text placed between the opening and closing tags.
text: String,
}
impl Header {
    /// Creates a header from a numeric level and its text.
    ///
    /// # Panics
    ///
    /// Panics when `level` is outside `1..=6`.
    fn new(level: usize, text: &str) -> Self {
        // Resolve the level first so an invalid value panics before any
        // allocation for the text happens.
        let level = match level {
            1 => Level::One,
            2 => Level::Two,
            3 => Level::Three,
            4 => Level::Four,
            5 => Level::Five,
            6 => Level::Six,
            _ => panic!("Attempted to construct a header with invalid level"),
        };

        Self {
            level,
            text: text.to_owned(),
        }
    }
}
impl Display for Header {
    /// Writes the header as `<hN>text</hN>`, with `N` taken from the level.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // The level appears in both tags, so it is passed once and
        // referenced twice by position.
        write!(f, "<h{0}>{1}</h{0}>", self.level, self.text)
    }
}
/// Returns `true` when the lexeme's first word is a header marker: between
/// one and six `#` characters and nothing else.
pub fn matches(lexeme: &Lexeme) -> bool {
    let marker = lexeme.first;

    // Checking the length range and the bytes directly avoids allocating a
    // copy of the word just to see whether removing `#` leaves it empty.
    // `#` is ASCII, so a byte-wise comparison is exact for any UTF-8 input.
    (1..=6).contains(&marker.len()) && marker.bytes().all(|b| b == b'#')
}
/// Builds a [`Header`] from a lexeme whose first word is a run of `#`.
///
/// The level is the byte length of `lexeme.first`. The text is `lexeme.raw`
/// with that leading marker removed; whatever follows the marker, including
/// the separating space, is kept as is.
///
/// # Panics
///
/// Panics (inside `Header::new`) when the marker length is not 1 through 6.
pub fn lex(lexeme: &Lexeme) -> Header {
    let header_level = lexeme.first.len();
    log(&lex, &format!("Header level is {header_level}"));

    // Remove only the leading marker. A blanket `replace` would also delete
    // every later occurrence of it inside the title (e.g. the `#` in "C#").
    let header_text = lexeme
        .raw
        .strip_prefix(lexeme.first)
        .unwrap_or(lexeme.raw);

    Header::new(header_level, header_text)
}
w
}