Add word-level parsing

This commit is contained in:
Juno Takano 2025-12-18 02:20:11 -03:00
commit 198bc12507
34 changed files with 743 additions and 446 deletions

View file

@ -0,0 +1,68 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
/// A hyperlink parsed from a single pipe-delimited lexeme.
///
/// Rendered as an HTML `<a>` element by its `Parseable::render` implementation.
pub struct Anchor {
/// Visible link text (the first non-empty pipe-separated segment).
text: String,
/// Link target placed in the `href` attribute (the second non-empty segment).
destination: String,
}
impl Parseable for Anchor {
    /// Reports whether `lexeme` has the shape of an anchor.
    ///
    /// Accepted shapes are `text|dest`, `|text|dest` and `|text|dest|`:
    /// between one and three pipes, never two pipes in a row, and leading /
    /// trailing pipes only in the combinations listed above. An empty lexeme
    /// is rejected.
    fn probe(lexeme: &Lexeme) -> bool {
        let pipes = lexeme.count_char('|');
        let chars = lexeme.split_chars();
        // Without a first and last character there is nothing to inspect.
        let (Some(&first), Some(&last)) = (chars.first(), chars.last()) else {
            return false;
        };
        // Reject out-of-range pipe counts and empty segments (`||`).
        if !(1_i32..=3_i32).contains(&pipes) || lexeme.to_raw().contains("||") {
            return false;
        }
        let starts_piped = first == '|';
        let ends_piped = last == '|';
        match pipes {
            1 => !starts_piped && !ends_piped,
            2 => starts_piped && !ends_piped,
            3 => starts_piped && ends_piped,
            _ => false,
        }
    }

    /// Builds an [`Anchor`] from the two non-empty pipe-separated segments.
    ///
    /// # Panics
    ///
    /// Panics when the lexeme does not contain exactly two non-empty
    /// segments; lexemes accepted by `probe` always do.
    fn lex(lexeme: &Lexeme) -> Anchor {
        let raw = lexeme.to_raw();
        let parts: Vec<String> = raw
            .split('|')
            .filter(|segment| !segment.is_empty())
            .map(String::from)
            .collect();
        assert!(parts.len() == 2, "Parts should always be 2: {parts:?}");
        let mut segments = parts.into_iter();
        match (segments.next(), segments.next()) {
            (Some(text), Some(destination)) => Anchor { text, destination },
            // Guarded by the length assertion above.
            _ => unreachable!(),
        }
    }

    /// Renders the anchor as an HTML `<a>` element.
    fn render(&self) -> String {
        // NOTE(review): both values are interpolated without HTML escaping.
        let Anchor { text, destination } = self;
        format!(r#"<a href="{destination}">{text}</a>"#)
    }
}
impl Display for Anchor {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Anchor: <{}> to <{}>", &self.text, &self.destination)
}
}

View file

@ -0,0 +1,137 @@
use crate::{
prelude::*,
syntax::content::{Parseable, Lexeme},
};
use std::fmt::Display;
/// A header tag of a given level, either opening or closing.
pub struct Header {
    /// `Some(true)` for an opening tag, `Some(false)` for a closing tag.
    /// Rendering panics when this is `None`.
    open: Option<bool>,
    /// Header level, one through six.
    level: Level,
}

impl Header {
    /// Creates a header of the given `level` with a known open state.
    pub fn new(level: Level, open: bool) -> Header {
        let open = Some(open);
        Self { open, level }
    }

    /// Creates a header from a numeric level; see `Level::from_u8` for how
    /// out-of-range values are mapped.
    pub fn from_u8(level: u8, open: bool) -> Header {
        Self::new(Level::from_u8(level), open)
    }

    /// Returns the header level as a number in `1..=6`.
    pub fn get_level(&self) -> u8 {
        match &self.level {
            Level::One => 1,
            Level::Two => 2,
            Level::Three => 3,
            Level::Four => 4,
            Level::Five => 5,
            Level::Six => 6,
        }
    }
}
impl Parseable for Header {
fn probe(lexeme: &Lexeme) -> bool {
if lexeme
.split_chars()
.into_iter()
.filter(|e| *e != '#')
.count()
== 0
{
let level = lexeme.to_raw().len();
lexeme.clone().split_words().len() == 1 && level > 0 && level <= 6
} else {
false
}
}
fn lex(lexeme: &Lexeme) -> Header {
Header::new(lexeme.to_raw().len().into(), true)
}
fn render(&self) -> String {
if let Some(open) = self.open {
if open {
format!("<h{}>", &self.level)
} else {
format!("</h{}>", &self.level)
}
} else {
panic!("Attempt to render a header tag while open state is unknown")
}
}
}
impl Display for Header {
    /// Writes a human-readable description of the header's level and open state.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let level = &self.level;
        match self.open {
            Some(true) => write!(f, "Level {level} Open Header"),
            Some(false) => write!(f, "Level {level} Closed Header"),
            None => write!(f, "Level {level} Header (Unknown open state)"),
        }
    }
}
/// Header level, from one (most prominent) to six.
pub enum Level {
    One,
    Two,
    Three,
    Four,
    Five,
    Six,
}

impl Level {
    /// Maps a number onto a level, clamping out-of-range input:
    /// `0` and `1` become [`Level::One`], anything above `5` becomes
    /// [`Level::Six`].
    fn from_u8(u: u8) -> Level {
        match u {
            0 | 1 => Level::One,
            2 => Level::Two,
            3 => Level::Three,
            4 => Level::Four,
            5 => Level::Five,
            _ => Level::Six,
        }
    }
}
impl From<usize> for Level {
    /// Converts a count into a [`Level`].
    ///
    /// A value too large for `u8` is logged and treated as `6`; the
    /// narrowed value is then mapped by `Level::from_u8`.
    fn from(z: usize) -> Level {
        let narrowed = u8::try_from(z).unwrap_or_else(|e| {
            log!("Truncating header level {z} to 6: {e:?}");
            6_u8
        });
        Level::from_u8(narrowed)
    }
}
impl Display for Level {
    /// Writes the level as its decimal digit, `1` through `6`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let digit = match self {
            Level::One => "1",
            Level::Two => "2",
            Level::Three => "3",
            Level::Four => "4",
            Level::Five => "5",
            Level::Six => "6",
        };
        f.write_str(digit)
    }
}

View file

@ -0,0 +1,26 @@
use std::fmt::Display;
use crate::{
syntax::content::{Parseable, parser::lexeme::Lexeme},
};
/// A line break: a lexeme consisting of exactly one newline character.
pub struct LineBreak {}

impl Parseable for LineBreak {
    /// Reports whether the raw lexeme is exactly `"\n"`.
    fn probe(lexeme: &Lexeme) -> bool {
        let raw = lexeme.to_raw();
        raw == "\n"
    }

    /// Builds a [`LineBreak`]; the lexeme carries no further information.
    fn lex(_lexeme: &Lexeme) -> LineBreak {
        Self {}
    }

    /// Renders the break as a single newline.
    fn render(&self) -> String {
        String::from("\n")
    }
}
impl Display for LineBreak {
    /// Writes the fixed label `Line Break`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str("Line Break")
    }
}

View file

@ -0,0 +1,28 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
/// Plain text that is emitted unchanged.
pub struct Literal {
    /// The raw lexeme text.
    text: String,
}

impl Parseable for Literal {
    /// Always matches: any lexeme can be taken as a literal.
    fn probe(_lexeme: &Lexeme) -> bool {
        true
    }

    /// Captures the raw lexeme text.
    fn lex(lexeme: &Lexeme) -> Literal {
        let text = lexeme.to_raw();
        Self { text }
    }

    /// Returns a copy of the stored text, without any transformation.
    fn render(&self) -> String {
        self.text.as_str().to_owned()
    }
}
impl Display for Literal {
    /// Writes a human-readable description: `Literal: <text>`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let text = &self.text;
        write!(f, "Literal: <{text}>")
    }
}

View file

@ -0,0 +1,53 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
/// A paragraph tag, either opening or closing.
pub struct Paragraph {
    /// `Some(true)` for an opening tag, `Some(false)` for a closing tag,
    /// `None` while the state has not been decided yet.
    open: Option<bool>,
}

impl Paragraph {
    /// Creates a paragraph tag with a known open state.
    pub fn new(open: bool) -> Paragraph {
        let open = Some(open);
        Self { open }
    }
}
impl Parseable for Paragraph {
    /// Reports whether `lexeme` contains any non-whitespace text.
    ///
    /// The lexeme for a paragraph is any non-whitespace; the parser knows
    /// the context.
    fn probe(lexeme: &Lexeme) -> bool {
        // `trim` strips newlines along with all other whitespace, so a
        // separate comparison against "\n" could never be true.
        !lexeme.to_raw().trim().is_empty()
    }

    /// Builds a paragraph whose open state is still unknown (`None`).
    fn lex(_lexeme: &Lexeme) -> Paragraph {
        Paragraph { open: None }
    }

    /// Renders `<p>` for an opening paragraph and `</p>` for a closing one.
    ///
    /// # Panics
    ///
    /// Panics when the open state is unknown (`None`), which is the state
    /// produced by `lex`.
    fn render(&self) -> String {
        match self.open {
            Some(true) => "<p>".to_owned(),
            Some(false) => "</p>".to_owned(),
            None => panic!(
                "Attempt to render a paragraph tag while open state is unknown"
            ),
        }
    }
}
impl Display for Paragraph {
    /// Writes a human-readable description of the paragraph's open state.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match self.open {
            Some(true) => "Open Paragraph",
            Some(false) => "Closed Paragraph",
            // Spelling corrected from "Unitialized".
            None => "Uninitialized Paragraph (Unknown open state)",
        };
        f.write_str(label)
    }
}

View file

@ -0,0 +1,43 @@
use crate::{
syntax::content::{Parseable, Lexeme},
};
/// A preformatted-text tag, either opening or closing.
pub struct PreFormat {
    /// `Some(true)` for an opening tag, `Some(false)` for a closing tag,
    /// `None` while the state has not been decided yet.
    open: Option<bool>,
}

impl PreFormat {
    /// Creates a preformat tag with a known open state.
    pub fn new(open: bool) -> PreFormat {
        let open = Some(open);
        Self { open }
    }
}
impl Parseable for PreFormat {
    /// Reports whether the lexeme starts with a backtick.
    /// An empty lexeme does not match.
    fn probe(lexeme: &Lexeme) -> bool {
        let chars = lexeme.split_chars();
        matches!(chars.first(), Some(&'`'))
    }

    /// Builds a preformat tag whose open state is still unknown (`None`).
    fn lex(_lexeme: &Lexeme) -> PreFormat {
        PreFormat { open: None }
    }

    /// Renders `<pre>` for an opening tag and `</pre>` for a closing one.
    ///
    /// # Panics
    ///
    /// Panics when the open state is unknown (`None`), which is the state
    /// produced by `lex`.
    fn render(&self) -> String {
        match self.open {
            Some(true) => "<pre>".to_owned(),
            Some(false) => "</pre>".to_owned(),
            None => panic!(
                "Attempt to render a preformat tag while open state is unknown"
            ),
        }
    }
}

View file

@ -0,0 +1,49 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
/// A span tag, either opening or closing.
pub struct Span {
    /// `Some(true)` for an opening tag, `Some(false)` for a closing tag,
    /// `None` while the state has not been decided yet.
    open: Option<bool>,
}

impl Span {
    /// Creates a span tag with a known open state.
    pub fn new(open: bool) -> Span {
        let open = Some(open);
        Self { open }
    }
}
impl Parseable for Span {
    /// Never matches: no lexeme stands for a span.
    fn probe(_lexeme: &Lexeme) -> bool {
        false
    }

    /// Builds a span whose open state is still unknown (`None`).
    fn lex(_lexeme: &Lexeme) -> Span {
        Span { open: None }
    }

    /// Renders `<span>` for an opening tag and `</span>` for a closing one.
    ///
    /// # Panics
    ///
    /// Panics when the open state is unknown (`None`), which is the state
    /// produced by `lex`.
    fn render(&self) -> String {
        match self.open {
            Some(true) => "<span>".to_owned(),
            Some(false) => "</span>".to_owned(),
            None => {
                panic!("Attempt to render a span tag while open state is unknown")
            }
        }
    }
}
impl Display for Span {
    /// Writes a human-readable description of the span's open state.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match self.open {
            Some(true) => "Open Span",
            Some(false) => "Closed Span",
            None => "Span (Unknown open state)",
        };
        f.write_str(label)
    }
}