Lexeme: Add 'third' field, implement Default, simplify some methods

This commit is contained in:
Juno Takano 2026-01-07 10:55:04 -03:00
commit b60151556e
12 changed files with 97 additions and 70 deletions

View file

@ -35,7 +35,7 @@ pub fn parse(
let mut header = Header::lex(lexeme); let mut header = Header::lex(lexeme);
header.dom_id = Some(Header::make_id( header.dom_id = Some(Header::make_id(
config, config,
iterator.peek().map_or(&Lexeme::new("", ""), |l| l), iterator.peek().map_or(&Lexeme::default(), |l| l),
&mut state.dom_ids, &mut state.dom_ids,
)); ));
log!("Block Context: None -> Header on {lexeme}"); log!("Block Context: None -> Header on {lexeme}");

View file

@ -2,18 +2,20 @@ use std::fmt;
use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters}; use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters};
#[derive(Clone, Debug)] #[derive(Clone, Debug, Default)]
pub struct Lexeme { pub struct Lexeme {
text: String, text: String,
next: String, next: String,
third: String,
last: bool, last: bool,
} }
impl Lexeme { impl Lexeme {
pub fn new(raw: &str, next: &str) -> Lexeme { pub fn new(raw: &str, next: &str, third: &str) -> Lexeme {
Lexeme { Lexeme {
text: raw.to_owned(), text: raw.to_owned(),
next: next.to_owned(), next: next.to_owned(),
third: third.to_owned(),
last: false, last: false,
} }
} }
@ -53,6 +55,14 @@ impl Lexeme {
} }
} }
/// Returns the `third` field as a `char` when it holds exactly one character.
///
/// Yields `None` when `third` is empty or spans more than one character.
pub fn third_as_char(&self) -> Option<char> {
    let mut chars = self.third.chars();
    // Pulling at most two items decides "exactly one" without walking the
    // whole string the way `chars().count()` followed by `nth(0)` does.
    match (chars.next(), chars.next()) {
        (Some(only), None) => Some(only),
        _ => None,
    }
}
pub fn match_as_char(&self, c: char) -> bool { pub fn match_as_char(&self, c: char) -> bool {
self.as_char().is_some_and(|as_char| as_char == c) self.as_char().is_some_and(|as_char| as_char == c)
} }
@ -61,27 +71,38 @@ impl Lexeme {
self.next_as_char().is_some_and(|next| next == c) self.next_as_char().is_some_and(|next| next == c)
} }
/// Reports whether `third_as_char` yields exactly the character `c`.
///
/// Returns `false` when `third_as_char` yields `None`.
pub fn match_third_as_char(&self, c: char) -> bool {
    self.third_as_char() == Some(c)
}
/// Checks the three components of `t` with `match_as_char`,
/// `match_next_as_char` and `match_third_as_char`, in that order.
///
/// Evaluation stops at the first component that fails to match, so later
/// checks only run when every earlier one succeeded.
pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool {
    let (first, second, third) = t;
    if !self.match_as_char(first) {
        return false;
    }
    if !self.match_next_as_char(second) {
        return false;
    }
    self.match_third_as_char(third)
}
/// Reports whether the character produced by `as_char` is listed in `slice`.
///
/// Returns `false` when `as_char` yields `None`.
pub fn contains_as_char(&self, slice: &[char]) -> bool {
    let Some(candidate) = self.as_char() else {
        return false;
    };
    slice.contains(&candidate)
}
/// Reports whether the character produced by `next_as_char` is listed in `slice`.
///
/// Returns `false` when `next_as_char` yields `None`.
pub fn contains_next_as_char(&self, slice: &[char]) -> bool {
    match self.next_as_char() {
        Some(candidate) => slice.contains(&candidate),
        None => false,
    }
}
pub fn is_punctuation(&self) -> bool { pub fn is_punctuation(&self) -> bool {
let punctuation = Delimiters::default().punctuation; self.contains_as_char(&Delimiters::default().punctuation)
self.as_char().is_some_and(|c| punctuation.contains(&c))
} }
pub fn is_whitespace(&self) -> bool { pub fn is_whitespace(&self) -> bool {
let delimiters = Delimiters::default(); self.contains_as_char(&Delimiters::default().whitespace)
self.as_char()
.is_some_and(|c| delimiters.whitespace.contains(&c))
} }
pub fn is_next_whitespace(&self) -> bool { pub fn is_next_whitespace(&self) -> bool {
let delimiters = Delimiters::default(); self.contains_next_as_char(&Delimiters::default().whitespace)
self.next_as_char()
.is_some_and(|c| delimiters.whitespace.contains(&c))
} }
pub fn is_next_punctuation(&self) -> bool { pub fn is_next_punctuation(&self) -> bool {
let delimiters = Delimiters::default(); self.contains_next_as_char(&Delimiters::default().punctuation)
self.next_as_char()
.is_some_and(|c| delimiters.punctuation.contains(&c))
} }
pub fn is_next_boundary(&self) -> bool { pub fn is_next_boundary(&self) -> bool {
@ -146,25 +167,41 @@ impl Lexeme {
self.split_words().first().map(String::to_owned) self.split_words().first().map(String::to_owned)
} }
pub fn collect(raw_strings: &[String]) -> Vec<Lexeme> { pub fn collect(segments: &[String]) -> Vec<Lexeme> {
let mut out_vector = Vec::with_capacity(raw_strings.len()); let mut out_vector = Vec::with_capacity(segments.len());
let mut iterator = raw_strings.iter().peekable(); let mut vec = segments.to_vec();
while let Some(raw) = iterator.next() { let Some(mut third) = vec.pop() else { return vec![] };
let mut next = String::default(); let last_lexeme = Lexeme {
let mut last = false; text: third.clone(),
if let Some(peeked) = iterator.peek() { next: String::default(),
next.clone_from(*peeked); third: String::default(),
} else { last: true,
last = true; };
}
let Some(mut next) = vec.pop() else { return vec![last_lexeme] };
let penultimate_lexeme = Lexeme {
text: next.clone(),
next: third.clone(),
third: String::default(),
last: false,
};
for current in vec.iter().rev() {
out_vector.push(Lexeme { out_vector.push(Lexeme {
text: raw.to_owned(), text: current.to_owned(),
next, next: next.clone(),
last, third: third.clone(),
last: false,
}); });
third.clone_from(&next);
next.clone_from(current);
} }
out_vector.reverse();
out_vector.push(penultimate_lexeme);
out_vector.push(last_lexeme);
out_vector out_vector
} }
} }
@ -175,8 +212,10 @@ impl fmt::Display for Lexeme {
let next_display = if self.last() { let next_display = if self.last() {
" <EOI>" " <EOI>"
} else if self.third.is_empty() {
&format!("-> {} -! EOI", wrap(&self.next))
} else { } else {
&format!("-> {}", wrap(&self.next)) &format!("-> {} -> {}", wrap(&self.next), wrap(&self.third))
}; };
write!(f, "{} {}", wrap(&self.text), next_display) write!(f, "{} {}", wrap(&self.text), next_display)
} }
@ -190,50 +229,44 @@ mod tests {
fn new_lexeme() { fn new_lexeme() {
let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu"; let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu";
let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa"; let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa";
let lexeme = Lexeme::new(raw, next); let third = "K0QTlujGjL2qxBzs16g8oyiCYSuQaRVE";
let lexeme = Lexeme::new(raw, next, third);
assert_eq!(lexeme.text, raw); assert_eq!(lexeme.text, raw);
assert_eq!(lexeme.next, next); assert_eq!(lexeme.next, next);
assert_eq!(lexeme.third, third);
} }
#[test] #[test]
fn next_first_char() { fn next_first_char() {
let payload = "4IU"; let payload = "4IU";
let lexeme = Lexeme::new(payload, payload); let lexeme = Lexeme::new("", payload, "");
assert_eq!(lexeme.next_first_char().unwrap(), '4'); assert_eq!(lexeme.next_first_char().unwrap(), '4');
} }
#[test] #[test]
fn match_first_char() { fn match_first_char() {
let payload = "MKY"; let payload = "MKY";
let lexeme = Lexeme::new(payload, payload); let lexeme = Lexeme::new(payload, "", "");
assert!(lexeme.match_first_char('M')); assert!(lexeme.match_first_char('M'));
} }
#[test] #[test]
fn match_absent_first_char() { fn match_absent_first_char() {
let payload = ""; let lexeme = Lexeme::new("", "", "");
let lexeme = Lexeme::new(payload, payload);
assert!(!lexeme.match_first_char('x')); assert!(!lexeme.match_first_char('x'));
} }
#[test] #[test]
fn first_word() { fn first_word() {
let payload = "nhNc fGev QnGW E4hj ExyZ"; let payload = "nhNc fGev QnGW E4hj ExyZ";
let lexeme = Lexeme::new(payload, payload); let lexeme = Lexeme::new(payload, "", "");
assert_eq!(lexeme.first(), Some(String::from("nhNc"))); assert_eq!(lexeme.first(), Some(String::from("nhNc")));
} }
#[test] #[test]
fn count_char() { fn count_char() {
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY"; let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
let lexeme = Lexeme::new(payload, payload); let lexeme = Lexeme::new(payload, "", "");
assert_eq!(lexeme.count_char('j'), 3);
}
#[test]
fn count_char_huge_number() {
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
let lexeme = Lexeme::new(payload, payload);
assert_eq!(lexeme.count_char('j'), 3); assert_eq!(lexeme.count_char('j'), 3);
} }
} }

View file

@ -124,7 +124,7 @@ mod tests {
expected = "Attempt to lex an anchor directly from a lexeme" expected = "Attempt to lex an anchor directly from a lexeme"
)] )]
fn lex() { fn lex() {
Anchor::lex(&Lexeme::new("", "")); Anchor::lex(&Lexeme::default());
} }
#[test] #[test]

View file

@ -56,6 +56,6 @@ mod tests {
expected = "Attempt to lex a bold tag directly from a lexeme" expected = "Attempt to lex a bold tag directly from a lexeme"
)] )]
fn lex() { fn lex() {
Bold::lex(&Lexeme::new("", "")); Bold::lex(&Lexeme::default());
} }
} }

View file

@ -56,6 +56,6 @@ mod tests {
expected = "Attempt to lex a code tag directly from a lexeme" expected = "Attempt to lex a code tag directly from a lexeme"
)] )]
fn lex() { fn lex() {
Code::lex(&Lexeme::new("", "")); Code::lex(&Lexeme::default());
} }
} }

View file

@ -196,7 +196,7 @@ mod tests {
let mut map: HashMap<String, Vec<String>> = HashMap::default(); let mut map: HashMap<String, Vec<String>> = HashMap::default();
let id = Header::make_id( let id = Header::make_id(
&Config::default(), &Config::default(),
&Lexeme::new("##", "Title"), &Lexeme::new("##", "Title", ""),
&mut map, &mut map,
); );
assert_eq!(id, "Title"); assert_eq!(id, "Title");
@ -209,7 +209,7 @@ mod tests {
let id = Header::make_id( let id = Header::make_id(
&config, &config,
&Lexeme::new("##", "駄目!"), &Lexeme::new("##", "駄目!", ""),
&mut HashMap::default(), &mut HashMap::default(),
); );
assert_eq!(id, "h"); assert_eq!(id, "h");
@ -222,7 +222,7 @@ mod tests {
let id = Header::make_id( let id = Header::make_id(
&config, &config,
&Lexeme::new("##", "駄目!"), &Lexeme::new("##", "駄目!", ""),
&mut HashMap::default(), &mut HashMap::default(),
); );
assert_eq!(id, "駄目!"); assert_eq!(id, "駄目!");
@ -233,15 +233,15 @@ mod tests {
let mut map: HashMap<String, Vec<String>> = HashMap::default(); let mut map: HashMap<String, Vec<String>> = HashMap::default();
let config = Config::default(); let config = Config::default();
let id = let id =
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
assert_eq!(id, "UVrcCUjoQ"); assert_eq!(id, "UVrcCUjoQ");
let double = let double =
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
assert_eq!(double, "UVrcCUjoQ-1"); assert_eq!(double, "UVrcCUjoQ-1");
let double2 = let double2 =
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
assert_eq!(double2, "UVrcCUjoQ-2"); assert_eq!(double2, "UVrcCUjoQ-2");
} }

View file

@ -56,6 +56,6 @@ mod tests {
expected = "Attempt to lex an oblique tag directly from a lexeme" expected = "Attempt to lex an oblique tag directly from a lexeme"
)] )]
fn lex() { fn lex() {
Oblique::lex(&Lexeme::new("", "")); Oblique::lex(&Lexeme::default());
} }
} }

View file

@ -62,7 +62,7 @@ mod tests {
#[test] #[test]
fn lex() { fn lex() {
let p = Paragraph::lex(&Lexeme::new("", "")); let p = Paragraph::lex(&Lexeme::default());
assert!(p.open.is_none()); assert!(p.open.is_none());
} }
@ -71,7 +71,7 @@ mod tests {
expected = "Attempt to render a paragraph tag while open state is unknown" expected = "Attempt to render a paragraph tag while open state is unknown"
)] )]
fn render_state_unknown() { fn render_state_unknown() {
let p = Paragraph::lex(&Lexeme::new("", "")); let p = Paragraph::lex(&Lexeme::default());
drop(p.render()); drop(p.render());
} }
} }

View file

@ -54,10 +54,10 @@ mod tests {
#[test] #[test]
fn lex() { fn lex() {
let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", "")); let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
assert!(from_empty_lexeme.open.is_none()); assert!(from_empty_lexeme.open.is_none());
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`")); let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
assert!(from_non_empty_lexeme.open.is_none()); assert!(from_non_empty_lexeme.open.is_none());
} }
@ -66,10 +66,10 @@ mod tests {
expected = "Attempt to render a preformat tag while open state is unknown" expected = "Attempt to render a preformat tag while open state is unknown"
)] )]
fn render() { fn render() {
let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", "")); let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
from_empty_lexeme.render(); from_empty_lexeme.render();
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`")); let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
from_non_empty_lexeme.render(); from_non_empty_lexeme.render();
} }
} }

View file

@ -56,18 +56,12 @@ mod tests {
#[test] #[test]
fn probe() { fn probe() {
assert!(!Span::probe(&Lexeme::new( assert!(!Span::probe(&Lexeme::default()));
&crate::ONSET.elapsed().as_nanos().to_string(),
"",
)));
} }
#[test] #[test]
fn lex() { fn lex() {
let span = Span::lex(&Lexeme::new( let span = Span::lex(&Lexeme::default());
&crate::ONSET.elapsed().as_nanos().to_string(),
"",
));
assert!(span.open.is_none()); assert!(span.open.is_none());
} }
@ -85,7 +79,7 @@ mod tests {
expected = "Attempt to render a span tag while open state is unknown" expected = "Attempt to render a span tag while open state is unknown"
)] )]
fn render_unknown_open_state() { fn render_unknown_open_state() {
let open_span = Span::lex(&Lexeme::new("", "")); let open_span = Span::lex(&Lexeme::default());
drop(open_span.render()); drop(open_span.render());
} }
} }

View file

@ -53,6 +53,6 @@ mod tests {
expected = "Attempt to lex a strike tag directly from a lexeme" expected = "Attempt to lex a strike tag directly from a lexeme"
)] )]
fn lex() { fn lex() {
Strike::lex(&Lexeme::new("", "")); Strike::lex(&Lexeme::default());
} }
} }

View file

@ -56,6 +56,6 @@ mod tests {
expected = "Attempt to lex an underline tag directly from a lexeme" expected = "Attempt to lex an underline tag directly from a lexeme"
)] )]
fn lex() { fn lex() {
Underline::lex(&Lexeme::new("", "")); Underline::lex(&Lexeme::default());
} }
} }