diff --git a/src/syntax/content/parser/context/block.rs b/src/syntax/content/parser/context/block.rs index 87efd48..608b58b 100644 --- a/src/syntax/content/parser/context/block.rs +++ b/src/syntax/content/parser/context/block.rs @@ -35,7 +35,7 @@ pub fn parse( let mut header = Header::lex(lexeme); header.dom_id = Some(Header::make_id( config, - iterator.peek().map_or(&Lexeme::new("", ""), |l| l), + iterator.peek().map_or(&Lexeme::default(), |l| l), &mut state.dom_ids, )); log!("Block Context: None -> Header on {lexeme}"); diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs index 2596a0c..78de9f8 100644 --- a/src/syntax/content/parser/lexeme.rs +++ b/src/syntax/content/parser/lexeme.rs @@ -2,18 +2,20 @@ use std::fmt; use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters}; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct Lexeme { text: String, next: String, + third: String, last: bool, } impl Lexeme { - pub fn new(raw: &str, next: &str) -> Lexeme { + pub fn new(raw: &str, next: &str, third: &str) -> Lexeme { Lexeme { text: raw.to_owned(), next: next.to_owned(), + third: third.to_owned(), last: false, } } @@ -53,6 +55,14 @@ impl Lexeme { } } + pub fn third_as_char(&self) -> Option { + if self.third.chars().count() == 1 { + self.third.chars().nth(0) + } else { + None + } + } + pub fn match_as_char(&self, c: char) -> bool { self.as_char().is_some_and(|as_char| as_char == c) } @@ -61,27 +71,38 @@ impl Lexeme { self.next_as_char().is_some_and(|next| next == c) } + pub fn match_third_as_char(&self, c: char) -> bool { + self.third_as_char().is_some_and(|third| third == c) + } + + pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool { + self.match_as_char(t.0) + && self.match_next_as_char(t.1) + && self.match_third_as_char(t.2) + } + + pub fn contains_as_char(&self, slice: &[char]) -> bool { + self.as_char().is_some_and(|c| slice.contains(&c)) + } + + pub fn contains_next_as_char(&self, slice: &[char]) -> bool { + self.next_as_char().is_some_and(|c| slice.contains(&c)) + } + pub fn is_punctuation(&self) -> bool { - let punctuation = Delimiters::default().punctuation; - self.as_char().is_some_and(|c| punctuation.contains(&c)) + self.contains_as_char(&Delimiters::default().punctuation) } pub fn is_whitespace(&self) -> bool { - let delimiters = Delimiters::default(); - self.as_char() - .is_some_and(|c| delimiters.whitespace.contains(&c)) + self.contains_as_char(&Delimiters::default().whitespace) } pub fn is_next_whitespace(&self) -> bool { - let delimiters = Delimiters::default(); - self.next_as_char() - .is_some_and(|c| delimiters.whitespace.contains(&c)) + self.contains_next_as_char(&Delimiters::default().whitespace) } pub fn is_next_punctuation(&self) -> bool { - let delimiters = Delimiters::default(); - self.next_as_char() - .is_some_and(|c| delimiters.punctuation.contains(&c)) + self.contains_next_as_char(&Delimiters::default().punctuation) } pub fn is_next_boundary(&self) -> bool { @@ -146,25 +167,41 @@ impl Lexeme { self.split_words().first().map(String::to_owned) } - pub fn collect(raw_strings: &[String]) -> Vec { - let mut out_vector = Vec::with_capacity(raw_strings.len()); - let mut iterator = raw_strings.iter().peekable(); + pub fn collect(segments: &[String]) -> Vec { + let mut out_vector = Vec::with_capacity(segments.len()); + let mut vec = segments.to_vec(); - while let Some(raw) = iterator.next() { - let mut next = String::default(); - let mut last = false; - if let Some(peeked) = iterator.peek() { - next.clone_from(*peeked); - } else { - last = true; - } + let Some(mut third) = vec.pop() else { return vec![] }; + let last_lexeme = Lexeme { + text: third.clone(), + next: String::default(), + third: String::default(), + last: true, + }; + + let Some(mut next) = vec.pop() else { return vec![last_lexeme] }; + let penultimate_lexeme = Lexeme { + text: next.clone(), + next: third.clone(), + third: String::default(), + last: false, + }; + + for current in vec.iter().rev() { out_vector.push(Lexeme { - text: raw.to_owned(), - next, - last, + text: current.to_owned(), + next: next.clone(), + third: third.clone(), + last: false, }); + + third.clone_from(&next); + next.clone_from(current); } + out_vector.reverse(); + out_vector.push(penultimate_lexeme); + out_vector.push(last_lexeme); out_vector } } @@ -175,8 +212,10 @@ impl fmt::Display for Lexeme { let next_display = if self.last() { " " + } else if self.third.is_empty() { + &format!("-> {} -! EOI", wrap(&self.next)) } else { - &format!("-> {}", wrap(&self.next)) + &format!("-> {} -> {}", wrap(&self.next), wrap(&self.third)) }; write!(f, "{} {}", wrap(&self.text), next_display) } @@ -190,50 +229,44 @@ mod tests { fn new_lexeme() { let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu"; let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa"; - let lexeme = Lexeme::new(raw, next); + let third = "K0QTlujGjL2qxBzs16g8oyiCYSuQaRVE"; + let lexeme = Lexeme::new(raw, next, third); assert_eq!(lexeme.text, raw); assert_eq!(lexeme.next, next); + assert_eq!(lexeme.third, third); } #[test] fn next_first_char() { let payload = "4IU"; - let lexeme = Lexeme::new(payload, payload); + let lexeme = Lexeme::new("", payload, ""); assert_eq!(lexeme.next_first_char().unwrap(), '4'); } #[test] fn match_first_char() { let payload = "MKY"; - let lexeme = Lexeme::new(payload, payload); + let lexeme = Lexeme::new(payload, "", ""); assert!(lexeme.match_first_char('M')); } #[test] fn match_absent_first_char() { - let payload = ""; - let lexeme = Lexeme::new(payload, payload); + let lexeme = Lexeme::new("", "", ""); assert!(!lexeme.match_first_char('x')); } #[test] fn first_word() { let payload = "nhNc fGev QnGW E4hj ExyZ"; - let lexeme = Lexeme::new(payload, payload); + let lexeme = Lexeme::new(payload, "", ""); assert_eq!(lexeme.first(), Some(String::from("nhNc"))); } #[test] fn count_char() { let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY"; - let lexeme = Lexeme::new(payload, payload); - assert_eq!(lexeme.count_char('j'), 3); - } - - #[test] - fn count_char_huge_number() { - let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY"; - let lexeme = Lexeme::new(payload, payload); + let lexeme = Lexeme::new(payload, "", ""); assert_eq!(lexeme.count_char('j'), 3); } } diff --git a/src/syntax/content/parser/token/anchor.rs b/src/syntax/content/parser/token/anchor.rs index e84b471..51dcf59 100644 --- a/src/syntax/content/parser/token/anchor.rs +++ b/src/syntax/content/parser/token/anchor.rs @@ -124,7 +124,7 @@ mod tests { expected = "Attempt to lex an anchor directly from a lexeme" )] fn lex() { - Anchor::lex(&Lexeme::new("", "")); + Anchor::lex(&Lexeme::default()); } #[test] diff --git a/src/syntax/content/parser/token/bold.rs b/src/syntax/content/parser/token/bold.rs index a0fd845..09902f2 100644 --- a/src/syntax/content/parser/token/bold.rs +++ b/src/syntax/content/parser/token/bold.rs @@ -56,6 +56,6 @@ mod tests { expected = "Attempt to lex a bold tag directly from a lexeme" )] fn lex() { - Bold::lex(&Lexeme::new("", "")); + Bold::lex(&Lexeme::default()); } } diff --git a/src/syntax/content/parser/token/code.rs b/src/syntax/content/parser/token/code.rs index 3a8bc13..24a44f1 100644 --- a/src/syntax/content/parser/token/code.rs +++ b/src/syntax/content/parser/token/code.rs @@ -56,6 +56,6 @@ mod tests { expected = "Attempt to lex a code tag directly from a lexeme" )] fn lex() { - Code::lex(&Lexeme::new("", "")); + Code::lex(&Lexeme::default()); } } diff --git a/src/syntax/content/parser/token/header.rs b/src/syntax/content/parser/token/header.rs index ec99216..660f659 100644 --- a/src/syntax/content/parser/token/header.rs +++ b/src/syntax/content/parser/token/header.rs @@ -196,7 +196,7 @@ mod tests { let mut map: HashMap> = HashMap::default(); let id = Header::make_id( &Config::default(), - &Lexeme::new("##", "Title"), + &Lexeme::new("##", "Title", ""), &mut map, ); assert_eq!(id, "Title"); @@ -209,7 +209,7 @@ mod tests { let id = Header::make_id( &config, - &Lexeme::new("##", "駄目!"), + &Lexeme::new("##", "駄目!", ""), &mut HashMap::default(), ); assert_eq!(id, "h"); @@ -222,7 +222,7 @@ mod tests { let id = Header::make_id( &config, - &Lexeme::new("##", "駄目!"), + &Lexeme::new("##", "駄目!", ""), &mut HashMap::default(), ); assert_eq!(id, "駄目!"); @@ -233,15 +233,15 @@ mod tests { let mut map: HashMap> = HashMap::default(); let config = Config::default(); let id = - Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); + Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map); assert_eq!(id, "UVrcCUjoQ"); let double = - Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); + Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map); assert_eq!(double, "UVrcCUjoQ-1"); let double2 = - Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); + Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map); assert_eq!(double2, "UVrcCUjoQ-2"); } diff --git a/src/syntax/content/parser/token/oblique.rs b/src/syntax/content/parser/token/oblique.rs index 13a8cfa..03c49d4 100644 --- a/src/syntax/content/parser/token/oblique.rs +++ b/src/syntax/content/parser/token/oblique.rs @@ -56,6 +56,6 @@ mod tests { expected = "Attempt to lex an oblique tag directly from a lexeme" )] fn lex() { - Oblique::lex(&Lexeme::new("", "")); + Oblique::lex(&Lexeme::default()); } } diff --git a/src/syntax/content/parser/token/paragraph.rs b/src/syntax/content/parser/token/paragraph.rs index 7875538..798792b 100644 --- a/src/syntax/content/parser/token/paragraph.rs +++ b/src/syntax/content/parser/token/paragraph.rs @@ -62,7 +62,7 @@ mod tests { #[test] fn lex() { - let p = Paragraph::lex(&Lexeme::new("", "")); + let p = Paragraph::lex(&Lexeme::default()); assert!(p.open.is_none()); } @@ -71,7 +71,7 @@ mod tests { expected = "Attempt to render a paragraph tag while open state is unknown" )] fn render_state_unknown() { - let p = Paragraph::lex(&Lexeme::new("", "")); + let p = Paragraph::lex(&Lexeme::default()); drop(p.render()); } } diff --git a/src/syntax/content/parser/token/preformat.rs b/src/syntax/content/parser/token/preformat.rs index 368217d..d46285d 100644 --- a/src/syntax/content/parser/token/preformat.rs +++ b/src/syntax/content/parser/token/preformat.rs @@ -54,10 +54,10 @@ mod tests { #[test] fn lex() { - let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", "")); + let from_empty_lexeme = PreFormat::lex(&Lexeme::default()); assert!(from_empty_lexeme.open.is_none()); - let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`")); + let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default()); assert!(from_non_empty_lexeme.open.is_none()); } @@ -66,10 +66,10 @@ mod tests { expected = "Attempt to render a preformat tag while open state is unknown" )] fn render() { - let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", "")); + let from_empty_lexeme = PreFormat::lex(&Lexeme::default()); from_empty_lexeme.render(); - let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`")); + let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default()); from_non_empty_lexeme.render(); } } diff --git a/src/syntax/content/parser/token/span.rs b/src/syntax/content/parser/token/span.rs index 7c7a4cd..be6e911 100644 --- a/src/syntax/content/parser/token/span.rs +++ b/src/syntax/content/parser/token/span.rs @@ -56,18 +56,12 @@ mod tests { #[test] fn probe() { - assert!(!Span::probe(&Lexeme::new( - &crate::ONSET.elapsed().as_nanos().to_string(), - "", - ))); + assert!(!Span::probe(&Lexeme::default())); } #[test] fn lex() { - let span = Span::lex(&Lexeme::new( - &crate::ONSET.elapsed().as_nanos().to_string(), - "", - )); + let span = Span::lex(&Lexeme::default()); assert!(span.open.is_none()); } @@ -85,7 +79,7 @@ mod tests { expected = "Attempt to render a span tag while open state is unknown" )] fn render_unknown_open_state() { - let open_span = Span::lex(&Lexeme::new("", "")); + let open_span = Span::lex(&Lexeme::default()); drop(open_span.render()); } } diff --git a/src/syntax/content/parser/token/strike.rs b/src/syntax/content/parser/token/strike.rs index 049fde2..69ce9d4 100644 --- a/src/syntax/content/parser/token/strike.rs +++ b/src/syntax/content/parser/token/strike.rs @@ -53,6 +53,6 @@ mod tests { expected = "Attempt to lex a strike tag directly from a lexeme" )] fn lex() { - Strike::lex(&Lexeme::new("", "")); + Strike::lex(&Lexeme::default()); } } diff --git a/src/syntax/content/parser/token/underline.rs b/src/syntax/content/parser/token/underline.rs index 866699d..a588d9c 100644 --- a/src/syntax/content/parser/token/underline.rs +++ b/src/syntax/content/parser/token/underline.rs @@ -56,6 +56,6 @@ mod tests { expected = "Attempt to lex an underline tag directly from a lexeme" )] fn lex() { - Underline::lex(&Lexeme::new("", "")); + Underline::lex(&Lexeme::default()); } }