use std::fmt; use crate::{syntax::content::parser::segment::delimiter::Delimiters}; #[derive(Clone, Debug, Default)] pub struct Lexeme { text: String, next: String, third: String, first: bool, last: bool, } impl Lexeme { pub fn new(raw: &str, next: &str, third: &str) -> Lexeme { Lexeme { text: raw.to_owned(), next: next.to_owned(), third: third.to_owned(), first: false, last: false, } } pub fn text(&self) -> String { self.text.clone() } pub fn next(&self) -> String { self.next.clone() } pub fn last(&self) -> bool { self.last } pub fn first(&self) -> bool { self.first } pub fn mutate_text(&mut self, new: &str) { self.text = new.to_string(); } pub fn as_char(&self) -> Option { if self.text.chars().count() == 1 { self.text.chars().nth(0) } else { None } } pub fn next_as_char(&self) -> Option { if self.next.chars().count() == 1 { self.next.chars().nth(0) } else { None } } pub fn third_as_char(&self) -> Option { if self.third.chars().count() == 1 { self.third.chars().nth(0) } else { None } } pub fn match_char(&self, c: char) -> bool { self.as_char().is_some_and(|as_char| as_char == c) } pub fn match_next_char(&self, c: char) -> bool { self.next_as_char().is_some_and(|next| next == c) } pub fn match_third_char(&self, c: char) -> bool { self.third_as_char().is_some_and(|third| third == c) } pub fn match_either_char(&self, c1: char, c2: char) -> bool { self.as_char().is_some_and(|c| c == c1 || c == c2) } pub fn match_next_either_char(&self, c1: char, c2: char) -> bool { self.next_as_char().is_some_and(|c| c == c1 || c == c2) } pub fn match_char_sequence(&self, c1: char, c2: char) -> bool { self.match_char(c1) && self.match_next_char(c2) } pub fn match_char_triple(&self, c1: char, c2: char, c3: char) -> bool { self.match_char(c1) && self.match_next_char(c2) && self.match_third_char(c3) } pub fn match_char_in(&self, slice: &[char]) -> bool { self.as_char().is_some_and(|c| slice.contains(&c)) } pub fn match_next_char_in(&self, slice: &[char]) -> bool { self.next_as_char().is_some_and(|c| slice.contains(&c)) } pub fn is_punctuation(&self) -> bool { self.match_char_in(&Delimiters::default().punctuation) } pub fn is_whitespace(&self) -> bool { self.match_char_in(&Delimiters::default().whitespace) } pub fn is_next_whitespace(&self) -> bool { self.match_next_char_in(&Delimiters::default().whitespace) } pub fn is_next_punctuation(&self) -> bool { self.match_next_char_in(&Delimiters::default().punctuation) } pub fn is_next_boundary(&self) -> bool { let delimiters = Delimiters::default(); self.last || self .next_as_char() .is_some_and(|c| delimiters.is_boundary(c)) } pub fn is_delimiter(&self) -> bool { let delimiters = Delimiters::default(); self.as_char().is_some_and(|c| delimiters.is_delimiter(c)) } pub fn is_next_delimiter(&self) -> bool { let delimiters = Delimiters::default(); self.last || self .next_as_char() .is_some_and(|c| delimiters.is_delimiter(c)) } pub fn next_first_char(&self) -> Option { self.next.chars().nth(0) } pub fn match_first_char(&self, query: char) -> bool { self.text.chars().nth(0).is_some_and(|c| c == query) } pub fn match_last_char(&self, query: char) -> bool { self.text.chars().last().is_some_and(|c| c == query) } pub fn match_next_first_char(&self, query: char) -> bool { self.next.chars().nth(0).is_some_and(|c| c == query) } /// # Panics /// Panics if number of chars for a single lexeme exceeds `i32::MAX` pub fn count_char(&self, c: char) -> i32 { let count = self.text().chars().filter(|&n| n == c).count(); match i32::try_from(count) { Ok(i) => i, Err(e) => { panic!("Wild char number {count} is a bit much: {e:#?}"); }, } } pub fn split_chars(&self) -> Vec { let vector: Vec = self.text().chars().collect(); vector } pub fn split_segments(self) -> Vec { self.text().split(' ').map(str::to_string).collect() } pub fn first_segment(self) -> Option { self.split_segments().first().map(String::to_owned) } pub fn collect(segments_slice: &[String]) -> Vec { let mut lexemes = Vec::with_capacity(segments_slice.len()); let mut segments = segments_slice.to_vec(); let Some(last) = segments.pop() else { return vec![]; }; let last_lexeme = Lexeme { text: last.clone(), next: String::default(), third: String::default(), first: segments.is_empty(), last: true, }; let Some(penultimate) = segments.pop() else { return vec![last_lexeme]; }; let penultimate_lexeme = Lexeme { text: penultimate.clone(), next: last.clone(), third: String::default(), first: false, last: false, }; let mut third = last; let mut next = penultimate; let mut iterator = segments.iter().rev().peekable(); while let Some(current) = iterator.next() { let lexeme = Lexeme { text: current.to_owned(), next: next.clone(), third: third.clone(), first: iterator.peek().is_none(), last: false, }; lexemes.push(lexeme); third.clone_from(&next); next.clone_from(current); } lexemes.reverse(); lexemes.push(penultimate_lexeme); lexemes.push(last_lexeme); lexemes } } impl fmt::Display for Lexeme { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use crate::log::wrap; let properties = if self.last && self.first { "[S] " } else if self.last { "[L] " } else if self.first { "[F] " } else { "" }; let next_display = if self.last { " " } else if self.third.is_empty() { &format!(" -> {} ", wrap(&self.next)) } else { &format!(" -> {} -> {}", wrap(&self.next), wrap(&self.third)) }; write!(f, "Lx {}{}{}", properties, wrap(&self.text), next_display) } } #[cfg(test)] mod tests { use super::*; #[test] fn new_lexeme() { let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu"; let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa"; let third = "K0QTlujGjL2qxBzs16g8oyiCYSuQaRVE"; let lexeme = Lexeme::new(raw, next, third); assert_eq!(lexeme.text, raw); assert_eq!(lexeme.next, next); assert_eq!(lexeme.third, third); } #[test] fn next_first_char() { let payload = "4IU"; let lexeme = Lexeme::new("", payload, ""); assert_eq!(lexeme.next_first_char().unwrap(), '4'); } #[test] fn match_first_char() { let payload = "MKY"; let lexeme = Lexeme::new(payload, "", ""); assert!(lexeme.match_first_char('M')); } #[test] fn match_absent_first_char() { let lexeme = Lexeme::new("", "", ""); assert!(!lexeme.match_first_char('x')); } #[test] fn first_segment() { let payload = "nhNc fGev QnGW E4hj ExyZ"; let lexeme = Lexeme::new(payload, "", ""); assert_eq!(lexeme.clone().first_segment(), Some(String::from("nhNc"))); } #[test] fn first_lexeme() { let input = ["h015r", "cvYde", "aw1Ui", "ASwew"].map(str::to_string); let lexemes = Lexeme::collect(&input); let first = lexemes.first().unwrap(); assert!(first.clone().first()); assert_eq!(first.text(), "h015r".to_string()); } #[test] fn count_char() { let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY"; let lexeme = Lexeme::new(payload, "", ""); assert_eq!(lexeme.count_char('j'), 3); } #[test] fn mutate_text() { let mut lexeme = Lexeme::new("b71Je", "I6y3i", "LC8na"); lexeme.mutate_text("qkjjK2"); assert_eq!(lexeme.text(), "qkjjK2"); } #[test] fn third_as_char() { let lexeme_a = Lexeme::new("1", "2", "3"); assert_eq!(lexeme_a.third_as_char().unwrap(), '3'); let lexeme_c = Lexeme::new("a", "b", ""); assert!(lexeme_c.third_as_char().is_none()); } #[test] fn match_third_char() { let lexeme = Lexeme::new("1", "2", "3"); assert!(lexeme.match_third_char('3')); } #[test] fn match_next_either_char() { let lexeme = Lexeme::new("1", "2", "3"); assert!(lexeme.match_next_either_char('x', '2')); assert!(lexeme.match_next_either_char('2', 'x')); } #[test] fn match_triple() { let lexeme = Lexeme::new("1", "2", "3"); assert!(lexeme.match_char_triple('1', '2', '3')); } #[test] fn is_punctuation() { let delimiters = Delimiters::default(); let mut lexemes: Vec = vec![]; for p in delimiters.punctuation { lexemes.push(Lexeme::new(&p.to_string(), "", "")); } for lexeme in lexemes { assert!(lexeme.is_punctuation()); } } #[test] fn is_next_punctuation() { let delimiters = Delimiters::default(); let mut lexemes: Vec = vec![]; for p in delimiters.punctuation { lexemes.push(Lexeme::new("", &p.to_string(), "")); } for lexeme in lexemes { assert!(lexeme.is_next_punctuation()); } } #[test] fn match_last_char() { let lexeme = Lexeme::new("qYBWuNX", "", ""); assert!(lexeme.match_last_char('X')); } #[test] fn match_next_last_char() { let lexeme = Lexeme::new("", "teDAqVx", ""); assert!(lexeme.match_next_first_char('t')); } #[test] fn display() { let input = ["pcdA", "o32X", "kz2i", "79Lz"].map(str::to_string); let lexemes = Lexeme::collect(&input); let first = lexemes.first().unwrap(); let second = lexemes.get(1).unwrap(); let third = lexemes.get(2).unwrap(); let last = lexemes.last().unwrap(); assert_eq!( format!("{first}"), String::from("Lx [F] pcdA -> o32X -> kz2i"), "first" ); assert_eq!( format!("{second}"), String::from("Lx o32X -> kz2i -> 79Lz"), "second" ); assert_eq!( format!("{third}"), String::from("Lx kz2i -> 79Lz "), "third" ); assert_eq!( format!("{last}"), String::from("Lx [L] 79Lz "), "last" ); let input_single = ["9fOC"].map(str::to_string); let lexemes_single = Lexeme::collect(&input_single); let single = lexemes_single.first().unwrap(); println!("{single:#?}"); assert!(input_single.to_vec().len() == 1); assert_eq!(format!("{single}"), "Lx [S] 9fOC "); } }