// en/src/syntax/content/parser/lexeme.rs (226 lines, 5.9 KiB, Rust)

use std::fmt;
use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters};
/// A piece of text paired with a one-item lookahead at the text that follows it.
#[derive(Clone, Debug)]
pub struct Lexeme {
/// This lexeme's own text.
text: String,
/// Text of the following lexeme; `collect` leaves it empty for the final lexeme.
next: String,
/// `true` when `collect` found nothing after this lexeme; `new` always stores `false`.
last: bool,
}
impl Lexeme {
    /// Builds a lexeme from its own text and the text of the lexeme that follows it.
    ///
    /// The result is never flagged as last; only [`Lexeme::collect`] sets that flag.
    pub fn new(raw: &str, next: &str) -> Lexeme {
        Lexeme {
            text: raw.to_owned(),
            next: next.to_owned(),
            last: false,
        }
    }

    /// Returns an owned copy of this lexeme's text.
    pub fn text(&self) -> String {
        self.text.clone()
    }

    /// Returns an owned copy of the following lexeme's text.
    ///
    /// Logs a message when that text is empty although this lexeme is not flagged as last.
    pub fn next(&self) -> String {
        if self.next.is_empty() && !self.last {
            log!("Returning an empty string for next of non-last {self:?}");
        }
        self.next.clone()
    }

    /// Returns `true` when this lexeme is flagged as the last one.
    pub fn last(&self) -> bool {
        self.last
    }

    /// Replaces this lexeme's text with `new`; the lookahead and last flag are untouched.
    pub fn mutate_text(&mut self, new: &str) {
        self.text = new.to_string();
    }

    /// Returns the only character of `source`, or `None` when it holds zero or several.
    ///
    /// Stops after at most two characters instead of counting the whole string.
    fn single_char(source: &str) -> Option<char> {
        let mut chars = source.chars();
        match (chars.next(), chars.next()) {
            (Some(only), None) => Some(only),
            _ => None,
        }
    }

    /// Returns the text as a `char` when it consists of exactly one character.
    pub fn as_char(&self) -> Option<char> {
        Self::single_char(&self.text)
    }

    /// Returns the following text as a `char` when it consists of exactly one character.
    pub fn next_as_char(&self) -> Option<char> {
        Self::single_char(&self.next)
    }

    /// Returns `true` when the text is exactly the single character `c`.
    pub fn match_as_char(&self, c: char) -> bool {
        self.as_char() == Some(c)
    }

    /// Returns `true` when the following text is exactly the single character `c`.
    pub fn match_next_as_char(&self, c: char) -> bool {
        self.next_as_char() == Some(c)
    }

    /// Returns `true` when the text is a single character found in the default
    /// `Delimiters` punctuation collection.
    pub fn is_punctuation(&self) -> bool {
        let punctuation = Delimiters::default().punctuation;
        self.as_char().is_some_and(|c| punctuation.contains(&c))
    }

    /// Returns `true` when the text is a single character found in the default
    /// `Delimiters` whitespace collection.
    pub fn is_whitespace(&self) -> bool {
        let delimiters = Delimiters::default();
        self.as_char()
            .is_some_and(|c| delimiters.whitespace.contains(&c))
    }

    /// Returns `true` when the following text is a single character found in the default
    /// `Delimiters` whitespace collection.
    pub fn is_next_whitespace(&self) -> bool {
        let delimiters = Delimiters::default();
        self.next_as_char()
            .is_some_and(|c| delimiters.whitespace.contains(&c))
    }

    /// Returns `true` when the following text is a single character found in the default
    /// `Delimiters` punctuation collection.
    pub fn is_next_punctuation(&self) -> bool {
        let delimiters = Delimiters::default();
        self.next_as_char()
            .is_some_and(|c| delimiters.punctuation.contains(&c))
    }

    /// Returns `true` when this lexeme is flagged as last, or when the following text is a
    /// single character that the default `Delimiters::is_boundary` accepts.
    pub fn is_next_boundary(&self) -> bool {
        let delimiters = Delimiters::default();
        self.last
            || self
                .next_as_char()
                .is_some_and(|c| delimiters.is_boundary(c))
    }

    /// Returns the first character of the following text, if there is one.
    pub fn next_first_char(&self) -> Option<char> {
        self.next.chars().next()
    }

    /// Returns `true` when the text starts with `query`; always `false` for empty text.
    pub fn match_first_char(&self, query: char) -> bool {
        self.text.starts_with(query)
    }

    /// Returns `true` when the text ends with `query`; always `false` for empty text.
    pub fn match_last_char(&self, query: char) -> bool {
        self.text.ends_with(query)
    }

    /// Returns `true` when the following text starts with `query`.
    pub fn match_next_first_char(&self, query: char) -> bool {
        self.next.starts_with(query)
    }

    /// Counts how many times `c` occurs in the text.
    ///
    /// # Panics
    /// Panics if number of chars for a single lexeme exceeds `i32::MAX`
    pub fn count_char(&self, c: char) -> i32 {
        // Iterate the stored text directly; no owned copy is needed just to count.
        let count = self.text.chars().filter(|&n| n == c).count();
        i32::try_from(count)
            .unwrap_or_else(|e| panic!("Wild char number {count} is a bit much: {e:#?}"))
    }

    /// Returns the characters of the text, in order.
    pub fn split_chars(&self) -> Vec<char> {
        self.text.chars().collect()
    }

    /// Consumes the lexeme and returns its text split on single spaces.
    ///
    /// Consecutive spaces produce empty entries, and empty text yields one empty entry.
    pub fn split_words(self) -> Vec<String> {
        self.text.split(' ').map(str::to_string).collect()
    }

    /// Consumes the lexeme and returns the part of its text before the first space.
    ///
    /// Splitting always yields at least one piece, so empty text gives `Some` of an empty
    /// string rather than `None`.
    pub fn first(self) -> Option<String> {
        self.text.split(' ').next().map(str::to_owned)
    }

    /// Converts raw strings into lexemes, giving each one the text of its successor.
    ///
    /// The final lexeme gets an empty lookahead and is flagged as last. An empty slice
    /// produces an empty vector.
    pub fn collect(raw_strings: &[String]) -> Vec<Lexeme> {
        raw_strings
            .iter()
            .enumerate()
            .map(|(index, raw)| {
                // `None` here means `raw` is the final element of the slice.
                let following = raw_strings.get(index + 1);
                Lexeme {
                    text: raw.clone(),
                    next: following.cloned().unwrap_or_default(),
                    last: following.is_none(),
                }
            })
            .collect()
    }
}
impl fmt::Display for Lexeme {
    /// Writes the wrapped text followed by either an end-of-input marker (for the last
    /// lexeme) or an arrow pointing at the wrapped lookahead text.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use crate::dev::wrap;

        // Build the trailing part first, as an owned string, so both branches share a type.
        let tail: String = if self.last() {
            String::from(" <EOI>")
        } else {
            format!("-> {}", wrap(&self.next))
        };

        write!(f, "{} {}", wrap(&self.text), tail)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` stores both strings verbatim and never flags the lexeme as last.
    #[test]
    fn new_lexeme() {
        let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu";
        let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa";
        let lexeme = Lexeme::new(raw, next);
        assert_eq!(lexeme.text, raw);
        assert_eq!(lexeme.next, next);
        assert!(!lexeme.last());
    }

    /// The first character of the lookahead text is reported.
    #[test]
    fn next_first_char() {
        let payload = "4IU";
        let lexeme = Lexeme::new(payload, payload);
        assert_eq!(lexeme.next_first_char().unwrap(), '4');
    }

    /// A matching leading character is detected.
    #[test]
    fn match_first_char() {
        let payload = "MKY";
        let lexeme = Lexeme::new(payload, payload);
        assert!(lexeme.match_first_char('M'));
    }

    /// Empty text has no first character, so nothing matches.
    #[test]
    fn match_absent_first_char() {
        let payload = "";
        let lexeme = Lexeme::new(payload, payload);
        assert!(!lexeme.match_first_char('x'));
    }

    /// `first` returns the text before the first space.
    #[test]
    fn first_word() {
        let payload = "nhNc fGev QnGW E4hj ExyZ";
        let lexeme = Lexeme::new(payload, payload);
        assert_eq!(lexeme.first(), Some(String::from("nhNc")));
    }

    /// Occurrences of a character are counted across the whole text.
    #[test]
    fn count_char() {
        let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
        let lexeme = Lexeme::new(payload, payload);
        assert_eq!(lexeme.count_char('j'), 3);
    }

    /// A large number of occurrences is counted without loss.
    ///
    /// This used to repeat the three-match payload of `count_char`, so no large count
    /// was ever exercised.
    #[test]
    fn count_char_huge_number() {
        let payload = "xj".repeat(50_000);
        let lexeme = Lexeme::new(&payload, "");
        assert_eq!(lexeme.count_char('j'), 50_000);
        assert_eq!(lexeme.count_char('x'), 50_000);
        assert_eq!(lexeme.count_char('z'), 0);
    }
}