From f9ef9a56a10ab1eabfa5df0a9ed806819c00a9d5 Mon Sep 17 00:00:00 2001 From: jutty Date: Fri, 2 Jan 2026 14:33:25 -0300 Subject: [PATCH] Centralize definition of delimiters in the segment module --- src/syntax/content/parser/lexeme.rs | 64 +++++++++++++++++-------- src/syntax/content/parser/segment.rs | 71 ++++++++++++++++------------ 2 files changed, 86 insertions(+), 49 deletions(-) diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs index a099274..d75a850 100644 --- a/src/syntax/content/parser/lexeme.rs +++ b/src/syntax/content/parser/lexeme.rs @@ -1,4 +1,4 @@ -use crate::prelude::*; +use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters}; #[derive(Clone, Debug)] pub struct Lexeme { @@ -35,17 +35,53 @@ impl Lexeme { self.text = new.to_string(); } + pub fn as_char(&self) -> Option { + if self.text.chars().count() == 1 { + self.text.chars().nth(0) + } else { + None + } + } + + pub fn next_as_char(&self) -> Option { + if self.next.chars().count() == 1 { + self.next.chars().nth(0) + } else { + None + } + } + + pub fn match_as_char(&self, c: char) -> bool { + self.as_char().is_some_and(|as_char| as_char == c) + } + + pub fn is_punctuation(&self) -> bool { + let punctuation = Delimiters::default().punctuation; + self.as_char().is_some_and(|c| punctuation.contains(&c)) + } + pub fn is_whitespace(&self) -> bool { - self.text == " " || self.text == "\n" + let delimiters = Delimiters::default(); + self.as_char() + .is_some_and(|c| delimiters.whitespace.contains(&c)) } pub fn is_next_whitespace(&self) -> bool { - self.next == " " || self.next == "\n" + let delimiters = Delimiters::default(); + self.next_as_char() + .is_some_and(|c| delimiters.whitespace.contains(&c)) } pub fn is_next_punctuation(&self) -> bool { - let punctuation = [",", ".", ":", ";", "?", "!", "(", ")", "\"", "'"]; - punctuation.contains(&self.next.as_str()) + let delimiters = Delimiters::default(); + self.next_as_char() + .is_some_and(|c| delimiters.punctuation.contains(&c)) + } + + pub fn is_next_boundary(&self) -> bool { + let delimiters = Delimiters::default(); + self.next_as_char() + .is_some_and(|c| delimiters.is_boundary(c)) } pub fn next_first_char(&self) -> Option { @@ -53,27 +89,15 @@ impl Lexeme { } pub fn match_first_char(&self, query: char) -> bool { - if let Some(first) = self.text.chars().nth(0) { - first == query - } else { - false - } + self.text.chars().nth(0).is_some_and(|c| c == query) } pub fn match_last_char(&self, query: char) -> bool { - if let Some(last) = self.text.chars().last() { - last == query - } else { - false - } + self.text.chars().last().is_some_and(|c| c == query) } pub fn match_next_first_char(&self, query: char) -> bool { - if let Some(first) = self.next.chars().nth(0) { - first == query - } else { - false - } + self.next.chars().nth(0).is_some_and(|c| c == query) } /// # Panics diff --git a/src/syntax/content/parser/segment.rs b/src/syntax/content/parser/segment.rs index 0fa4875..63aa202 100644 --- a/src/syntax/content/parser/segment.rs +++ b/src/syntax/content/parser/segment.rs @@ -2,41 +2,54 @@ pub fn segment(text: &str) -> Vec { delimiter::atomize(text) } -mod delimiter { +pub mod delimiter { - struct Delimiters { - atomic: Vec, - flanking: Vec, - punctuation: Vec, - grouping: Vec, + pub struct Delimiters { + pub atomic: Vec, + pub boundary: Vec, + pub flanking: Vec, + pub punctuation: Vec, + pub whitespace: Vec, + } + + impl Default for Delimiters { + fn default() -> Self { + let atomic = vec!['`', '|']; + let flanking = vec!['_', '*']; + let punctuation = vec![',', '.', ';', ':', '?', '!']; + let whitespace = vec!['\n', ' ']; + + let boundary = + [atomic.clone(), punctuation.clone(), whitespace.clone()] + .concat(); + + Delimiters { + atomic, + boundary, + flanking, + punctuation, + whitespace, + } + } } impl Delimiters { - fn new() -> Delimiters { - Delimiters { - atomic: vec!['\n', ' ', '`', '|'], - flanking: vec!['_', '*'], - punctuation: vec![',', '.', ':', ';', '?', '!'], - grouping: vec!['(', ')', '\'', '"'], - } + pub fn is_boundary(&self, c: char) -> bool { + [ + self.atomic.clone(), + self.punctuation.clone(), + self.whitespace.clone(), + ] + .concat() + .contains(&c) } - fn is_boundary(&self, c: char) -> bool { - self.atomic.contains(&c) - || self.punctuation.contains(&c) - || self.grouping.contains(&c) - } - - fn is_delimiter(&self, s: &str) -> bool { - Delimiters::match_str(s, &self.atomic) - || Delimiters::match_str(s, &self.flanking) - } - - fn match_str(s: &str, delimiters: &[char]) -> bool { + fn is_str_delimiter(&self, s: &str) -> bool { if s.chars().count() > 1 { - false - } else if let Some(first) = s.chars().nth(0) { - delimiters.contains(&first) + return false; + } + if let Some(c) = s.chars().nth(0) { + self.boundary.contains(&c) || self.flanking.contains(&c) } else { false } @@ -44,7 +57,7 @@ mod delimiter { } pub fn atomize(text: &str) -> Vec { - let delimiters = Delimiters::new(); + let delimiters = Delimiters::default(); let mut atomized: Vec = vec![]; let mut iterator = text.chars().peekable();