Centralize definition of delimiters in the segment module
This commit is contained in:
parent
8d204503a8
commit
f9ef9a56a1
2 changed files with 83 additions and 46 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use crate::prelude::*;
|
||||
use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Lexeme {
|
||||
|
|
@ -35,17 +35,53 @@ impl Lexeme {
|
|||
self.text = new.to_string();
|
||||
}
|
||||
|
||||
pub fn as_char(&self) -> Option<char> {
|
||||
if self.text.chars().count() == 1 {
|
||||
self.text.chars().nth(0)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_as_char(&self) -> Option<char> {
|
||||
if self.next.chars().count() == 1 {
|
||||
self.next.chars().nth(0)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_as_char(&self, c: char) -> bool {
|
||||
self.as_char().is_some_and(|as_char| as_char == c)
|
||||
}
|
||||
|
||||
pub fn is_punctuation(&self) -> bool {
|
||||
let punctuation = Delimiters::default().punctuation;
|
||||
self.as_char().is_some_and(|c| punctuation.contains(&c))
|
||||
}
|
||||
|
||||
/// Reports whether the lexeme's text is a single character listed in the
/// `whitespace` set of `Delimiters::default()`.
///
/// Text that is empty or longer than one character is never whitespace.
pub fn is_whitespace(&self) -> bool {
    let delimiters = Delimiters::default();
    self.as_char()
        .is_some_and(|c| delimiters.whitespace.contains(&c))
}
|
||||
|
||||
/// Reports whether the following lexeme's text (`self.next`) is a single
/// character listed in the `whitespace` set of `Delimiters::default()`.
///
/// Text that is empty or longer than one character is never whitespace.
pub fn is_next_whitespace(&self) -> bool {
    let delimiters = Delimiters::default();
    self.next_as_char()
        .is_some_and(|c| delimiters.whitespace.contains(&c))
}
|
||||
|
||||
pub fn is_next_punctuation(&self) -> bool {
|
||||
let punctuation = [",", ".", ":", ";", "?", "!", "(", ")", "\"", "'"];
|
||||
punctuation.contains(&self.next.as_str())
|
||||
let delimiters = Delimiters::default();
|
||||
self.next_as_char()
|
||||
.is_some_and(|c| delimiters.punctuation.contains(&c))
|
||||
}
|
||||
|
||||
pub fn is_next_boundary(&self) -> bool {
|
||||
let delimiters = Delimiters::default();
|
||||
self.next_as_char()
|
||||
.is_some_and(|c| delimiters.is_boundary(c))
|
||||
}
|
||||
|
||||
pub fn next_first_char(&self) -> Option<char> {
|
||||
|
|
@ -53,27 +89,15 @@ impl Lexeme {
|
|||
}
|
||||
|
||||
pub fn match_first_char(&self, query: char) -> bool {
|
||||
if let Some(first) = self.text.chars().nth(0) {
|
||||
first == query
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.text.chars().nth(0).is_some_and(|c| c == query)
|
||||
}
|
||||
|
||||
pub fn match_last_char(&self, query: char) -> bool {
|
||||
if let Some(last) = self.text.chars().last() {
|
||||
last == query
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.text.chars().last().is_some_and(|c| c == query)
|
||||
}
|
||||
|
||||
pub fn match_next_first_char(&self, query: char) -> bool {
|
||||
if let Some(first) = self.next.chars().nth(0) {
|
||||
first == query
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.next.chars().nth(0).is_some_and(|c| c == query)
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
|
|
|
|||
|
|
@ -2,41 +2,54 @@ pub fn segment(text: &str) -> Vec<String> {
|
|||
delimiter::atomize(text)
|
||||
}
|
||||
|
||||
mod delimiter {
|
||||
pub mod delimiter {
|
||||
|
||||
/// Sets of delimiter characters, grouped by category.
///
/// All fields are public so that code outside this module can test
/// characters against the same sets.
pub struct Delimiters {
    /// Characters in the "atomic" category.
    pub atomic: Vec<char>,
    /// Characters treated as boundaries. NOTE(review): presumably meant to
    /// mirror atomic + punctuation + whitespace; nothing in this type keeps
    /// the field in sync if the other sets are changed afterwards.
    pub boundary: Vec<char>,
    /// Characters in the "flanking" category.
    pub flanking: Vec<char>,
    /// Punctuation characters.
    pub punctuation: Vec<char>,
    /// Whitespace characters.
    pub whitespace: Vec<char>,
}
|
||||
|
||||
impl Default for Delimiters {
|
||||
fn default() -> Self {
|
||||
let atomic = vec!['`', '|'];
|
||||
let flanking = vec!['_', '*'];
|
||||
let punctuation = vec![',', '.', ';', ':', '?', '!'];
|
||||
let whitespace = vec!['\n', ' '];
|
||||
|
||||
let boundary =
|
||||
[atomic.clone(), punctuation.clone(), whitespace.clone()]
|
||||
.concat();
|
||||
|
||||
Delimiters {
|
||||
atomic,
|
||||
boundary,
|
||||
flanking,
|
||||
punctuation,
|
||||
whitespace,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Delimiters {
|
||||
fn new() -> Delimiters {
|
||||
Delimiters {
|
||||
atomic: vec!['\n', ' ', '`', '|'],
|
||||
flanking: vec!['_', '*'],
|
||||
punctuation: vec![',', '.', ':', ';', '?', '!'],
|
||||
grouping: vec!['(', ')', '\'', '"'],
|
||||
}
|
||||
pub fn is_boundary(&self, c: char) -> bool {
|
||||
[
|
||||
self.atomic.clone(),
|
||||
self.punctuation.clone(),
|
||||
self.whitespace.clone(),
|
||||
]
|
||||
.concat()
|
||||
.contains(&c)
|
||||
}
|
||||
|
||||
fn is_boundary(&self, c: char) -> bool {
|
||||
self.atomic.contains(&c)
|
||||
|| self.punctuation.contains(&c)
|
||||
|| self.grouping.contains(&c)
|
||||
}
|
||||
|
||||
fn is_delimiter(&self, s: &str) -> bool {
|
||||
Delimiters::match_str(s, &self.atomic)
|
||||
|| Delimiters::match_str(s, &self.flanking)
|
||||
}
|
||||
|
||||
fn match_str(s: &str, delimiters: &[char]) -> bool {
|
||||
fn is_str_delimiter(&self, s: &str) -> bool {
|
||||
if s.chars().count() > 1 {
|
||||
false
|
||||
} else if let Some(first) = s.chars().nth(0) {
|
||||
delimiters.contains(&first)
|
||||
return false;
|
||||
}
|
||||
if let Some(c) = s.chars().nth(0) {
|
||||
self.boundary.contains(&c) || self.flanking.contains(&c)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
|
|
@ -44,7 +57,7 @@ mod delimiter {
|
|||
}
|
||||
|
||||
pub fn atomize(text: &str) -> Vec<String> {
|
||||
let delimiters = Delimiters::new();
|
||||
let delimiters = Delimiters::default();
|
||||
let mut atomized: Vec<String> = vec![];
|
||||
|
||||
let mut iterator = text.chars().peekable();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue