Lexeme: Add 'third' field, implement Default, simplify some methods
This commit is contained in:
parent
e6fb301a9b
commit
b60151556e
12 changed files with 97 additions and 70 deletions
|
|
@ -35,7 +35,7 @@ pub fn parse(
|
|||
let mut header = Header::lex(lexeme);
|
||||
header.dom_id = Some(Header::make_id(
|
||||
config,
|
||||
iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
|
||||
iterator.peek().map_or(&Lexeme::default(), |l| l),
|
||||
&mut state.dom_ids,
|
||||
));
|
||||
log!("Block Context: None -> Header on {lexeme}");
|
||||
|
|
|
|||
|
|
@ -2,18 +2,20 @@ use std::fmt;
|
|||
|
||||
use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Lexeme {
|
||||
text: String,
|
||||
next: String,
|
||||
third: String,
|
||||
last: bool,
|
||||
}
|
||||
|
||||
impl Lexeme {
|
||||
pub fn new(raw: &str, next: &str) -> Lexeme {
|
||||
pub fn new(raw: &str, next: &str, third: &str) -> Lexeme {
|
||||
Lexeme {
|
||||
text: raw.to_owned(),
|
||||
next: next.to_owned(),
|
||||
third: third.to_owned(),
|
||||
last: false,
|
||||
}
|
||||
}
|
||||
|
|
@ -53,6 +55,14 @@ impl Lexeme {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn third_as_char(&self) -> Option<char> {
|
||||
if self.third.chars().count() == 1 {
|
||||
self.third.chars().nth(0)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_as_char(&self, c: char) -> bool {
|
||||
self.as_char().is_some_and(|as_char| as_char == c)
|
||||
}
|
||||
|
|
@ -61,27 +71,38 @@ impl Lexeme {
|
|||
self.next_as_char().is_some_and(|next| next == c)
|
||||
}
|
||||
|
||||
pub fn match_third_as_char(&self, c: char) -> bool {
|
||||
self.third_as_char().is_some_and(|third| third == c)
|
||||
}
|
||||
|
||||
pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool {
|
||||
self.match_as_char(t.0)
|
||||
&& self.match_next_as_char(t.1)
|
||||
&& self.match_third_as_char(t.2)
|
||||
}
|
||||
|
||||
pub fn contains_as_char(&self, slice: &[char]) -> bool {
|
||||
self.as_char().is_some_and(|c| slice.contains(&c))
|
||||
}
|
||||
|
||||
pub fn contains_next_as_char(&self, slice: &[char]) -> bool {
|
||||
self.next_as_char().is_some_and(|c| slice.contains(&c))
|
||||
}
|
||||
|
||||
pub fn is_punctuation(&self) -> bool {
|
||||
let punctuation = Delimiters::default().punctuation;
|
||||
self.as_char().is_some_and(|c| punctuation.contains(&c))
|
||||
self.contains_as_char(&Delimiters::default().punctuation)
|
||||
}
|
||||
|
||||
pub fn is_whitespace(&self) -> bool {
|
||||
let delimiters = Delimiters::default();
|
||||
self.as_char()
|
||||
.is_some_and(|c| delimiters.whitespace.contains(&c))
|
||||
self.contains_as_char(&Delimiters::default().whitespace)
|
||||
}
|
||||
|
||||
pub fn is_next_whitespace(&self) -> bool {
|
||||
let delimiters = Delimiters::default();
|
||||
self.next_as_char()
|
||||
.is_some_and(|c| delimiters.whitespace.contains(&c))
|
||||
self.contains_next_as_char(&Delimiters::default().whitespace)
|
||||
}
|
||||
|
||||
pub fn is_next_punctuation(&self) -> bool {
|
||||
let delimiters = Delimiters::default();
|
||||
self.next_as_char()
|
||||
.is_some_and(|c| delimiters.punctuation.contains(&c))
|
||||
self.contains_next_as_char(&Delimiters::default().punctuation)
|
||||
}
|
||||
|
||||
pub fn is_next_boundary(&self) -> bool {
|
||||
|
|
@ -146,25 +167,41 @@ impl Lexeme {
|
|||
self.split_words().first().map(String::to_owned)
|
||||
}
|
||||
|
||||
pub fn collect(raw_strings: &[String]) -> Vec<Lexeme> {
|
||||
let mut out_vector = Vec::with_capacity(raw_strings.len());
|
||||
let mut iterator = raw_strings.iter().peekable();
|
||||
pub fn collect(segments: &[String]) -> Vec<Lexeme> {
|
||||
let mut out_vector = Vec::with_capacity(segments.len());
|
||||
let mut vec = segments.to_vec();
|
||||
|
||||
while let Some(raw) = iterator.next() {
|
||||
let mut next = String::default();
|
||||
let mut last = false;
|
||||
if let Some(peeked) = iterator.peek() {
|
||||
next.clone_from(*peeked);
|
||||
} else {
|
||||
last = true;
|
||||
}
|
||||
let Some(mut third) = vec.pop() else { return vec![] };
|
||||
let last_lexeme = Lexeme {
|
||||
text: third.clone(),
|
||||
next: String::default(),
|
||||
third: String::default(),
|
||||
last: true,
|
||||
};
|
||||
|
||||
let Some(mut next) = vec.pop() else { return vec![last_lexeme] };
|
||||
let penultimate_lexeme = Lexeme {
|
||||
text: next.clone(),
|
||||
next: third.clone(),
|
||||
third: String::default(),
|
||||
last: false,
|
||||
};
|
||||
|
||||
for current in vec.iter().rev() {
|
||||
out_vector.push(Lexeme {
|
||||
text: raw.to_owned(),
|
||||
next,
|
||||
last,
|
||||
text: current.to_owned(),
|
||||
next: next.clone(),
|
||||
third: third.clone(),
|
||||
last: false,
|
||||
});
|
||||
|
||||
third.clone_from(&next);
|
||||
next.clone_from(current);
|
||||
}
|
||||
|
||||
out_vector.reverse();
|
||||
out_vector.push(penultimate_lexeme);
|
||||
out_vector.push(last_lexeme);
|
||||
out_vector
|
||||
}
|
||||
}
|
||||
|
|
@ -175,8 +212,10 @@ impl fmt::Display for Lexeme {
|
|||
|
||||
let next_display = if self.last() {
|
||||
" <EOI>"
|
||||
} else if self.third.is_empty() {
|
||||
&format!("-> {} -! EOI", wrap(&self.next))
|
||||
} else {
|
||||
&format!("-> {}", wrap(&self.next))
|
||||
&format!("-> {} -> {}", wrap(&self.next), wrap(&self.third))
|
||||
};
|
||||
write!(f, "{} {}", wrap(&self.text), next_display)
|
||||
}
|
||||
|
|
@ -190,50 +229,44 @@ mod tests {
|
|||
fn new_lexeme() {
|
||||
let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu";
|
||||
let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa";
|
||||
let lexeme = Lexeme::new(raw, next);
|
||||
let third = "K0QTlujGjL2qxBzs16g8oyiCYSuQaRVE";
|
||||
let lexeme = Lexeme::new(raw, next, third);
|
||||
assert_eq!(lexeme.text, raw);
|
||||
assert_eq!(lexeme.next, next);
|
||||
assert_eq!(lexeme.third, third);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn next_first_char() {
|
||||
let payload = "4IU";
|
||||
let lexeme = Lexeme::new(payload, payload);
|
||||
let lexeme = Lexeme::new("", payload, "");
|
||||
assert_eq!(lexeme.next_first_char().unwrap(), '4');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn match_first_char() {
|
||||
let payload = "MKY";
|
||||
let lexeme = Lexeme::new(payload, payload);
|
||||
let lexeme = Lexeme::new(payload, "", "");
|
||||
assert!(lexeme.match_first_char('M'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn match_absent_first_char() {
|
||||
let payload = "";
|
||||
let lexeme = Lexeme::new(payload, payload);
|
||||
let lexeme = Lexeme::new("", "", "");
|
||||
assert!(!lexeme.match_first_char('x'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn first_word() {
|
||||
let payload = "nhNc fGev QnGW E4hj ExyZ";
|
||||
let lexeme = Lexeme::new(payload, payload);
|
||||
let lexeme = Lexeme::new(payload, "", "");
|
||||
assert_eq!(lexeme.first(), Some(String::from("nhNc")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn count_char() {
|
||||
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
|
||||
let lexeme = Lexeme::new(payload, payload);
|
||||
assert_eq!(lexeme.count_char('j'), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn count_char_huge_number() {
|
||||
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
|
||||
let lexeme = Lexeme::new(payload, payload);
|
||||
let lexeme = Lexeme::new(payload, "", "");
|
||||
assert_eq!(lexeme.count_char('j'), 3);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ mod tests {
|
|||
expected = "Attempt to lex an anchor directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
Anchor::lex(&Lexeme::new("", ""));
|
||||
Anchor::lex(&Lexeme::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -56,6 +56,6 @@ mod tests {
|
|||
expected = "Attempt to lex a bold tag directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
Bold::lex(&Lexeme::new("", ""));
|
||||
Bold::lex(&Lexeme::default());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,6 +56,6 @@ mod tests {
|
|||
expected = "Attempt to lex a code tag directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
Code::lex(&Lexeme::new("", ""));
|
||||
Code::lex(&Lexeme::default());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ mod tests {
|
|||
let mut map: HashMap<String, Vec<String>> = HashMap::default();
|
||||
let id = Header::make_id(
|
||||
&Config::default(),
|
||||
&Lexeme::new("##", "Title"),
|
||||
&Lexeme::new("##", "Title", ""),
|
||||
&mut map,
|
||||
);
|
||||
assert_eq!(id, "Title");
|
||||
|
|
@ -209,7 +209,7 @@ mod tests {
|
|||
|
||||
let id = Header::make_id(
|
||||
&config,
|
||||
&Lexeme::new("##", "駄目!"),
|
||||
&Lexeme::new("##", "駄目!", ""),
|
||||
&mut HashMap::default(),
|
||||
);
|
||||
assert_eq!(id, "h");
|
||||
|
|
@ -222,7 +222,7 @@ mod tests {
|
|||
|
||||
let id = Header::make_id(
|
||||
&config,
|
||||
&Lexeme::new("##", "駄目!"),
|
||||
&Lexeme::new("##", "駄目!", ""),
|
||||
&mut HashMap::default(),
|
||||
);
|
||||
assert_eq!(id, "駄目!");
|
||||
|
|
@ -233,15 +233,15 @@ mod tests {
|
|||
let mut map: HashMap<String, Vec<String>> = HashMap::default();
|
||||
let config = Config::default();
|
||||
let id =
|
||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map);
|
||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
|
||||
assert_eq!(id, "UVrcCUjoQ");
|
||||
|
||||
let double =
|
||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map);
|
||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
|
||||
assert_eq!(double, "UVrcCUjoQ-1");
|
||||
|
||||
let double2 =
|
||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map);
|
||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
|
||||
assert_eq!(double2, "UVrcCUjoQ-2");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,6 +56,6 @@ mod tests {
|
|||
expected = "Attempt to lex an oblique tag directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
Oblique::lex(&Lexeme::new("", ""));
|
||||
Oblique::lex(&Lexeme::default());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn lex() {
|
||||
let p = Paragraph::lex(&Lexeme::new("", ""));
|
||||
let p = Paragraph::lex(&Lexeme::default());
|
||||
assert!(p.open.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ mod tests {
|
|||
expected = "Attempt to render a paragraph tag while open state is unknown"
|
||||
)]
|
||||
fn render_state_unknown() {
|
||||
let p = Paragraph::lex(&Lexeme::new("", ""));
|
||||
let p = Paragraph::lex(&Lexeme::default());
|
||||
drop(p.render());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,10 +54,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn lex() {
|
||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", ""));
|
||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
assert!(from_empty_lexeme.open.is_none());
|
||||
|
||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`"));
|
||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
assert!(from_non_empty_lexeme.open.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -66,10 +66,10 @@ mod tests {
|
|||
expected = "Attempt to render a preformat tag while open state is unknown"
|
||||
)]
|
||||
fn render() {
|
||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", ""));
|
||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
from_empty_lexeme.render();
|
||||
|
||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`"));
|
||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
from_non_empty_lexeme.render();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,18 +56,12 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn probe() {
|
||||
assert!(!Span::probe(&Lexeme::new(
|
||||
&crate::ONSET.elapsed().as_nanos().to_string(),
|
||||
"",
|
||||
)));
|
||||
assert!(!Span::probe(&Lexeme::default()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lex() {
|
||||
let span = Span::lex(&Lexeme::new(
|
||||
&crate::ONSET.elapsed().as_nanos().to_string(),
|
||||
"",
|
||||
));
|
||||
let span = Span::lex(&Lexeme::default());
|
||||
assert!(span.open.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -85,7 +79,7 @@ mod tests {
|
|||
expected = "Attempt to render a span tag while open state is unknown"
|
||||
)]
|
||||
fn render_unknown_open_state() {
|
||||
let open_span = Span::lex(&Lexeme::new("", ""));
|
||||
let open_span = Span::lex(&Lexeme::default());
|
||||
drop(open_span.render());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,6 +53,6 @@ mod tests {
|
|||
expected = "Attempt to lex a strike tag directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
Strike::lex(&Lexeme::new("", ""));
|
||||
Strike::lex(&Lexeme::default());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,6 +56,6 @@ mod tests {
|
|||
expected = "Attempt to lex an underline tag directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
Underline::lex(&Lexeme::new("", ""));
|
||||
Underline::lex(&Lexeme::default());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue