Lexeme: Add 'third' field, implement Default, simplify some methods
This commit is contained in:
parent
e6fb301a9b
commit
b60151556e
12 changed files with 97 additions and 70 deletions
|
|
@ -35,7 +35,7 @@ pub fn parse(
|
||||||
let mut header = Header::lex(lexeme);
|
let mut header = Header::lex(lexeme);
|
||||||
header.dom_id = Some(Header::make_id(
|
header.dom_id = Some(Header::make_id(
|
||||||
config,
|
config,
|
||||||
iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
|
iterator.peek().map_or(&Lexeme::default(), |l| l),
|
||||||
&mut state.dom_ids,
|
&mut state.dom_ids,
|
||||||
));
|
));
|
||||||
log!("Block Context: None -> Header on {lexeme}");
|
log!("Block Context: None -> Header on {lexeme}");
|
||||||
|
|
|
||||||
|
|
@ -2,18 +2,20 @@ use std::fmt;
|
||||||
|
|
||||||
use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters};
|
use crate::{prelude::*, syntax::content::parser::segment::delimiter::Delimiters};
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Default)]
|
||||||
pub struct Lexeme {
|
pub struct Lexeme {
|
||||||
text: String,
|
text: String,
|
||||||
next: String,
|
next: String,
|
||||||
|
third: String,
|
||||||
last: bool,
|
last: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Lexeme {
|
impl Lexeme {
|
||||||
pub fn new(raw: &str, next: &str) -> Lexeme {
|
pub fn new(raw: &str, next: &str, third: &str) -> Lexeme {
|
||||||
Lexeme {
|
Lexeme {
|
||||||
text: raw.to_owned(),
|
text: raw.to_owned(),
|
||||||
next: next.to_owned(),
|
next: next.to_owned(),
|
||||||
|
third: third.to_owned(),
|
||||||
last: false,
|
last: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -53,6 +55,14 @@ impl Lexeme {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn third_as_char(&self) -> Option<char> {
|
||||||
|
if self.third.chars().count() == 1 {
|
||||||
|
self.third.chars().nth(0)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn match_as_char(&self, c: char) -> bool {
|
pub fn match_as_char(&self, c: char) -> bool {
|
||||||
self.as_char().is_some_and(|as_char| as_char == c)
|
self.as_char().is_some_and(|as_char| as_char == c)
|
||||||
}
|
}
|
||||||
|
|
@ -61,27 +71,38 @@ impl Lexeme {
|
||||||
self.next_as_char().is_some_and(|next| next == c)
|
self.next_as_char().is_some_and(|next| next == c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn match_third_as_char(&self, c: char) -> bool {
|
||||||
|
self.third_as_char().is_some_and(|third| third == c)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool {
|
||||||
|
self.match_as_char(t.0)
|
||||||
|
&& self.match_next_as_char(t.1)
|
||||||
|
&& self.match_third_as_char(t.2)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains_as_char(&self, slice: &[char]) -> bool {
|
||||||
|
self.as_char().is_some_and(|c| slice.contains(&c))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains_next_as_char(&self, slice: &[char]) -> bool {
|
||||||
|
self.next_as_char().is_some_and(|c| slice.contains(&c))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_punctuation(&self) -> bool {
|
pub fn is_punctuation(&self) -> bool {
|
||||||
let punctuation = Delimiters::default().punctuation;
|
self.contains_as_char(&Delimiters::default().punctuation)
|
||||||
self.as_char().is_some_and(|c| punctuation.contains(&c))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_whitespace(&self) -> bool {
|
pub fn is_whitespace(&self) -> bool {
|
||||||
let delimiters = Delimiters::default();
|
self.contains_as_char(&Delimiters::default().whitespace)
|
||||||
self.as_char()
|
|
||||||
.is_some_and(|c| delimiters.whitespace.contains(&c))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_next_whitespace(&self) -> bool {
|
pub fn is_next_whitespace(&self) -> bool {
|
||||||
let delimiters = Delimiters::default();
|
self.contains_next_as_char(&Delimiters::default().whitespace)
|
||||||
self.next_as_char()
|
|
||||||
.is_some_and(|c| delimiters.whitespace.contains(&c))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_next_punctuation(&self) -> bool {
|
pub fn is_next_punctuation(&self) -> bool {
|
||||||
let delimiters = Delimiters::default();
|
self.contains_next_as_char(&Delimiters::default().punctuation)
|
||||||
self.next_as_char()
|
|
||||||
.is_some_and(|c| delimiters.punctuation.contains(&c))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_next_boundary(&self) -> bool {
|
pub fn is_next_boundary(&self) -> bool {
|
||||||
|
|
@ -146,25 +167,41 @@ impl Lexeme {
|
||||||
self.split_words().first().map(String::to_owned)
|
self.split_words().first().map(String::to_owned)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn collect(raw_strings: &[String]) -> Vec<Lexeme> {
|
pub fn collect(segments: &[String]) -> Vec<Lexeme> {
|
||||||
let mut out_vector = Vec::with_capacity(raw_strings.len());
|
let mut out_vector = Vec::with_capacity(segments.len());
|
||||||
let mut iterator = raw_strings.iter().peekable();
|
let mut vec = segments.to_vec();
|
||||||
|
|
||||||
while let Some(raw) = iterator.next() {
|
let Some(mut third) = vec.pop() else { return vec![] };
|
||||||
let mut next = String::default();
|
let last_lexeme = Lexeme {
|
||||||
let mut last = false;
|
text: third.clone(),
|
||||||
if let Some(peeked) = iterator.peek() {
|
next: String::default(),
|
||||||
next.clone_from(*peeked);
|
third: String::default(),
|
||||||
} else {
|
last: true,
|
||||||
last = true;
|
};
|
||||||
}
|
|
||||||
|
let Some(mut next) = vec.pop() else { return vec![last_lexeme] };
|
||||||
|
let penultimate_lexeme = Lexeme {
|
||||||
|
text: next.clone(),
|
||||||
|
next: third.clone(),
|
||||||
|
third: String::default(),
|
||||||
|
last: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
for current in vec.iter().rev() {
|
||||||
out_vector.push(Lexeme {
|
out_vector.push(Lexeme {
|
||||||
text: raw.to_owned(),
|
text: current.to_owned(),
|
||||||
next,
|
next: next.clone(),
|
||||||
last,
|
third: third.clone(),
|
||||||
|
last: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
third.clone_from(&next);
|
||||||
|
next.clone_from(current);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out_vector.reverse();
|
||||||
|
out_vector.push(penultimate_lexeme);
|
||||||
|
out_vector.push(last_lexeme);
|
||||||
out_vector
|
out_vector
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -175,8 +212,10 @@ impl fmt::Display for Lexeme {
|
||||||
|
|
||||||
let next_display = if self.last() {
|
let next_display = if self.last() {
|
||||||
" <EOI>"
|
" <EOI>"
|
||||||
|
} else if self.third.is_empty() {
|
||||||
|
&format!("-> {} -! EOI", wrap(&self.next))
|
||||||
} else {
|
} else {
|
||||||
&format!("-> {}", wrap(&self.next))
|
&format!("-> {} -> {}", wrap(&self.next), wrap(&self.third))
|
||||||
};
|
};
|
||||||
write!(f, "{} {}", wrap(&self.text), next_display)
|
write!(f, "{} {}", wrap(&self.text), next_display)
|
||||||
}
|
}
|
||||||
|
|
@ -190,50 +229,44 @@ mod tests {
|
||||||
fn new_lexeme() {
|
fn new_lexeme() {
|
||||||
let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu";
|
let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu";
|
||||||
let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa";
|
let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa";
|
||||||
let lexeme = Lexeme::new(raw, next);
|
let third = "K0QTlujGjL2qxBzs16g8oyiCYSuQaRVE";
|
||||||
|
let lexeme = Lexeme::new(raw, next, third);
|
||||||
assert_eq!(lexeme.text, raw);
|
assert_eq!(lexeme.text, raw);
|
||||||
assert_eq!(lexeme.next, next);
|
assert_eq!(lexeme.next, next);
|
||||||
|
assert_eq!(lexeme.third, third);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn next_first_char() {
|
fn next_first_char() {
|
||||||
let payload = "4IU";
|
let payload = "4IU";
|
||||||
let lexeme = Lexeme::new(payload, payload);
|
let lexeme = Lexeme::new("", payload, "");
|
||||||
assert_eq!(lexeme.next_first_char().unwrap(), '4');
|
assert_eq!(lexeme.next_first_char().unwrap(), '4');
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn match_first_char() {
|
fn match_first_char() {
|
||||||
let payload = "MKY";
|
let payload = "MKY";
|
||||||
let lexeme = Lexeme::new(payload, payload);
|
let lexeme = Lexeme::new(payload, "", "");
|
||||||
assert!(lexeme.match_first_char('M'));
|
assert!(lexeme.match_first_char('M'));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn match_absent_first_char() {
|
fn match_absent_first_char() {
|
||||||
let payload = "";
|
let lexeme = Lexeme::new("", "", "");
|
||||||
let lexeme = Lexeme::new(payload, payload);
|
|
||||||
assert!(!lexeme.match_first_char('x'));
|
assert!(!lexeme.match_first_char('x'));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn first_word() {
|
fn first_word() {
|
||||||
let payload = "nhNc fGev QnGW E4hj ExyZ";
|
let payload = "nhNc fGev QnGW E4hj ExyZ";
|
||||||
let lexeme = Lexeme::new(payload, payload);
|
let lexeme = Lexeme::new(payload, "", "");
|
||||||
assert_eq!(lexeme.first(), Some(String::from("nhNc")));
|
assert_eq!(lexeme.first(), Some(String::from("nhNc")));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn count_char() {
|
fn count_char() {
|
||||||
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
|
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
|
||||||
let lexeme = Lexeme::new(payload, payload);
|
let lexeme = Lexeme::new(payload, "", "");
|
||||||
assert_eq!(lexeme.count_char('j'), 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn count_char_huge_number() {
|
|
||||||
let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
|
|
||||||
let lexeme = Lexeme::new(payload, payload);
|
|
||||||
assert_eq!(lexeme.count_char('j'), 3);
|
assert_eq!(lexeme.count_char('j'), 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ mod tests {
|
||||||
expected = "Attempt to lex an anchor directly from a lexeme"
|
expected = "Attempt to lex an anchor directly from a lexeme"
|
||||||
)]
|
)]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
Anchor::lex(&Lexeme::new("", ""));
|
Anchor::lex(&Lexeme::default());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,6 @@ mod tests {
|
||||||
expected = "Attempt to lex a bold tag directly from a lexeme"
|
expected = "Attempt to lex a bold tag directly from a lexeme"
|
||||||
)]
|
)]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
Bold::lex(&Lexeme::new("", ""));
|
Bold::lex(&Lexeme::default());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,6 @@ mod tests {
|
||||||
expected = "Attempt to lex a code tag directly from a lexeme"
|
expected = "Attempt to lex a code tag directly from a lexeme"
|
||||||
)]
|
)]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
Code::lex(&Lexeme::new("", ""));
|
Code::lex(&Lexeme::default());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -196,7 +196,7 @@ mod tests {
|
||||||
let mut map: HashMap<String, Vec<String>> = HashMap::default();
|
let mut map: HashMap<String, Vec<String>> = HashMap::default();
|
||||||
let id = Header::make_id(
|
let id = Header::make_id(
|
||||||
&Config::default(),
|
&Config::default(),
|
||||||
&Lexeme::new("##", "Title"),
|
&Lexeme::new("##", "Title", ""),
|
||||||
&mut map,
|
&mut map,
|
||||||
);
|
);
|
||||||
assert_eq!(id, "Title");
|
assert_eq!(id, "Title");
|
||||||
|
|
@ -209,7 +209,7 @@ mod tests {
|
||||||
|
|
||||||
let id = Header::make_id(
|
let id = Header::make_id(
|
||||||
&config,
|
&config,
|
||||||
&Lexeme::new("##", "駄目!"),
|
&Lexeme::new("##", "駄目!", ""),
|
||||||
&mut HashMap::default(),
|
&mut HashMap::default(),
|
||||||
);
|
);
|
||||||
assert_eq!(id, "h");
|
assert_eq!(id, "h");
|
||||||
|
|
@ -222,7 +222,7 @@ mod tests {
|
||||||
|
|
||||||
let id = Header::make_id(
|
let id = Header::make_id(
|
||||||
&config,
|
&config,
|
||||||
&Lexeme::new("##", "駄目!"),
|
&Lexeme::new("##", "駄目!", ""),
|
||||||
&mut HashMap::default(),
|
&mut HashMap::default(),
|
||||||
);
|
);
|
||||||
assert_eq!(id, "駄目!");
|
assert_eq!(id, "駄目!");
|
||||||
|
|
@ -233,15 +233,15 @@ mod tests {
|
||||||
let mut map: HashMap<String, Vec<String>> = HashMap::default();
|
let mut map: HashMap<String, Vec<String>> = HashMap::default();
|
||||||
let config = Config::default();
|
let config = Config::default();
|
||||||
let id =
|
let id =
|
||||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map);
|
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
|
||||||
assert_eq!(id, "UVrcCUjoQ");
|
assert_eq!(id, "UVrcCUjoQ");
|
||||||
|
|
||||||
let double =
|
let double =
|
||||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map);
|
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
|
||||||
assert_eq!(double, "UVrcCUjoQ-1");
|
assert_eq!(double, "UVrcCUjoQ-1");
|
||||||
|
|
||||||
let double2 =
|
let double2 =
|
||||||
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map);
|
Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map);
|
||||||
assert_eq!(double2, "UVrcCUjoQ-2");
|
assert_eq!(double2, "UVrcCUjoQ-2");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,6 @@ mod tests {
|
||||||
expected = "Attempt to lex an oblique tag directly from a lexeme"
|
expected = "Attempt to lex an oblique tag directly from a lexeme"
|
||||||
)]
|
)]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
Oblique::lex(&Lexeme::new("", ""));
|
Oblique::lex(&Lexeme::default());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
let p = Paragraph::lex(&Lexeme::new("", ""));
|
let p = Paragraph::lex(&Lexeme::default());
|
||||||
assert!(p.open.is_none());
|
assert!(p.open.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -71,7 +71,7 @@ mod tests {
|
||||||
expected = "Attempt to render a paragraph tag while open state is unknown"
|
expected = "Attempt to render a paragraph tag while open state is unknown"
|
||||||
)]
|
)]
|
||||||
fn render_state_unknown() {
|
fn render_state_unknown() {
|
||||||
let p = Paragraph::lex(&Lexeme::new("", ""));
|
let p = Paragraph::lex(&Lexeme::default());
|
||||||
drop(p.render());
|
drop(p.render());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -54,10 +54,10 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", ""));
|
let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||||
assert!(from_empty_lexeme.open.is_none());
|
assert!(from_empty_lexeme.open.is_none());
|
||||||
|
|
||||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`"));
|
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||||
assert!(from_non_empty_lexeme.open.is_none());
|
assert!(from_non_empty_lexeme.open.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -66,10 +66,10 @@ mod tests {
|
||||||
expected = "Attempt to render a preformat tag while open state is unknown"
|
expected = "Attempt to render a preformat tag while open state is unknown"
|
||||||
)]
|
)]
|
||||||
fn render() {
|
fn render() {
|
||||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", ""));
|
let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||||
from_empty_lexeme.render();
|
from_empty_lexeme.render();
|
||||||
|
|
||||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`"));
|
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||||
from_non_empty_lexeme.render();
|
from_non_empty_lexeme.render();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,18 +56,12 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn probe() {
|
fn probe() {
|
||||||
assert!(!Span::probe(&Lexeme::new(
|
assert!(!Span::probe(&Lexeme::default()));
|
||||||
&crate::ONSET.elapsed().as_nanos().to_string(),
|
|
||||||
"",
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
let span = Span::lex(&Lexeme::new(
|
let span = Span::lex(&Lexeme::default());
|
||||||
&crate::ONSET.elapsed().as_nanos().to_string(),
|
|
||||||
"",
|
|
||||||
));
|
|
||||||
assert!(span.open.is_none());
|
assert!(span.open.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -85,7 +79,7 @@ mod tests {
|
||||||
expected = "Attempt to render a span tag while open state is unknown"
|
expected = "Attempt to render a span tag while open state is unknown"
|
||||||
)]
|
)]
|
||||||
fn render_unknown_open_state() {
|
fn render_unknown_open_state() {
|
||||||
let open_span = Span::lex(&Lexeme::new("", ""));
|
let open_span = Span::lex(&Lexeme::default());
|
||||||
drop(open_span.render());
|
drop(open_span.render());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,6 @@ mod tests {
|
||||||
expected = "Attempt to lex a strike tag directly from a lexeme"
|
expected = "Attempt to lex a strike tag directly from a lexeme"
|
||||||
)]
|
)]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
Strike::lex(&Lexeme::new("", ""));
|
Strike::lex(&Lexeme::default());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,6 @@ mod tests {
|
||||||
expected = "Attempt to lex an underline tag directly from a lexeme"
|
expected = "Attempt to lex an underline tag directly from a lexeme"
|
||||||
)]
|
)]
|
||||||
fn lex() {
|
fn lex() {
|
||||||
Underline::lex(&Lexeme::new("", ""));
|
Underline::lex(&Lexeme::default());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue