Add lexeme 'first' field, refactor and add methods
This commit is contained in:
parent
e42c67676d
commit
c53afefb67
9 changed files with 98 additions and 50 deletions
|
|
@ -31,7 +31,7 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
|
||||||
|
|
||||||
let mut iterator = lexemes.iter().peekable();
|
let mut iterator = lexemes.iter().peekable();
|
||||||
while let Some(lexeme) = iterator.next() {
|
while let Some(lexeme) = iterator.next() {
|
||||||
if lexeme.match_as_char('\\') {
|
if lexeme.match_char('\\') {
|
||||||
if let Some(next) = iterator.next() {
|
if let Some(next) = iterator.next() {
|
||||||
tokens.push(Token::Literal(Literal::lex(next)));
|
tokens.push(Token::Literal(Literal::lex(next)));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,9 +53,9 @@ pub fn parse(
|
||||||
|
|
||||||
// Conditions in this decision tree should match the destination end
|
// Conditions in this decision tree should match the destination end
|
||||||
// or some intermediary state necessary to finding it
|
// or some intermediary state necessary to finding it
|
||||||
if lexeme.match_as_char('s')
|
if lexeme.match_char('s')
|
||||||
&& lexeme.is_next_boundary()
|
&& lexeme.is_next_boundary()
|
||||||
&& !lexeme.match_next_as_char('|')
|
&& !lexeme.match_next_char('|')
|
||||||
{
|
{
|
||||||
log!("End: Plural anchor");
|
log!("End: Plural anchor");
|
||||||
candidate.destination = Some(candidate.text.clone());
|
candidate.destination = Some(candidate.text.clone());
|
||||||
|
|
@ -65,7 +65,7 @@ pub fn parse(
|
||||||
state.context.inline = Inline::None;
|
state.context.inline = Inline::None;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} else if lexeme.match_as_char('|') && lexeme.is_next_delimiter() {
|
} else if lexeme.match_char('|') && lexeme.is_next_delimiter() {
|
||||||
log!("End: Pipe followed by delimiter");
|
log!("End: Pipe followed by delimiter");
|
||||||
if buffer.destination.is_empty() {
|
if buffer.destination.is_empty() {
|
||||||
candidate.destination = Some(candidate.text.clone());
|
candidate.destination = Some(candidate.text.clone());
|
||||||
|
|
@ -75,16 +75,16 @@ pub fn parse(
|
||||||
tokens.push(Token::Anchor(candidate.clone()));
|
tokens.push(Token::Anchor(candidate.clone()));
|
||||||
state.context.inline = Inline::None;
|
state.context.inline = Inline::None;
|
||||||
return true;
|
return true;
|
||||||
} else if lexeme.match_as_char('|') && !candidate.balanced {
|
} else if lexeme.match_char('|') && !candidate.balanced {
|
||||||
log!("State: Found a pipe, but no boundary: destination follows");
|
log!("State: Found a pipe, but no boundary: destination follows");
|
||||||
candidate.balanced = true;
|
candidate.balanced = true;
|
||||||
return true;
|
return true;
|
||||||
} else if lexeme.match_as_char(':') {
|
} else if lexeme.match_char(':') {
|
||||||
log!("State: Found a colon, marking anchor as external");
|
log!("State: Found a colon, marking anchor as external");
|
||||||
candidate.external = true;
|
candidate.external = true;
|
||||||
buffer.destination.push_str(&lexeme.text());
|
buffer.destination.push_str(&lexeme.text());
|
||||||
return true;
|
return true;
|
||||||
} else if lexeme.match_as_char('|') {
|
} else if lexeme.match_char('|') {
|
||||||
log!("End: Explicit end-of-destination pipe");
|
log!("End: Explicit end-of-destination pipe");
|
||||||
candidate.destination = Some(buffer.destination.clone());
|
candidate.destination = Some(buffer.destination.clone());
|
||||||
return true;
|
return true;
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ pub fn parse(
|
||||||
state.context.inline = Inline::Anchor;
|
state.context.inline = Inline::Anchor;
|
||||||
state.buffers.anchor = AnchorBuffer::default();
|
state.buffers.anchor = AnchorBuffer::default();
|
||||||
|
|
||||||
if lexeme.match_as_char('|') {
|
if lexeme.match_char('|') {
|
||||||
state.buffers.anchor.candidate.leading = true;
|
state.buffers.anchor.candidate.leading = true;
|
||||||
} else {
|
} else {
|
||||||
state.buffers.anchor.candidate.text = lexeme.text();
|
state.buffers.anchor.candidate.text = lexeme.text();
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ pub struct Lexeme {
|
||||||
text: String,
|
text: String,
|
||||||
next: String,
|
next: String,
|
||||||
third: String,
|
third: String,
|
||||||
|
first: bool,
|
||||||
last: bool,
|
last: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -16,6 +17,7 @@ impl Lexeme {
|
||||||
text: raw.to_owned(),
|
text: raw.to_owned(),
|
||||||
next: next.to_owned(),
|
next: next.to_owned(),
|
||||||
third: third.to_owned(),
|
third: third.to_owned(),
|
||||||
|
first: false,
|
||||||
last: false,
|
last: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -35,6 +37,10 @@ impl Lexeme {
|
||||||
self.last
|
self.last
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn first(&self) -> bool {
|
||||||
|
self.first
|
||||||
|
}
|
||||||
|
|
||||||
pub fn mutate_text(&mut self, new: &str) {
|
pub fn mutate_text(&mut self, new: &str) {
|
||||||
self.text = new.to_string();
|
self.text = new.to_string();
|
||||||
}
|
}
|
||||||
|
|
@ -63,46 +69,58 @@ impl Lexeme {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_as_char(&self, c: char) -> bool {
|
pub fn match_char(&self, c: char) -> bool {
|
||||||
self.as_char().is_some_and(|as_char| as_char == c)
|
self.as_char().is_some_and(|as_char| as_char == c)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_next_as_char(&self, c: char) -> bool {
|
pub fn match_next_char(&self, c: char) -> bool {
|
||||||
self.next_as_char().is_some_and(|next| next == c)
|
self.next_as_char().is_some_and(|next| next == c)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_third_as_char(&self, c: char) -> bool {
|
pub fn match_third_char(&self, c: char) -> bool {
|
||||||
self.third_as_char().is_some_and(|third| third == c)
|
self.third_as_char().is_some_and(|third| third == c)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool {
|
pub fn match_either_char(&self, c1: char, c2: char) -> bool {
|
||||||
self.match_as_char(t.0)
|
self.as_char().is_some_and(|c| c == c1 || c == c2)
|
||||||
&& self.match_next_as_char(t.1)
|
|
||||||
&& self.match_third_as_char(t.2)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn contains_as_char(&self, slice: &[char]) -> bool {
|
pub fn match_next_either_char(&self, c1: char, c2: char) -> bool {
|
||||||
|
self.next_as_char().is_some_and(|c| c == c1 || c == c2)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn match_char_sequence(&self, c1: char, c2: char) -> bool {
|
||||||
|
self.match_char(c1) && self.match_next_char(c2)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn match_char_triple(&self, c1: char, c2: char, c3: char) -> bool {
|
||||||
|
self.match_char(c1)
|
||||||
|
&& self.match_next_char(c2)
|
||||||
|
&& self.match_third_char(c3)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn match_char_in(&self, slice: &[char]) -> bool {
|
||||||
self.as_char().is_some_and(|c| slice.contains(&c))
|
self.as_char().is_some_and(|c| slice.contains(&c))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn contains_next_as_char(&self, slice: &[char]) -> bool {
|
pub fn match_next_char_in(&self, slice: &[char]) -> bool {
|
||||||
self.next_as_char().is_some_and(|c| slice.contains(&c))
|
self.next_as_char().is_some_and(|c| slice.contains(&c))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_punctuation(&self) -> bool {
|
pub fn is_punctuation(&self) -> bool {
|
||||||
self.contains_as_char(&Delimiters::default().punctuation)
|
self.match_char_in(&Delimiters::default().punctuation)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_whitespace(&self) -> bool {
|
pub fn is_whitespace(&self) -> bool {
|
||||||
self.contains_as_char(&Delimiters::default().whitespace)
|
self.match_char_in(&Delimiters::default().whitespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_next_whitespace(&self) -> bool {
|
pub fn is_next_whitespace(&self) -> bool {
|
||||||
self.contains_next_as_char(&Delimiters::default().whitespace)
|
self.match_next_char_in(&Delimiters::default().whitespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_next_punctuation(&self) -> bool {
|
pub fn is_next_punctuation(&self) -> bool {
|
||||||
self.contains_next_as_char(&Delimiters::default().punctuation)
|
self.match_next_char_in(&Delimiters::default().punctuation)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_next_boundary(&self) -> bool {
|
pub fn is_next_boundary(&self) -> bool {
|
||||||
|
|
@ -159,54 +177,63 @@ impl Lexeme {
|
||||||
vector
|
vector
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn split_words(self) -> Vec<String> {
|
pub fn split_segments(self) -> Vec<String> {
|
||||||
self.text().split(' ').map(str::to_string).collect()
|
self.text().split(' ').map(str::to_string).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn first(self) -> Option<String> {
|
pub fn first_segment(self) -> Option<String> {
|
||||||
self.split_words().first().map(String::to_owned)
|
self.split_segments().first().map(String::to_owned)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn collect(segments: &[String]) -> Vec<Lexeme> {
|
pub fn collect(segments_slice: &[String]) -> Vec<Lexeme> {
|
||||||
let mut out_vector = Vec::with_capacity(segments.len());
|
let mut lexemes = Vec::with_capacity(segments_slice.len());
|
||||||
let mut vec = segments.to_vec();
|
let mut segments = segments_slice.to_vec();
|
||||||
|
|
||||||
let Some(mut third) = vec.pop() else {
|
let Some(last) = segments.pop() else {
|
||||||
return vec![];
|
return vec![];
|
||||||
};
|
};
|
||||||
let last_lexeme = Lexeme {
|
let last_lexeme = Lexeme {
|
||||||
text: third.clone(),
|
text: last.clone(),
|
||||||
next: String::default(),
|
next: String::default(),
|
||||||
third: String::default(),
|
third: String::default(),
|
||||||
|
first: false,
|
||||||
last: true,
|
last: true,
|
||||||
};
|
};
|
||||||
|
|
||||||
let Some(mut next) = vec.pop() else {
|
let Some(penultimate) = segments.pop() else {
|
||||||
return vec![last_lexeme];
|
return vec![last_lexeme];
|
||||||
};
|
};
|
||||||
let penultimate_lexeme = Lexeme {
|
let penultimate_lexeme = Lexeme {
|
||||||
text: next.clone(),
|
text: penultimate.clone(),
|
||||||
next: third.clone(),
|
next: last.clone(),
|
||||||
third: String::default(),
|
third: String::default(),
|
||||||
|
first: false,
|
||||||
last: false,
|
last: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
for current in vec.iter().rev() {
|
let mut third = last;
|
||||||
out_vector.push(Lexeme {
|
let mut next = penultimate;
|
||||||
|
|
||||||
|
let mut iterator = segments.iter().rev().peekable();
|
||||||
|
while let Some(current) = iterator.next() {
|
||||||
|
let lexeme = Lexeme {
|
||||||
text: current.to_owned(),
|
text: current.to_owned(),
|
||||||
next: next.clone(),
|
next: next.clone(),
|
||||||
third: third.clone(),
|
third: third.clone(),
|
||||||
|
first: iterator.peek().is_none(),
|
||||||
last: false,
|
last: false,
|
||||||
});
|
};
|
||||||
|
|
||||||
|
lexemes.push(lexeme);
|
||||||
|
|
||||||
third.clone_from(&next);
|
third.clone_from(&next);
|
||||||
next.clone_from(current);
|
next.clone_from(current);
|
||||||
}
|
}
|
||||||
|
|
||||||
out_vector.reverse();
|
lexemes.reverse();
|
||||||
out_vector.push(penultimate_lexeme);
|
lexemes.push(penultimate_lexeme);
|
||||||
out_vector.push(last_lexeme);
|
lexemes.push(last_lexeme);
|
||||||
out_vector
|
lexemes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -214,14 +241,24 @@ impl fmt::Display for Lexeme {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
use crate::dev::wrap;
|
use crate::dev::wrap;
|
||||||
|
|
||||||
let next_display = if self.last() {
|
let properties = if self.first {
|
||||||
|
"[F] "
|
||||||
|
} else if self.last {
|
||||||
|
"[L] "
|
||||||
|
} else if self.last && self.first {
|
||||||
|
"[FL] "
|
||||||
|
} else {
|
||||||
|
""
|
||||||
|
};
|
||||||
|
|
||||||
|
let next_display = if self.last {
|
||||||
" <EOI>"
|
" <EOI>"
|
||||||
} else if self.third.is_empty() {
|
} else if self.third.is_empty() {
|
||||||
&format!("-> {} -! EOI", wrap(&self.next))
|
&format!("-> {} -! EOI", wrap(&self.next))
|
||||||
} else {
|
} else {
|
||||||
&format!("-> {} -> {}", wrap(&self.next), wrap(&self.third))
|
&format!("-> {} -> {}", wrap(&self.next), wrap(&self.third))
|
||||||
};
|
};
|
||||||
write!(f, "{} {}", wrap(&self.text), next_display)
|
write!(f, "Lx {}{} {}", properties, wrap(&self.text), next_display)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -261,10 +298,19 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn first_word() {
|
fn first_segment() {
|
||||||
let payload = "nhNc fGev QnGW E4hj ExyZ";
|
let payload = "nhNc fGev QnGW E4hj ExyZ";
|
||||||
let lexeme = Lexeme::new(payload, "", "");
|
let lexeme = Lexeme::new(payload, "", "");
|
||||||
assert_eq!(lexeme.first(), Some(String::from("nhNc")));
|
assert_eq!(lexeme.clone().first_segment(), Some(String::from("nhNc")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn first_lexeme() {
|
||||||
|
let input = ["h015r", "cvYde", "aw1Ui", "ASwew"].map(str::to_string);
|
||||||
|
let lexemes = Lexeme::collect(&input);
|
||||||
|
let first = lexemes.first().unwrap();
|
||||||
|
assert!(first.clone().first());
|
||||||
|
assert_eq!(first.text(), "h015r".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -15,14 +15,14 @@ impl CheckBox {
|
||||||
|
|
||||||
impl Parseable for CheckBox {
|
impl Parseable for CheckBox {
|
||||||
fn probe(lexeme: &Lexeme) -> bool {
|
fn probe(lexeme: &Lexeme) -> bool {
|
||||||
lexeme.match_triple_as_char(('[', ' ', ']'))
|
lexeme.match_char_triple('[', ' ', ']')
|
||||||
|| lexeme.match_triple_as_char(('[', 'x', ']'))
|
|| lexeme.match_char_triple('[', 'x', ']')
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lex(lexeme: &Lexeme) -> CheckBox {
|
fn lex(lexeme: &Lexeme) -> CheckBox {
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
log!("Lexing: {lexeme}");
|
log!("Lexing: {lexeme}");
|
||||||
if lexeme.match_next_as_char('x') {
|
if lexeme.match_next_char('x') {
|
||||||
CheckBox::new(true)
|
CheckBox::new(true)
|
||||||
} else {
|
} else {
|
||||||
CheckBox::new(false)
|
CheckBox::new(false)
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,9 @@ impl Parseable for Header {
|
||||||
== 0
|
== 0
|
||||||
{
|
{
|
||||||
let level = lexeme.text().len();
|
let level = lexeme.text().len();
|
||||||
lexeme.clone().split_words().len() == 1 && level > 0 && level <= 6
|
lexeme.clone().split_segments().len() == 1
|
||||||
|
&& level > 0
|
||||||
|
&& level <= 6
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ impl Paragraph {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn probe_end(lexeme: &Lexeme) -> bool {
|
pub fn probe_end(lexeme: &Lexeme) -> bool {
|
||||||
lexeme.match_as_char('\n') && lexeme.match_next_as_char('\n')
|
lexeme.match_char('\n') && lexeme.match_next_char('\n')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ impl Strike {
|
||||||
|
|
||||||
impl Parseable for Strike {
|
impl Parseable for Strike {
|
||||||
fn probe(lexeme: &Lexeme) -> bool {
|
fn probe(lexeme: &Lexeme) -> bool {
|
||||||
lexeme.match_as_char('~') && lexeme.match_next_as_char('~')
|
lexeme.match_char('~') && lexeme.match_next_char('~')
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lex(_lexeme: &Lexeme) -> Strike {
|
fn lex(_lexeme: &Lexeme) -> Strike {
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ impl Underline {
|
||||||
|
|
||||||
impl Parseable for Underline {
|
impl Parseable for Underline {
|
||||||
fn probe(lexeme: &Lexeme) -> bool {
|
fn probe(lexeme: &Lexeme) -> bool {
|
||||||
lexeme.match_as_char('_') && lexeme.match_next_as_char('_')
|
lexeme.match_char('_') && lexeme.match_next_char('_')
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lex(_lexeme: &Lexeme) -> Underline {
|
fn lex(_lexeme: &Lexeme) -> Underline {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue