From c53afefb676a058faa98d5a6858ef8c34becb1b5 Mon Sep 17 00:00:00 2001
From: jutty <j@jutty.dev>
Date: Fri, 9 Jan 2026 19:39:44 -0300
Subject: [PATCH] Add lexeme 'first' field, refactor and add methods

---
 src/syntax/content/parser.rs                 |   2 +-
 src/syntax/content/parser/context/anchor.rs  |  12 +-
 src/syntax/content/parser/context/inline.rs  |   2 +-
 src/syntax/content/parser/lexeme.rs          | 116 +++++++++++++------
 src/syntax/content/parser/token/checkbox.rs  |   6 +-
 src/syntax/content/parser/token/header.rs    |   4 +-
 src/syntax/content/parser/token/paragraph.rs |   2 +-
 src/syntax/content/parser/token/strike.rs    |   2 +-
 src/syntax/content/parser/token/underline.rs |   2 +-
 9 files changed, 98 insertions(+), 50 deletions(-)
diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs
index 0804602..491b292 100644
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@@ -31,7 +31,7 @@ fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
 
     let mut iterator = lexemes.iter().peekable();
     while let Some(lexeme) = iterator.next() {
-        if lexeme.match_as_char('\\') {
+        if lexeme.match_char('\\') {
             if let Some(next) = iterator.next() {
                 tokens.push(Token::Literal(Literal::lex(next)));
             }
diff --git a/src/syntax/content/parser/context/anchor.rs b/src/syntax/content/parser/context/anchor.rs
index 1ce26ff..230bad0 100644
--- a/src/syntax/content/parser/context/anchor.rs
+++ b/src/syntax/content/parser/context/anchor.rs
@@ -53,9 +53,9 @@ pub fn parse(
 
         // Conditions in this decision tree should match the destination end
         // or some intermediary state necessary to finding it
-        if lexeme.match_as_char('s')
+        if lexeme.match_char('s')
             && lexeme.is_next_boundary()
-            && !lexeme.match_next_as_char('|')
+            && !lexeme.match_next_char('|')
         {
             log!("End: Plural anchor");
             candidate.destination = Some(candidate.text.clone());
@@ -65,7 +65,7 @@ pub fn parse(
                 state.context.inline = Inline::None;
             }
             return true;
-        } else if lexeme.match_as_char('|') && lexeme.is_next_delimiter() {
+        } else if lexeme.match_char('|') && lexeme.is_next_delimiter() {
             log!("End: Pipe followed by delimiter");
             if buffer.destination.is_empty() {
                 candidate.destination = Some(candidate.text.clone());
@@ -75,16 +75,16 @@ pub fn parse(
             tokens.push(Token::Anchor(candidate.clone()));
             state.context.inline = Inline::None;
             return true;
-        } else if lexeme.match_as_char('|') && !candidate.balanced {
+        } else if lexeme.match_char('|') && !candidate.balanced {
             log!("State: Found a pipe, but no boundary: destination follows");
             candidate.balanced = true;
             return true;
-        } else if lexeme.match_as_char(':') {
+        } else if lexeme.match_char(':') {
             log!("State: Found a colon, marking anchor as external");
             candidate.external = true;
             buffer.destination.push_str(&lexeme.text());
             return true;
-        } else if lexeme.match_as_char('|') {
+        } else if lexeme.match_char('|') {
             log!("End: Explicit end-of-destination pipe");
             candidate.destination = Some(buffer.destination.clone());
             return true;
diff --git a/src/syntax/content/parser/context/inline.rs b/src/syntax/content/parser/context/inline.rs
index 10b8b6d..7d5633d 100644
--- a/src/syntax/content/parser/context/inline.rs
+++ b/src/syntax/content/parser/context/inline.rs
@@ -31,7 +31,7 @@ pub fn parse(
                 state.context.inline = Inline::Anchor;
                 state.buffers.anchor = AnchorBuffer::default();
 
-                if lexeme.match_as_char('|') {
+                if lexeme.match_char('|') {
                     state.buffers.anchor.candidate.leading = true;
                 } else {
                     state.buffers.anchor.candidate.text = lexeme.text();
diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs
index 2e5496c..648c6fb 100644
--- a/src/syntax/content/parser/lexeme.rs
+++ b/src/syntax/content/parser/lexeme.rs
@@ -7,6 +7,7 @@ pub struct Lexeme {
     text: String,
     next: String,
     third: String,
+    first: bool,
     last: bool,
 }
 
@@ -16,6 +17,7 @@ impl Lexeme {
             text: raw.to_owned(),
             next: next.to_owned(),
             third: third.to_owned(),
+            first: false,
             last: false,
         }
     }
@@ -35,6 +37,10 @@ impl Lexeme {
         self.last
     }
 
+    pub fn first(&self) -> bool {
+        self.first
+    }
+
     pub fn mutate_text(&mut self, new: &str) {
         self.text = new.to_string();
     }
@@ -63,46 +69,58 @@ impl Lexeme {
         }
     }
 
-    pub fn match_as_char(&self, c: char) -> bool {
+    pub fn match_char(&self, c: char) -> bool {
         self.as_char().is_some_and(|as_char| as_char == c)
     }
 
-    pub fn match_next_as_char(&self, c: char) -> bool {
+    pub fn match_next_char(&self, c: char) -> bool {
         self.next_as_char().is_some_and(|next| next == c)
     }
 
-    pub fn match_third_as_char(&self, c: char) -> bool {
+    pub fn match_third_char(&self, c: char) -> bool {
         self.third_as_char().is_some_and(|third| third == c)
     }
 
-    pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool {
-        self.match_as_char(t.0)
-            && self.match_next_as_char(t.1)
-            && self.match_third_as_char(t.2)
+    pub fn match_either_char(&self, c1: char, c2: char) -> bool {
+        self.as_char().is_some_and(|c| c == c1 || c == c2)
     }
 
-    pub fn contains_as_char(&self, slice: &[char]) -> bool {
+    pub fn match_next_either_char(&self, c1: char, c2: char) -> bool {
+        self.next_as_char().is_some_and(|c| c == c1 || c == c2)
+    }
+
+    pub fn match_char_sequence(&self, c1: char, c2: char) -> bool {
+        self.match_char(c1) && self.match_next_char(c2)
+    }
+
+    pub fn match_char_triple(&self, c1: char, c2: char, c3: char) -> bool {
+        self.match_char(c1)
+            && self.match_next_char(c2)
+            && self.match_third_char(c3)
+    }
+
+    pub fn match_char_in(&self, slice: &[char]) -> bool {
         self.as_char().is_some_and(|c| slice.contains(&c))
     }
 
-    pub fn contains_next_as_char(&self, slice: &[char]) -> bool {
+    pub fn match_next_char_in(&self, slice: &[char]) -> bool {
         self.next_as_char().is_some_and(|c| slice.contains(&c))
     }
 
     pub fn is_punctuation(&self) -> bool {
-        self.contains_as_char(&Delimiters::default().punctuation)
+        self.match_char_in(&Delimiters::default().punctuation)
     }
 
     pub fn is_whitespace(&self) -> bool {
-        self.contains_as_char(&Delimiters::default().whitespace)
+        self.match_char_in(&Delimiters::default().whitespace)
     }
 
     pub fn is_next_whitespace(&self) -> bool {
-        self.contains_next_as_char(&Delimiters::default().whitespace)
+        self.match_next_char_in(&Delimiters::default().whitespace)
     }
 
     pub fn is_next_punctuation(&self) -> bool {
-        self.contains_next_as_char(&Delimiters::default().punctuation)
+        self.match_next_char_in(&Delimiters::default().punctuation)
     }
 
     pub fn is_next_boundary(&self) -> bool {
@@ -159,54 +177,63 @@ impl Lexeme {
         vector
     }
 
-    pub fn split_words(self) -> Vec<String> {
+    pub fn split_segments(self) -> Vec<String> {
         self.text().split(' ').map(str::to_string).collect()
     }
 
-    pub fn first(self) -> Option<String> {
-        self.split_words().first().map(String::to_owned)
+    pub fn first_segment(self) -> Option<String> {
+        self.split_segments().first().map(String::to_owned)
     }
 
-    pub fn collect(segments: &[String]) -> Vec<Lexeme> {
-        let mut out_vector = Vec::with_capacity(segments.len());
-        let mut vec = segments.to_vec();
+    pub fn collect(segments_slice: &[String]) -> Vec<Lexeme> {
+        let mut lexemes = Vec::with_capacity(segments_slice.len());
+        let mut segments = segments_slice.to_vec();
 
-        let Some(mut third) = vec.pop() else {
+        let Some(last) = segments.pop() else {
             return vec![];
         };
         let last_lexeme = Lexeme {
-            text: third.clone(),
+            text: last.clone(),
             next: String::default(),
             third: String::default(),
+            first: segments.is_empty(), // a sole segment is also the first
             last: true,
         };
 
-        let Some(mut next) = vec.pop() else {
+        let Some(penultimate) = segments.pop() else {
             return vec![last_lexeme];
         };
         let penultimate_lexeme = Lexeme {
-            text: next.clone(),
-            next: third.clone(),
+            text: penultimate.clone(),
+            next: last.clone(),
             third: String::default(),
+            first: segments.is_empty(), // first when nothing precedes it
             last: false,
         };
 
-        for current in vec.iter().rev() {
-            out_vector.push(Lexeme {
+        let mut third = last;
+        let mut next = penultimate;
+
+        let mut iterator = segments.iter().rev().peekable();
+        while let Some(current) = iterator.next() {
+            let lexeme = Lexeme {
                 text: current.to_owned(),
                 next: next.clone(),
                 third: third.clone(),
+                first: iterator.peek().is_none(), // reversed walk ends at first
                 last: false,
-            });
+            };
+
+            lexemes.push(lexeme);
 
             third.clone_from(&next);
             next.clone_from(current);
         }
 
-        out_vector.reverse();
-        out_vector.push(penultimate_lexeme);
-        out_vector.push(last_lexeme);
-        out_vector
+        lexemes.reverse();
+        lexemes.push(penultimate_lexeme);
+        lexemes.push(last_lexeme);
+        lexemes
     }
 }
 
@@ -214,14 +241,24 @@ impl fmt::Display for Lexeme {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         use crate::dev::wrap;
 
-        let next_display = if self.last() {
+        let properties = if self.first && self.last {
+            "[FL] " // checked first; a lone-flag branch would shadow it
+        } else if self.first {
+            "[F] "
+        } else if self.last {
+            "[L] "
+        } else {
+            ""
+        };
+
+        let next_display = if self.last {
             " <EOI>"
         } else if self.third.is_empty() {
             &format!("-> {} -! EOI", wrap(&self.next))
         } else {
             &format!("-> {} -> {}", wrap(&self.next), wrap(&self.third))
         };
-        write!(f, "{} {}", wrap(&self.text), next_display)
+        write!(f, "Lx {}{} {}", properties, wrap(&self.text), next_display)
     }
 }
 
@@ -261,10 +298,19 @@ mod tests {
     }
 
     #[test]
-    fn first_word() {
+    fn first_segment() {
         let payload = "nhNc fGev QnGW E4hj ExyZ";
         let lexeme = Lexeme::new(payload, "", "");
-        assert_eq!(lexeme.first(), Some(String::from("nhNc")));
+        assert_eq!(lexeme.first_segment(), Some(String::from("nhNc")));
+    }
+
+    #[test]
+    fn first_lexeme() {
+        let input = ["h015r", "cvYde", "aw1Ui", "ASwew"].map(str::to_string);
+        let lexemes = Lexeme::collect(&input);
+        let first = lexemes.first().unwrap();
+        assert!(first.first()); // getter borrows `&self`; no clone needed
+        assert_eq!(first.text(), "h015r".to_string());
     }
 
     #[test]
diff --git a/src/syntax/content/parser/token/checkbox.rs b/src/syntax/content/parser/token/checkbox.rs
index 23cb994..290432d 100644
--- a/src/syntax/content/parser/token/checkbox.rs
+++ b/src/syntax/content/parser/token/checkbox.rs
@@ -15,14 +15,14 @@ impl CheckBox {
 
 impl Parseable for CheckBox {
     fn probe(lexeme: &Lexeme) -> bool {
-        lexeme.match_triple_as_char(('[', ' ', ']'))
-            || lexeme.match_triple_as_char(('[', 'x', ']'))
+        lexeme.match_char_triple('[', ' ', ']')
+            || lexeme.match_char_triple('[', 'x', ']')
     }
 
     fn lex(lexeme: &Lexeme) -> CheckBox {
         use crate::prelude::*;
         log!("Lexing: {lexeme}");
-        if lexeme.match_next_as_char('x') {
+        if lexeme.match_next_char('x') {
             CheckBox::new(true)
         } else {
             CheckBox::new(false)
diff --git a/src/syntax/content/parser/token/header.rs b/src/syntax/content/parser/token/header.rs
index 390a22a..f4845a6 100644
--- a/src/syntax/content/parser/token/header.rs
+++ b/src/syntax/content/parser/token/header.rs
@@ -83,7 +83,9 @@ impl Parseable for Header {
             == 0
         {
             let level = lexeme.text().len();
-            lexeme.clone().split_words().len() == 1 && level > 0 && level <= 6
+            lexeme.clone().split_segments().len() == 1
+                && level > 0
+                && level <= 6
         } else {
             false
         }
diff --git a/src/syntax/content/parser/token/paragraph.rs b/src/syntax/content/parser/token/paragraph.rs
index 798792b..0713ca3 100644
--- a/src/syntax/content/parser/token/paragraph.rs
+++ b/src/syntax/content/parser/token/paragraph.rs
@@ -11,7 +11,7 @@ impl Paragraph {
     }
 
     pub fn probe_end(lexeme: &Lexeme) -> bool {
-        lexeme.match_as_char('\n') && lexeme.match_next_as_char('\n')
+        lexeme.match_char('\n') && lexeme.match_next_char('\n')
     }
 }
 
diff --git a/src/syntax/content/parser/token/strike.rs b/src/syntax/content/parser/token/strike.rs
index 69ce9d4..f0e6e94 100644
--- a/src/syntax/content/parser/token/strike.rs
+++ b/src/syntax/content/parser/token/strike.rs
@@ -15,7 +15,7 @@ impl Strike {
 
 impl Parseable for Strike {
     fn probe(lexeme: &Lexeme) -> bool {
-        lexeme.match_as_char('~') && lexeme.match_next_as_char('~')
+        lexeme.match_char('~') && lexeme.match_next_char('~')
     }
 
     fn lex(_lexeme: &Lexeme) -> Strike {
diff --git a/src/syntax/content/parser/token/underline.rs b/src/syntax/content/parser/token/underline.rs
index a588d9c..539d207 100644
--- a/src/syntax/content/parser/token/underline.rs
+++ b/src/syntax/content/parser/token/underline.rs
@@ -15,7 +15,7 @@ impl Underline {
 
 impl Parseable for Underline {
     fn probe(lexeme: &Lexeme) -> bool {
-        lexeme.match_as_char('_') && lexeme.match_next_as_char('_')
+        lexeme.match_char('_') && lexeme.match_next_char('_')
     }
 
     fn lex(_lexeme: &Lexeme) -> Underline {