From 8b782d6d20c3f7dd64c71b086a4a2021a0297dab Mon Sep 17 00:00:00 2001
From: jutty <j@jutty.dev>
Date: Tue, 23 Dec 2025 21:40:57 -0300
Subject: [PATCH] Rework token segmentation

---
 src/formats.rs                               |   9 -
 src/main.rs                                  |   9 -
 src/syntax/content/parser.rs                 | 225 ++++++++++++++-----
 src/syntax/content/parser/cluster.rs         | 192 ----------------
 src/syntax/content/parser/lexeme.rs          |  20 ++
 src/syntax/content/parser/segment.rs         | 199 ++++++++++++++++
 src/syntax/content/parser/token.rs           |   1 +
 src/syntax/content/parser/token/anchor.rs    | 110 +++------
 src/syntax/content/parser/token/code.rs      |  41 ++--
 src/syntax/content/parser/token/header.rs    |  41 +++-
 src/syntax/content/parser/token/linebreak.rs |   1 +
 src/syntax/content/parser/token/literal.rs   |   8 +-
 src/syntax/content/parser/token/paragraph.rs |   5 +-
 src/syntax/content/parser/token/preformat.rs |   1 +
 src/syntax/content/parser/token/span.rs      |   1 +
 static/graph.toml                            |  27 +--
 16 files changed, 501 insertions(+), 389 deletions(-)
 delete mode 100644 src/syntax/content/parser/cluster.rs
 create mode 100644 src/syntax/content/parser/segment.rs
diff --git a/src/formats.rs b/src/formats.rs
index 043f98d..4f79aac 100644
--- a/src/formats.rs
+++ b/src/formats.rs
@@ -136,12 +136,3 @@ pub fn deserialize_graph(in_format: &Format, serial: &str) -> Graph {
         },
     }
 }
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn smoke() {
-        let n = true;
-        assert!(n);
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 9d45675..92d97f2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -49,12 +49,3 @@ async fn main() -> io::Result<()> {
 
     Ok(())
 }
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn smoke() {
-        let e = true;
-        assert!(e);
-    }
-}
diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs
index 47e43bd..2adc02c 100644
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@@ -1,4 +1,4 @@
-use std::collections::{HashMap, hash_map::Entry};
+use std::collections::{HashMap};
 
 use crate::{formats::populate_graph, types::Config};
 
@@ -11,98 +11,147 @@ use lexeme::Lexeme;
 
 pub mod token;
 pub mod lexeme;
-pub mod cluster;
+pub mod segment;
 
 const LEXMAP: LexMap = &[
     (LineBreak::probe, |word| {
         Token::LineBreak(LineBreak::lex(word))
     }),
-    (Code::probe, |word| Token::Code(Code::lex(word))),
-    (Anchor::probe, |word| Token::Anchor(Anchor::lex(word))),
     (Literal::probe, |word| Token::Literal(Literal::lex(word))),
 ];
 
-enum Context {
-    None,
-    Paragraph,
-    Header(u8),
-    PreFormat,
-}
-
-struct State {
-    context: Context,
-    dom_ids: HashMap<String, Vec<String>>,
-}
-
 fn lex(text: &str, map: LexMap) -> Vec<Token> {
     let mut tokens: Vec<Token> = Vec::new();
-    let mut state = State {
-        context: Context::None,
-        dom_ids: HashMap::new(),
-    };
+    let mut state = State::new();
     let config: Config = populate_graph().meta.config;
 
-    let splits = cluster::cluster(text);
-    let lexemes = Lexeme::collect(&splits);
-    let iter = lexemes.iter().peekable();
-    for lexeme in iter {
-        match state.context {
-            Context::None => {
+    let segments = segment::segment(text);
+    let lexemes = Lexeme::collect(&segments);
+
+    let mut iterator = lexemes.iter().peekable();
+    while let Some(lexeme) = iterator.next() {
+        match state.context.block {
+            BlockContext::None => {
                 if PreFormat::probe(lexeme) {
+                    state.context.block = BlockContext::PreFormat;
                     tokens.push(Token::PreFormat(PreFormat::new(true)));
-                    state.context = Context::PreFormat;
                     continue;
                 } else if Header::probe(lexeme) {
-                    let base_id =
-                        if config.ascii_dom_ids && !lexeme.next.is_ascii() {
-                            String::from("h")
-                        } else {
-                            lexeme.next.clone().to_lowercase()
-                        };
-                    let id = match state.dom_ids.entry(base_id.clone()) {
-                        Entry::Occupied(mut occupied) => {
-                            let ids = occupied.get_mut();
-                            let suffix: u8 =
-                                ids.len().try_into().unwrap_or_default();
-                            let id_with_suffix = format!("{base_id}-{suffix}");
-                            ids.push(id_with_suffix.clone());
-                            id_with_suffix
-                        },
-                        Entry::Vacant(vacant) => {
-                            vacant.insert(vec![base_id.clone()]);
-                            base_id
-                        },
-                    };
-
                     let mut header = Header::lex(lexeme);
-                    header.dom_id = Some(id);
-                    state.context = Context::Header(header.get_level());
+                    header.dom_id = Some(Header::make_id(
+                        &config,
+                        &mut iterator,
+                        &mut state.dom_ids,
+                    ));
+                    state.context.block = BlockContext::Header(header.level());
                     tokens.push(Token::Header(header));
                     continue;
                 } else if Paragraph::probe(lexeme) {
+                    state.context.block = BlockContext::Paragraph;
                     tokens.push(Token::Paragraph(Paragraph::new(true)));
-                    state.context = Context::Paragraph;
                 }
             },
-            Context::PreFormat => {
+            BlockContext::PreFormat => {
                 if PreFormat::probe(lexeme) {
                     tokens.push(Token::PreFormat(PreFormat::new(false)));
-                    state.context = Context::None;
+                    state.context.block = BlockContext::None;
                 } else {
                     tokens.push(Token::Literal(Literal::lex(lexeme)));
                 }
                 continue;
             },
-            Context::Paragraph => {
+            BlockContext::Paragraph => {
                 if lexeme.text() == "\n" {
                     tokens.push(Token::Paragraph(Paragraph::new(false)));
-                    state.context = Context::None;
+                    state.context.block = BlockContext::None;
                 }
             },
-            Context::Header(n) => {
+            BlockContext::Header(n) => {
                 if lexeme.text() == "\n" {
                     tokens.push(Token::Header(Header::from_u8(n, false, None)));
-                    state.context = Context::None;
+                    state.context.block = BlockContext::None;
+                }
+            },
+        }
+
+        match state.context.inline {
+            InlineContext::None => {
+                if Code::probe(lexeme) {
+                    state.context.inline = InlineContext::Code;
+                    tokens.push(Token::Code(Code::new(true)));
+                    continue;
+                } else if Anchor::probe(lexeme) {
+                    state.context.inline = InlineContext::Anchor;
+                    state.buffers.anchor.clear();
+
+                    if lexeme.match_first_char('|') {
+                        state.buffers.anchor.candidate.leading = true;
+                    } else {
+                        state.buffers.anchor.candidate.text = lexeme.text();
+                    }
+                    continue;
+                }
+            },
+            InlineContext::Code => {
+                if Code::probe(lexeme) {
+                    state.context.inline = InlineContext::None;
+                    tokens.push(Token::Code(Code::new(false)));
+                    continue;
+                }
+            },
+            InlineContext::Anchor => {
+                let buffer = &mut state.buffers.anchor;
+                let candidate = &mut buffer.candidate;
+                if candidate.text.is_empty() {
+                    if lexeme.next == "|" {
+                        buffer.text.push_str(&lexeme.text());
+                        candidate.text.clone_from(&buffer.text);
+                    } else {
+                        buffer.text.push_str(&lexeme.text());
+                    }
+                    continue;
+                } else if candidate.destination.is_none() {
+                    // candidate is leading and we found the second pipe
+                    if candidate.leading && lexeme.text() == "|" {
+                        // whitespace after pipe: flanking node anchor
+                        if lexeme.is_next_whitespace() {
+                            candidate.destination =
+                                Some(candidate.text.clone());
+                            let token = Token::Anchor(candidate.clone());
+                            tokens.push(token);
+                            state.context.inline = InlineContext::None;
+                        // non-whitespace after pipe is the destination
+                        } else {
+                            candidate.destination = Some(lexeme.next.clone());
+                            let token = Token::Anchor(candidate.clone());
+                            tokens.push(token);
+                            state.context.inline = InlineContext::None;
+                            // if there is a trailing pipe, consume it
+                            if let Some(next) = iterator.next()
+                                && next.next == "|"
+                            {
+                                iterator.next();
+                            }
+                        }
+                    // candidate is nonleading and we found a second pipe
+                    } else if !candidate.leading && lexeme.next == "|" {
+                        candidate.destination = Some(lexeme.text());
+                        tokens.push(Token::Anchor(candidate.clone()));
+                        state.context.inline = InlineContext::None;
+                        iterator.next();
+                    // candidate is nonleading and we found whitespace
+                    } else if lexeme.is_next_whitespace() {
+                        candidate.destination = Some(lexeme.text());
+                        let token = Token::Anchor(candidate.clone());
+                        tokens.push(token);
+                        state.context.inline = InlineContext::None;
+                    // candidate is nonleading and we haven't found whitespace
+                    } else {
+                        buffer.destination.push_str(&lexeme.text());
+                    }
+                    continue;
+                } else {
+                    unreachable!("Anchor is already fully parsed");
                 }
             },
         }
@@ -118,6 +167,68 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
     tokens
 }
 
+enum BlockContext {
+    Paragraph, // closed by the next "\n" lexeme
+    Header(u8), // carries the header level; closed by the next "\n" lexeme
+    PreFormat, // lexemes become literals until the closing probe matches
+    None, // no block open; the next lexeme may open one
+}
+
+enum InlineContext {
+    Anchor, // an anchor is being assembled in `State::buffers.anchor`
+    Code, // between an opening and a closing backtick
+    None, // no inline construct open
+}
+
+struct State {
+    context: Context, // block and inline constructs currently open
+    dom_ids: HashMap<String, Vec<String>>, // base id -> ids issued for it
+    buffers: Buffers, // scratch space for constructs spanning several lexemes
+}
+
+struct Buffers {
+    anchor: AnchorBuffer, // cleared whenever a new anchor is opened
+}
+
+#[derive(Debug)]
+struct AnchorBuffer {
+    candidate: Anchor, // anchor under construction; cloned into a token when done
+    text: String, // lexeme text gathered until the candidate's text is known
+    destination: String, // NOTE(review): appended to in `lex`; no read is visible
+}
+
+impl AnchorBuffer {
+    fn clear(&mut self) {
+        self.text.clear();
+        self.destination.clear();
+        self.candidate = Anchor::empty(); // no text, no destination, not leading
+    }
+}
+
+impl State {
+    /// Initial lexer state: no context open, no ids issued, buffers empty.
+    fn new() -> Self {
+        let anchor = AnchorBuffer {
+            candidate: Anchor::empty(),
+            text: String::new(),
+            destination: String::new(),
+        };
+        Self {
+            context: Context {
+                block: BlockContext::None,
+                inline: InlineContext::None,
+            },
+            dom_ids: HashMap::new(),
+            buffers: Buffers { anchor },
+        }
+    }
+}
+
+struct Context {
+    block: BlockContext, // block-level construct currently open
+    inline: InlineContext, // inline construct currently open
+}
+
 fn parse(tokens: &[Token]) -> String {
     tokens.iter().map(Token::render).collect::<String>()
 }
diff --git a/src/syntax/content/parser/cluster.rs b/src/syntax/content/parser/cluster.rs
deleted file mode 100644
index 8cb6f47..0000000
--- a/src/syntax/content/parser/cluster.rs
+++ /dev/null
@@ -1,192 +0,0 @@
-use crate::prelude::*;
-
-pub fn cluster(text: &str) -> Vec<String> {
-    let words: Vec<String> = text
-        .replace("\n", " \n ")
-        .split(' ')
-        .map(str::to_string)
-        .collect();
-
-    let mut clusters: Vec<String> = vec![];
-    let mut raw_context = false;
-
-    let mut iterator = words.into_iter().peekable();
-    while let Some(word) = iterator.next() {
-        log!("Iterating: {word:?}");
-
-        if word == "`" {
-            raw_context = !raw_context;
-            log!("Raw context is now {raw_context}");
-        } else if raw_context {
-            log!("Skip: In raw context");
-            clusters.push(word);
-            continue;
-        }
-
-        let Some(delimiter) = delimiter::match_delimiter(&word) else {
-            log!("Skip: {word:?} does not have a delimiter");
-            clusters.push(word);
-            continue;
-        };
-
-        if !delimiter.leading && !word.starts_with(delimiter.char) {
-            clusters.push(word);
-            continue;
-        }
-
-        if (!delimiter.greedy
-            && !delimiter.triple
-            && word.matches(delimiter.char).count() == 2)
-            || (delimiter.triple
-                && (2..=3).contains(&word.matches(delimiter.char).count()))
-        {
-            log!("Skip: {word:?} is almost atomic, but must be split");
-            match word.rsplit_once(delimiter.char) {
-                Some((head, tail)) => {
-                    log!("Pushing head {head:?}, tail {tail:?} into clusters");
-                    clusters.push(format!("{head}{}", delimiter.char));
-                    clusters.push(tail.to_string());
-                    continue;
-                },
-                None => unreachable!(),
-            }
-        }
-
-        if let Some(next) = iterator.peek()
-            && next == "\n"
-            && delimiter.greedy
-        {
-            log!("Skip: Next {next:?} is a break, delimiter is greedy");
-            clusters.push(word);
-            continue;
-        }
-
-        if word.starts_with(&delimiter.string)
-            && word.ends_with(&delimiter.string)
-        {
-            log!("Skip: {word:?} is atomically-delimited");
-            clusters.push(word);
-            continue;
-        }
-
-        log!("Found cluster from {delimiter:?} in {word:?}");
-        let mut parts: Vec<String> = vec![word.clone()];
-        log!("Seeking from a base of {parts:?}");
-
-        while let Some(next) = iterator.peek() {
-            if next.contains(&delimiter.char.to_string()) {
-                log!("Found end of cluster: {next:?}");
-                if delimiter.greedy
-                    && delimiter.triple
-                    && next.matches(delimiter.char).count() > 1
-                {
-                    match next.rsplit_once(delimiter.char) {
-                        Some((head, tail)) => {
-                            log!(
-                                "Pushing head {head:?} of greedy triple EOC \
-                                into parts and tail {tail:?} into clusters"
-                            );
-                            parts.push(format!("{head}{}", delimiter.char));
-                            clusters.push(parts.join(" "));
-                            clusters.push(tail.to_string());
-                            log!("Breaking past clusters {clusters:?}");
-                            iterator.next();
-                            break;
-                        },
-                        None => unreachable!(),
-                    }
-                } else if delimiter.greedy {
-                    log!("Pushing end of cluster into parts");
-                    parts.push(
-                        iterator.next().unwrap_or_else(|| unreachable!()),
-                    );
-                    log!("Pushing parts {parts:?} into clusters {clusters:?}");
-                    clusters.push(parts.join(" "));
-                    log!("Breaking past clusters {clusters:?}");
-                    break;
-                } else {
-                    match next.rsplit_once(delimiter.char) {
-                        Some((head, tail)) => {
-                            log!(
-                                "Pushing head {head:?} of humble end of \
-                                cluster into parts"
-                            );
-                            parts.push(format!("{head}{}", delimiter.char));
-                            log!("Pushing parts into clusters");
-                            clusters.push(parts.join(" "));
-                            log!("Pushing tail {tail:?} into clusters");
-                            clusters.push(tail.to_string());
-                            log!("Breaking past clusters");
-                            iterator.next();
-                            break;
-                        },
-                        // is this one really unreachable?
-                        None => unreachable!(),
-                    }
-                }
-            } else {
-                log!("No delimiter: Pushing {:?} into parts", iterator.peek());
-                parts.push(iterator.next().unwrap_or_default());
-                log!("Seeking a boundary for parts {parts:?}");
-            }
-        }
-    }
-
-    log!("Returning clusters");
-    clusters
-}
-
-mod delimiter {
-
-    #[derive(Debug, Clone)]
-    pub struct Delimiter {
-        pub char: char,
-        pub string: String,
-        pub greedy: bool,
-        pub triple: bool,
-        pub leading: bool,
-    }
-
-    fn make_delimiters() -> (Vec<Delimiter>, Vec<Delimiter>) {
-        let delimiters = [
-            Delimiter {
-                char: '|',
-                string: "|".to_string(),
-                greedy: true,
-                triple: true,
-                leading: false,
-            },
-            Delimiter {
-                char: '`',
-                string: "`".to_string(),
-                greedy: false,
-                triple: false,
-                leading: true,
-            },
-        ];
-
-        (
-            delimiters.iter().filter(|d| d.leading).cloned().collect(),
-            delimiters.iter().filter(|d| !d.leading).cloned().collect(),
-        )
-    }
-
-    pub fn match_delimiter(word: &str) -> Option<Delimiter> {
-        let (leading, nonleading) = make_delimiters();
-
-        let first_char = word.chars().next()?;
-
-        if let Some(leading_match) =
-            leading.iter().find(|d| d.char == first_char).cloned()
-        {
-            Some(leading_match)
-        } else {
-            for delimiter in nonleading {
-                if word.contains(delimiter.char) {
-                    return Some(delimiter);
-                }
-            }
-            None
-        }
-    }
-}
diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs
index 620ff09..708e05a 100644
--- a/src/syntax/content/parser/lexeme.rs
+++ b/src/syntax/content/parser/lexeme.rs
@@ -16,6 +16,26 @@ impl Lexeme {
         self.text.clone()
     }
 
+    /// Whether this lexeme's text is a single space or a line feed.
+    pub fn is_whitespace(&self) -> bool {
+        matches!(self.text.as_str(), " " | "\n")
+    }
+
+    /// Whether `next`, the text after this lexeme, is a space or a line feed.
+    pub fn is_next_whitespace(&self) -> bool {
+        matches!(self.next.as_str(), " " | "\n")
+    }
+
+    /// Whether the text starts with `query`; always `false` for empty text.
+    pub fn match_first_char(&self, query: char) -> bool {
+        self.text.starts_with(query)
+    }
+
+    /// First character of `next`, or `None` when `next` is empty.
+    pub fn next_first_char(&self) -> Option<char> {
+        self.next.chars().next()
+    }
+
     /// # Panics
     /// Panics if number of chars for a single lexeme exceeds `i2::MAX`
     pub fn count_char(&self, c: char) -> i32 {
diff --git a/src/syntax/content/parser/segment.rs b/src/syntax/content/parser/segment.rs
new file mode 100644
index 0000000..33b2f04
--- /dev/null
+++ b/src/syntax/content/parser/segment.rs
@@ -0,0 +1,199 @@
+pub fn segment(text: &str) -> Vec<String> {
+    delimiter::atomize(text) // delimiters and the runs between them
+}
+
+mod delimiter {
+
+    /// Characters that are always emitted as a segment of their own.
+    fn make_delimiters() -> Vec<char> {
+        vec!['\n', ' ', '`', '|']
+    }
+
+    /// Splits `text` into atomic segments.
+    ///
+    /// Every delimiter character becomes a one-character segment and every
+    /// maximal run of other characters becomes a single segment, so joining
+    /// the returned segments reproduces `text`.
+    pub fn atomize(text: &str) -> Vec<String> {
+        let delimiters = make_delimiters();
+        let mut segments: Vec<String> = Vec::new();
+        // True while the last segment is a run of non-delimiter characters;
+        // tracking it avoids re-stringifying every delimiter per character.
+        let mut in_run = false;
+
+        for character in text.chars() {
+            let is_delimiter = delimiters.contains(&character);
+            match segments.last_mut() {
+                // extend the open run
+                Some(run) if in_run && !is_delimiter => run.push(character),
+                // a delimiter, or the first character of a new run
+                _ => segments.push(character.to_string()),
+            }
+            in_run = !is_delimiter;
+        }
+
+        segments
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::*;
+
+        #[test]
+        fn atomize_words() {
+            let words = "    justification for  the actions   of those  who hold authority   inevitably dwindles  "; // runs of one to four spaces
+            let actual = atomize(words);
+            let expected = vec![
+                " ",
+                " ",
+                " ",
+                " ",
+                "justification",
+                " ",
+                "for",
+                " ",
+                " ",
+                "the",
+                " ",
+                "actions",
+                " ",
+                " ",
+                " ",
+                "of",
+                " ",
+                "those",
+                " ",
+                " ",
+                "who",
+                " ",
+                "hold",
+                " ",
+                "authority",
+                " ",
+                " ",
+                " ",
+                "inevitably",
+                " ",
+                "dwindles",
+                " ",
+                " ",
+            ];
+            assert_eq!(actual, expected);
+        }
+
+        #[test]
+        fn atomize_ticks_no_spaces() {
+            let s = "a`c`adc`dadcdbd`cdb`dcdb`dc`dad`bdc";
+            let actual = atomize(s);
+            let expected = vec![
+                "a", "`", "c", "`", "adc", "`", "dadcdbd", "`", "cdb", "`",
+                "dcdb", "`", "dc", "`", "dad", "`", "bdc",
+            ]
+            .iter()
+            .map(std::string::ToString::to_string)
+            .collect::<Vec<String>>();
+
+            assert_eq!(actual, expected); // each backtick is its own segment
+        }
+
+        #[test]
+        fn atomize_ticks_with_spaces() {
+            let s = "a`c`adc`da dcdb d` cdb` dcdb `dc ` d ad ` bdc";
+
+            let actual = atomize(s);
+            let expected = vec![
+                "a", "`", "c", "`", "adc", "`", "da", " ", "dcdb", " ", "d",
+                "`", " ", "cdb", "`", " ", "dcdb", " ", "`", "dc", " ", "`",
+                " ", "d", " ", "ad", " ", "`", " ", "bdc",
+            ]
+            .iter()
+            .map(std::string::ToString::to_string)
+            .collect::<Vec<String>>();
+            assert_eq!(actual, expected); // spaces stay separate from backticks
+        }
+
+        #[test]
+        fn atomize_pipes() {
+            let s = "every other |time| as it was perceived";
+            let actual = atomize(s);
+            let expected = vec![
+                "every",
+                " ",
+                "other",
+                " ",
+                "|",
+                "time",
+                "|",
+                " ",
+                "as",
+                " ",
+                "it",
+                " ",
+                "was",
+                " ",
+                "perceived",
+            ];
+            assert_eq!(actual, expected); // each pipe is its own segment
+        }
+
+        #[test]
+        fn atomize_pipes_and_ticks() {
+            let s = "every other |time| as `it could or |perhaps somehow|then or now| it was` perceived";
+            let actual = atomize(s);
+            let expected = vec![
+                "every",
+                " ",
+                "other",
+                " ",
+                "|",
+                "time",
+                "|",
+                " ",
+                "as",
+                " ",
+                "`",
+                "it",
+                " ",
+                "could",
+                " ",
+                "or",
+                " ",
+                "|",
+                "perhaps",
+                " ",
+                "somehow",
+                "|",
+                "then",
+                " ",
+                "or",
+                " ",
+                "now",
+                "|",
+                " ",
+                "it",
+                " ",
+                "was",
+                "`",
+                " ",
+                "perceived",
+            ];
+            assert_eq!(actual, expected); // pipes inside backticks still split
+        }
+
+        #[test]
+        fn atomize_newlines() {
+            let s = "a`c`adc`da \ndcdb d` cdb` dc\ndb `dc ` d ad ` bdc";
+
+            let actual = atomize(s);
+            let expected = vec![
+                "a", "`", "c", "`", "adc", "`", "da", " ", "\n", "dcdb", " ",
+                "d", "`", " ", "cdb", "`", " ", "dc", "\n", "db", " ", "`",
+                "dc", " ", "`", " ", "d", " ", "ad", " ", "`", " ", "bdc",
+            ]
+            .iter()
+            .map(std::string::ToString::to_string)
+            .collect::<Vec<String>>();
+            assert_eq!(actual, expected); // "\n" splits like any delimiter
+        }
+    }
+}
diff --git a/src/syntax/content/parser/token.rs b/src/syntax/content/parser/token.rs
index 19a27cd..85ed1bf 100644
--- a/src/syntax/content/parser/token.rs
+++ b/src/syntax/content/parser/token.rs
@@ -9,6 +9,7 @@ pub mod header;
 pub mod preformat;
 pub mod code;
 
+#[derive(Debug)]
 pub enum Token {
     Anchor(anchor::Anchor),
     Code(code::Code),
diff --git a/src/syntax/content/parser/token/anchor.rs b/src/syntax/content/parser/token/anchor.rs
index 3b2d150..059814e 100644
--- a/src/syntax/content/parser/token/anchor.rs
+++ b/src/syntax/content/parser/token/anchor.rs
@@ -1,98 +1,62 @@
-use crate::prelude::*;
-
 use std::fmt::Display;
+
 use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
 
+#[derive(Debug, Clone)]
 pub struct Anchor {
-    text: String,
-    destination: String,
-    sticky: bool,
+    pub text: String,
+    pub destination: Option<String>,
+    pub leading: bool,
 }
 
 impl Parseable for Anchor {
     fn probe(lexeme: &Lexeme) -> bool {
-        let pipe_count = lexeme.count_char('|');
-        log!("{lexeme:?} has {pipe_count} pipes");
-
-        if !(1..=3).contains(&pipe_count) {
-            log!("Negative: Bad pipe count {pipe_count} in {lexeme:?}");
-            return false;
-        }
-        if lexeme.text().matches("||").count() > 0 {
-            log!("Negative: Contiguous pipes in {lexeme:?}");
-            return false;
-        }
-
-        let parts = Anchor::split_parts(lexeme);
-        if (1..=2).contains(&parts.len()) {
-            log!("Positive: Parts {parts:?} with length {}", parts.len());
-            true
-        } else {
-            log!("Negative: {parts:?} have length {}", parts.len());
-            false
-        }
+        lexeme.text() == "|" || (!lexeme.is_whitespace() && lexeme.next == "|")
     }
 
-    fn lex(lexeme: &Lexeme) -> Anchor {
-        let parts = Anchor::split_parts(lexeme);
-        log!("Lexing anchor {parts:?}");
-
-        let text = parts.first().unwrap_or_else(|| unreachable!());
-
-        fn try_node_anchor(anchor: &str) -> String {
-            if anchor.contains(":") || anchor.contains("/") {
-                anchor.to_owned()
-            } else {
-                format!("/node/{anchor}")
-            }
-        }
-
-        let destination = match parts.get(1) {
-            Some(d) => try_node_anchor(d),
-            None => try_node_anchor(text),
-        };
-
-        let sticky = [
-            ",", ".", ":", ";", "!", "?", "/", "(", ")", "%", "*", "&", r#"""#,
-            "'",
-        ];
-
-        log!("Lexed anchor: {text} -> {destination}");
-        Anchor {
-            text: text.to_owned(),
-            destination,
-            sticky: sticky.contains(&lexeme.next.as_str()),
-        }
+    fn lex(_lexeme: &Lexeme) -> Anchor {
+        panic!("Attempt to lex an anchor directly from a lexeme");
     }
 
     fn render(&self) -> String {
-        let space = if self.sticky {
-            String::new()
-        } else {
-            String::from(" ")
+        let Some(ref destination) = self.destination else {
+            panic!(
+                "Attempt to render anchor {self:?} without knowing its destination."
+            )
         };
-        format!(
-            r#"<a href="{}">{}</a>{space}"#,
-            &self.destination, &self.text
-        )
+
+        format!(r#"<a href="{}">{}</a>"#, destination, &self.text)
     }
 }
 
 impl Anchor {
-    fn split_parts(lexeme: &Lexeme) -> Vec<String> {
-        lexeme
-            .text()
-            .trim_start_matches('|')
-            .trim_end_matches('|')
-            .split('|')
-            .filter(|s| !s.is_empty())
-            .map(str::to_string)
-            .collect()
+    pub fn new(text: &str, destination: &str, leading: bool) -> Anchor {
+        Anchor {
+            text: text.to_owned(),
+            destination: Some(Anchor::resolve_destination(destination)),
+            leading, // named after the field it fills (was `spaced`)
+        }
+    }
+
+    fn resolve_destination(raw: &str) -> String {
+        // a colon or slash is kept as written; anything else goes under /node/
+        if raw.contains([':', '/']) {
+            return raw.to_owned();
+        }
+        format!("/node/{raw}")
+    }
+
+    pub fn empty() -> Anchor {
+        Anchor {
+            text: String::new(),
+            destination: None,
+            leading: false,
+        }
     }
 }
 
 impl Display for Anchor {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "Anchor: <{}> to <{}>", &self.text, &self.destination)
+        write!(f, "Anchor: <{}> to <{:?}>", &self.text, &self.destination)
     }
 }
diff --git a/src/syntax/content/parser/token/code.rs b/src/syntax/content/parser/token/code.rs
index cf1d300..549f60e 100644
--- a/src/syntax/content/parser/token/code.rs
+++ b/src/syntax/content/parser/token/code.rs
@@ -2,42 +2,31 @@ use crate::{
     syntax::content::{Parseable, Lexeme},
 };
 
+#[derive(Debug)]
 pub struct Code {
-    text: String,
-    sticky: bool,
+    open: bool,
+}
+
+impl Code {
+    pub fn new(open: bool) -> Self {
+        Self { open } // `render` emits "<code>" when open, "</code>" otherwise
+    }
+}
 
 impl Parseable for Code {
     fn probe(lexeme: &Lexeme) -> bool {
-        let chars = lexeme.split_chars();
-
-        if let Some(first_char) = chars.first()
-            && let Some(last_char) = chars.last()
-        {
-            *first_char == '`' && *last_char == '`'
-        } else {
-            false
-        }
+        lexeme.text() == "`"
     }
 
-    fn lex(lexeme: &Lexeme) -> Code {
-        let sticky = [
-            ",", ".", ":", ";", "!", "?", "/", "(", ")", "%", "*", "&", r#"""#,
-            "'",
-        ];
-
-        Code {
-            text: lexeme.text().replace("`", ""),
-            sticky: sticky.contains(&lexeme.next.as_str()),
-        }
+    fn lex(_lexeme: &Lexeme) -> Code {
+        panic!("Attempt to lex a code tag directly from a lexeme")
     }
 
     fn render(&self) -> String {
-        let space = if self.sticky {
-            String::new()
+        if self.open {
+            String::from("<code>")
         } else {
-            String::from(" ")
-        };
-        format!("<code>{}</code>{space}", self.text)
+            String::from("</code>")
+        }
     }
 }
diff --git a/src/syntax/content/parser/token/header.rs b/src/syntax/content/parser/token/header.rs
index 612afcf..3a3c6c3 100644
--- a/src/syntax/content/parser/token/header.rs
+++ b/src/syntax/content/parser/token/header.rs
@@ -1,9 +1,18 @@
+use std::{
+    collections::{HashMap, hash_map::Entry},
+    iter::Peekable,
+    slice,
+};
+
 use crate::{
     prelude::*,
+    types::Config,
     syntax::content::{Parseable, Lexeme},
 };
+
 use std::fmt::Display;
 
+#[derive(Debug)]
 pub struct Header {
     open: Option<bool>,
     level: Level,
@@ -19,6 +28,35 @@ impl Header {
         }
     }
 
+    /// Builds a DOM ID for a header and records it in `ids`.
+    ///
+    /// The base is the lowercased `next` text of the peeked lexeme, or `"h"`
+    /// when there is none or `ascii_dom_ids` is set and the text is not ASCII.
+    /// A repeated base gets a `-N` suffix, `N` being the IDs issued so far.
+    pub fn make_id(
+        config: &Config,
+        iterator: &mut Peekable<slice::Iter<'_, Lexeme>>,
+        ids: &mut HashMap<String, Vec<String>>,
+    ) -> String {
+        let base_id = iterator
+            .peek()
+            .filter(|peeked| !config.ascii_dom_ids || peeked.next.is_ascii())
+            .map(|peeked| peeked.next.to_lowercase())
+            .unwrap_or_else(|| String::from("h"));
+
+        let issued = match ids.entry(base_id.clone()) {
+            Entry::Occupied(occupied) => occupied.into_mut(),
+            Entry::Vacant(vacant) => {
+                vacant.insert(vec![base_id.clone()]);
+                return base_id;
+            },
+        };
+
+        let id_with_suffix = format!("{base_id}-{}", issued.len());
+        issued.push(id_with_suffix.clone());
+        id_with_suffix
+    }
+
     pub fn from_u8(level: u8, open: bool, dom_id: Option<&str>) -> Header {
         Header {
             level: Level::from_u8(level),
@@ -27,7 +65,7 @@ impl Header {
         }
     }
 
-    pub fn get_level(&self) -> u8 {
+    pub fn level(&self) -> u8 {
         match self.level {
             Level::One => 1,
             Level::Two => 2,
@@ -92,6 +130,7 @@ impl Display for Header {
     }
 }
 
+#[derive(Debug)]
 pub enum Level {
     One,
     Two,
diff --git a/src/syntax/content/parser/token/linebreak.rs b/src/syntax/content/parser/token/linebreak.rs
index 365bbdd..d56b49c 100644
--- a/src/syntax/content/parser/token/linebreak.rs
+++ b/src/syntax/content/parser/token/linebreak.rs
@@ -3,6 +3,7 @@ use crate::{
     syntax::content::{Parseable, parser::lexeme::Lexeme},
 };
 
+#[derive(Debug)]
 pub struct LineBreak {}
 
 impl Parseable for LineBreak {
diff --git a/src/syntax/content/parser/token/literal.rs b/src/syntax/content/parser/token/literal.rs
index f641579..723b152 100644
--- a/src/syntax/content/parser/token/literal.rs
+++ b/src/syntax/content/parser/token/literal.rs
@@ -1,6 +1,7 @@
 use std::fmt::Display;
 use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
 
+#[derive(Debug)]
 pub struct Literal {
     text: String,
 }
@@ -17,12 +18,7 @@ impl Parseable for Literal {
     }
 
     fn render(&self) -> String {
-        let non_sticky = [" ", "\n"];
-        if non_sticky.contains(&self.text.as_str()) {
-            self.text.clone()
-        } else {
-            format!("{} ", self.text.clone())
-        }
+        self.text.clone()
     }
 }
 
diff --git a/src/syntax/content/parser/token/paragraph.rs b/src/syntax/content/parser/token/paragraph.rs
index 09718d0..2348286 100644
--- a/src/syntax/content/parser/token/paragraph.rs
+++ b/src/syntax/content/parser/token/paragraph.rs
@@ -1,6 +1,7 @@
 use std::fmt::Display;
 use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
 
+#[derive(Debug)]
 pub struct Paragraph {
     open: Option<bool>,
 }
@@ -14,9 +15,7 @@ impl Paragraph {
 impl Parseable for Paragraph {
     fn probe(lexeme: &Lexeme) -> bool {
         // lexeme for paragraph is any non-whitespace, parser knows the context
-        let raw = lexeme.text();
-        let trimmed = raw.trim();
-        !trimmed.is_empty() && trimmed != "\n"
+        !lexeme.is_whitespace()
     }
 
     fn lex(_lexeme: &Lexeme) -> Paragraph {
diff --git a/src/syntax/content/parser/token/preformat.rs b/src/syntax/content/parser/token/preformat.rs
index af50fbd..568bd38 100644
--- a/src/syntax/content/parser/token/preformat.rs
+++ b/src/syntax/content/parser/token/preformat.rs
@@ -2,6 +2,7 @@ use crate::{
     syntax::content::{Parseable, Lexeme},
 };
 
+#[derive(Debug)]
 pub struct PreFormat {
     open: Option<bool>,
 }
diff --git a/src/syntax/content/parser/token/span.rs b/src/syntax/content/parser/token/span.rs
index 961e72d..b312a28 100644
--- a/src/syntax/content/parser/token/span.rs
+++ b/src/syntax/content/parser/token/span.rs
@@ -1,6 +1,7 @@
 use std::fmt::Display;
 use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
 
+#[derive(Debug)]
 pub struct Span {
     open: Option<bool>,
 }
diff --git a/static/graph.toml b/static/graph.toml
index 1705369..65ff79d 100644
--- a/static/graph.toml
+++ b/static/graph.toml
@@ -132,7 +132,7 @@ For example:
 docs|/node/Documentation
 `
 
-If the left side contains spaces, you need a leading `|` character. In this case, the space on the left side is mandatory:
+If the left side contains spaces, you need a leading `|` character:
 
 `
 |en docs|https://en.jutty.dev/node/Documentation
@@ -141,12 +141,12 @@ If the left side contains spaces, you need a leading `|` character. In this case
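-If you have a trailing character that you don't want to be considered as part of the destination, you can separate it with a third `|`:
+If you have a trailing character that you don't want to be considered as part of the destination, you can separate it with a closing `|`: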
 
 `
-This |gem|PreciousStone|, though green, was not an emerald.
+This gem|PreciousStone|, though green, was not an emerald.
 `
 
 Which renders as:
 
-This |gem|PreciousStone|, though green, was not an emerald.
+This gem|PreciousStone|, though green, was not an emerald.
 
 ### Node anchors
 
@@ -169,14 +169,15 @@ Because en can resolve IDs case insensitively (with priority to case-sensitive m
 In summary, all of the anchors below are valid and lead to the same page:
 
 `
+|en Syntax|https://en.jutty.dev/node/Syntax|
 |en Syntax|https://en.jutty.dev/node/Syntax
 Syntax|https://en.jutty.dev/node/Syntax
 
-|en Syntax|/node/Syntax
-Syntax|/node/Syntax
+Syntax|/node/syntax
 
-Syntax|Syntax
-syntax|syntax
+|syntax|Syntax
+Syntax|syntax
+Syntax|syntax|
 
 |Syntax|
 |syntax|
@@ -312,23 +313,23 @@ We saw example `docs|/node/Documentation`, but shorter syntax exists.
 #### Epistēmē
 #### Epistēmē
 
+|en Syntax|https://en.jutty.dev/node/Syntax|
 |en Syntax|https://en.jutty.dev/node/Syntax
 Syntax|https://en.jutty.dev/node/Syntax
 
-|en Syntax|/node/Syntax
-Syntax|/node/Syntax
+Syntax|/node/syntax
 
-Syntax|Syntax
-syntax|syntax
+|syntax|Syntax
+Syntax|syntax
+Syntax|syntax|
 
 |Syntax|
 |syntax|
 """
 
 [meta.config]
+content_language = "en"
 footer_credits = false
 footer_text = """
 made by jutty|https://jutty.dev • acknowledgements|Acknowledgments • |source code|https://codeberg.org/jutty/en
 """
-
-