Fix and document some Anchor Syntax corner cases

2025-12-24 12:20:28 -03:00 · 2025-12-24 12:20:28 -03:00 · 51047ad11c
commit 51047ad11c
parent 4e828c270f
4 changed files with 99 additions and 14 deletions
--- a/src/handlers/graph.rs
+++ b/src/handlers/graph.rs
@ -10,7 +10,9 @@ pub async fn node(Path(id): Path<String>) -> Response<Body> {
    let empty_node = Node::new(Some(format!("Could not find node ID {id}.")));
    let node = graph.find_node(&id).unwrap_or(empty_node.clone());

-    if !graph.nodes.contains_key(&id) && graph.lowercase_keymap.contains_key(&id) {
+    if !graph.nodes.contains_key(&id)
+        && graph.lowercase_keymap.contains_key(&id)
+    {
        return Redirect::permanent(format!("/node/{}", node.id).as_str())
            .into_response();
    }
--- a/src/syntax/content/parser.rs
+++ b/src/syntax/content/parser.rs
@ -1,7 +1,6 @@
 use std::collections::{HashMap};

 use crate::{formats::populate_graph, types::Config};
-
 use super::{Parseable as _, Token, LexMap};
 use token::{
    anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -113,8 +112,13 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
                } else if candidate.destination.is_none() {
                    // candidate is leading and we found the second pipe
                    if candidate.leading && lexeme.text() == "|" {
-                        // whitespace after pipe: flanking node anchor
-                        if lexeme.is_next_whitespace() {
+                        // third pipe immediately after second: forcing flanking
+                        if lexeme.match_next_first_char('|') {
+                            continue;
+                        // whitespace or punctuation after pipe: flanking anchor
+                        } else if lexeme.is_next_whitespace()
+                            || lexeme.is_next_punctuation()
+                        {
                            candidate.destination =
                                Some(candidate.text.clone());
                            let token = Token::Anchor(candidate.clone());
@ -186,6 +190,11 @@ struct State {
    buffers: Buffers,
 }

+struct Context {
+    block: BlockContext,
+    inline: InlineContext,
+}
+
 struct Buffers {
    anchor: AnchorBuffer,
 }
@ -224,11 +233,6 @@ impl State {
    }
 }

-struct Context {
-    block: BlockContext,
-    inline: InlineContext,
-}
-
 fn parse(tokens: &[Token]) -> String {
    tokens.iter().map(Token::render).collect::<String>()
 }
--- a/src/syntax/content/parser/lexeme.rs
+++ b/src/syntax/content/parser/lexeme.rs
@ -24,6 +24,15 @@ impl Lexeme {
        self.next == " " || self.next == "\n"
    }

+    pub fn is_next_punctuation(&self) -> bool {
+        let punctuation = [",", ".", ":", ";", "?", "!", "(", ")", "\"", "'"];
+        punctuation.contains(&self.next.as_str())
+    }
+
+    pub fn next_first_char(&self) -> Option<char> {
+        self.next.chars().nth(0)
+    }
+
    pub fn match_first_char(&self, query: char) -> bool {
        if let Some(first) = self.text.chars().nth(0) {
            first == query
@ -32,8 +41,12 @@ impl Lexeme {
        }
    }

-    pub fn next_first_char(&self) -> Option<char> {
-        self.next.chars().nth(0)
+    pub fn match_next_first_char(&self, query: char) -> bool {
+        if let Some(first) = self.next.chars().nth(0) {
+            first == query
+        } else {
+            false
+        }
    }

    /// # Panics
--- a/static/graph.toml
+++ b/static/graph.toml
@ -144,9 +144,11 @@ If you have a trailing character that you don't want to be considered as part of
 This gem|PreciousStone|, though green, was not an emerald.
 `

-Which renders as:
+To make a plain address clickable, wrap it in two `|` characters:

-This gem|PreciousStone|, though green, was not an emerald.
+`
+|https://en.jutty.dev|
+`

 ### Node anchors

@ -182,6 +184,70 @@ Syntax|syntax|
 |Syntax|
 |syntax|
 `
+
+While flexible, this can sometimes be ambiguous. See |AnchorSyntax| for some caveats regarding anchors.
+
+"""
+
+[nodes.AnchorSyntax]
+title = "Anchor Syntax"
+text = """
+Anchor syntax can be very concise, but some situations lead to ambiguity.
+
+In short, following these two rules should keep you out of trouble:
+
+- Avoid special characters in your node IDs
+- When needed, use the full three-pipe `|text|destination|` syntax to resolve ambiguity
+
+## Punctuation in destinations
+
+Consider this example:
+
+`
+|gem|PreciousStone
+|PreciousStone|,
+`
+
+Both point to the node with ID `PreciousStone`, as they _seem_ to. But if we didn't treat punctuation differently, both lines would have the same shape, with the `,` read as a destination:
+
+`
+|a|b
+|a|b
+`
+
+For this reason, punctuation is treated differently. It won't be considered as a possible destination, so you can write the previous example and have it behave as expected.
+
+This also means a node ID can't be a punctuation symbol, nor can it start with one.
+
+These are the punctuation symbols that are treated specially:
+
+`
+, . : ; ? ! ( ) ' "
+
+`
+
+You can also force this using a third pipe:
+
+`
+|PreciousStone||,
+`
+
+This unambiguously tells en that your destination is a node ID.
+
+## URL detection
+
+en must differentiate node anchors from outgoing URLs:
+
+`
+|sample|Example|
+|sample|https://example.com|
+
+|Example|
+|https://example.com|
+`
+
+It does this by looking at the destination and checking if it contains a `:` or `/`, so also avoid these in your node IDs.
+
 """

 [nodes.en]
@ -219,7 +285,7 @@ en uses this concept to create a writing tool, allowing you to map out complex t
 text = """
 TOML is a configuration format that can be easily read and understood by humans and machines alike.

-To learn more about TOML, you can visit its website at <toml.io>.
+To learn more about TOML, you can visit its website at |https://toml.io|.

 To see the TOML declaration that translates into the rendered graph you are reading right now, visit the "TOML Graph" link on the top navigation bar.
 """