From 1f4a9faa759623f2106286198d55b354f9f83e8d Mon Sep 17 00:00:00 2001 From: jutty Date: Sun, 21 Dec 2025 14:50:29 -0300 Subject: [PATCH] Automatic header IDs --- src/syntax/content/parser.rs | 55 ++++++++++++++++++----- src/syntax/content/parser/token/header.rs | 23 +++++++--- src/types.rs | 3 ++ 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs index b8b4531..47e43bd 100644 --- a/src/syntax/content/parser.rs +++ b/src/syntax/content/parser.rs @@ -1,3 +1,7 @@ +use std::collections::{HashMap, hash_map::Entry}; + +use crate::{formats::populate_graph, types::Config}; + use super::{Parseable as _, Token, LexMap}; use token::{ anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header, @@ -25,34 +29,65 @@ enum Context { PreFormat, } +struct State { + context: Context, + dom_ids: HashMap>, +} + fn lex(text: &str, map: LexMap) -> Vec { let mut tokens: Vec = Vec::new(); - let mut state = Context::None; + let mut state = State { + context: Context::None, + dom_ids: HashMap::new(), + }; + let config: Config = populate_graph().meta.config; let splits = cluster::cluster(text); let lexemes = Lexeme::collect(&splits); let iter = lexemes.iter().peekable(); for lexeme in iter { - match state { + match state.context { Context::None => { if PreFormat::probe(lexeme) { tokens.push(Token::PreFormat(PreFormat::new(true))); - state = Context::PreFormat; + state.context = Context::PreFormat; continue; } else if Header::probe(lexeme) { - let header = Header::lex(lexeme); - state = Context::Header(header.get_level()); + let base_id = + if config.ascii_dom_ids && !lexeme.next.is_ascii() { + String::from("h") + } else { + lexeme.next.clone().to_lowercase() + }; + let id = match state.dom_ids.entry(base_id.clone()) { + Entry::Occupied(mut occupied) => { + let ids = occupied.get_mut(); + let suffix: u8 = + ids.len().try_into().unwrap_or_default(); + let id_with_suffix = format!("{base_id}-{suffix}"); + ids.push(id_with_suffix.clone()); + id_with_suffix + }, + Entry::Vacant(vacant) => { + vacant.insert(vec![base_id.clone()]); + base_id + }, + }; + + let mut header = Header::lex(lexeme); + header.dom_id = Some(id); + state.context = Context::Header(header.get_level()); tokens.push(Token::Header(header)); continue; } else if Paragraph::probe(lexeme) { tokens.push(Token::Paragraph(Paragraph::new(true))); - state = Context::Paragraph; + state.context = Context::Paragraph; } }, Context::PreFormat => { if PreFormat::probe(lexeme) { tokens.push(Token::PreFormat(PreFormat::new(false))); - state = Context::None; + state.context = Context::None; } else { tokens.push(Token::Literal(Literal::lex(lexeme))); } @@ -61,13 +96,13 @@ fn lex(text: &str, map: LexMap) -> Vec { Context::Paragraph => { if lexeme.text() == "\n" { tokens.push(Token::Paragraph(Paragraph::new(false))); - state = Context::None; + state.context = Context::None; } }, Context::Header(n) => { if lexeme.text() == "\n" { - tokens.push(Token::Header(Header::from_u8(n, false))); - state = Context::None; + tokens.push(Token::Header(Header::from_u8(n, false, None))); + state.context = Context::None; } }, } diff --git a/src/syntax/content/parser/token/header.rs b/src/syntax/content/parser/token/header.rs index 2a5d90b..612afcf 100644 --- a/src/syntax/content/parser/token/header.rs +++ b/src/syntax/content/parser/token/header.rs @@ -7,20 +7,23 @@ use std::fmt::Display; pub struct Header { open: Option, level: Level, + pub dom_id: Option, } impl Header { - pub fn new(level: Level, open: bool) -> Header { + pub fn new(level: Level, open: bool, dom_id: Option<&str>) -> Header { Header { - level, open: Some(open), + level, + dom_id: dom_id.map(std::borrow::ToOwned::to_owned), } } - pub fn from_u8(level: u8, open: bool) -> Header { + pub fn from_u8(level: u8, open: bool, dom_id: Option<&str>) -> Header { Header { level: Level::from_u8(level), open: Some(open), + dom_id: dom_id.map(std::borrow::ToOwned::to_owned), } } @@ -53,15 +56,21 @@ impl Parseable for Header { } fn lex(lexeme: &Lexeme) -> Header { - Header::new(lexeme.text().len().into(), true) + Header::new( + lexeme.text().len().into(), + true, + Some(&lexeme.next.to_ascii_lowercase()), + ) } fn render(&self) -> String { if let Some(open) = self.open { - if open { - format!("", &self.level) + if open && let Some(ref id) = self.dom_id { + format!(r#""#, self.level, id) + } else if open { + format!("", self.level) } else { - format!("", &self.level) + format!("", self.level) } } else { panic!("Attempt to render a header tag while open state is unknown") diff --git a/src/types.rs b/src/types.rs index d66bfdf..ba099f7 100644 --- a/src/types.rs +++ b/src/types.rs @@ -92,6 +92,8 @@ pub struct Config { pub index_root_node: bool, #[serde(default = "mkfalse")] pub tree_node_text: bool, + #[serde(default = "mkfalse")] + pub ascii_dom_ids: bool, } // See: https://github.com/serde-rs/serde/issues/368 @@ -130,6 +132,7 @@ impl Graph { index_node_count: 8, index_root_node: true, tree_node_text: false, + ascii_dom_ids: false, }, version: (0, 1, 0), messages: message.map_or(vec![], |m| vec![m]),