Automatic header IDs

This commit is contained in:
Juno Takano 2025-12-21 14:50:29 -03:00
commit 1f4a9faa75
3 changed files with 64 additions and 17 deletions

View file

@ -1,3 +1,7 @@
use std::collections::{HashMap, hash_map::Entry};
use crate::{formats::populate_graph, types::Config};
use super::{Parseable as _, Token, LexMap};
use token::{
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -25,34 +29,65 @@ enum Context {
PreFormat,
}
/// Mutable lexer state carried across iterations of the lexeme loop in `lex`.
struct State {
/// Block-level context the lexer is currently inside; `lex` matches on this
/// to decide how the next lexeme is handled.
context: Context,
/// DOM ids handed out to headers so far, keyed by base id. The first header
/// with a given base id stores the base id itself; each later duplicate
/// stores `{base_id}-{n}`, where `n` is the number of ids already recorded
/// under that key.
/// NOTE(review): `lex` computes `n` as a `u8` via `try_into().unwrap_or_default()`,
/// so once a key holds more than 255 ids the suffix falls back to 0 and ids
/// repeat — confirm whether that limit is acceptable.
dom_ids: HashMap<String, Vec<String>>,
}
fn lex(text: &str, map: LexMap) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut state = Context::None;
let mut state = State {
context: Context::None,
dom_ids: HashMap::new(),
};
let config: Config = populate_graph().meta.config;
let splits = cluster::cluster(text);
let lexemes = Lexeme::collect(&splits);
let iter = lexemes.iter().peekable();
for lexeme in iter {
match state {
match state.context {
Context::None => {
if PreFormat::probe(lexeme) {
tokens.push(Token::PreFormat(PreFormat::new(true)));
state = Context::PreFormat;
state.context = Context::PreFormat;
continue;
} else if Header::probe(lexeme) {
let header = Header::lex(lexeme);
state = Context::Header(header.get_level());
let base_id =
if config.ascii_dom_ids && !lexeme.next.is_ascii() {
String::from("h")
} else {
lexeme.next.clone().to_lowercase()
};
let id = match state.dom_ids.entry(base_id.clone()) {
Entry::Occupied(mut occupied) => {
let ids = occupied.get_mut();
let suffix: u8 =
ids.len().try_into().unwrap_or_default();
let id_with_suffix = format!("{base_id}-{suffix}");
ids.push(id_with_suffix.clone());
id_with_suffix
},
Entry::Vacant(vacant) => {
vacant.insert(vec![base_id.clone()]);
base_id
},
};
let mut header = Header::lex(lexeme);
header.dom_id = Some(id);
state.context = Context::Header(header.get_level());
tokens.push(Token::Header(header));
continue;
} else if Paragraph::probe(lexeme) {
tokens.push(Token::Paragraph(Paragraph::new(true)));
state = Context::Paragraph;
state.context = Context::Paragraph;
}
},
Context::PreFormat => {
if PreFormat::probe(lexeme) {
tokens.push(Token::PreFormat(PreFormat::new(false)));
state = Context::None;
state.context = Context::None;
} else {
tokens.push(Token::Literal(Literal::lex(lexeme)));
}
@ -61,13 +96,13 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
Context::Paragraph => {
if lexeme.text() == "\n" {
tokens.push(Token::Paragraph(Paragraph::new(false)));
state = Context::None;
state.context = Context::None;
}
},
Context::Header(n) => {
if lexeme.text() == "\n" {
tokens.push(Token::Header(Header::from_u8(n, false)));
state = Context::None;
tokens.push(Token::Header(Header::from_u8(n, false, None)));
state.context = Context::None;
}
},
}

View file

@ -7,20 +7,23 @@ use std::fmt::Display;
/// Token for an HTML heading tag, rendered as `<hN>`, `<hN id="...">` or `</hN>`.
pub struct Header {
/// `Some(true)` renders an opening tag and `Some(false)` a closing tag;
/// `render` panics when this is `None`.
open: Option<bool>,
/// Heading level, interpolated into the tag name as `h{level}`.
level: Level,
/// Value for the `id` attribute. `render` emits it only on opening tags and
/// only when it is `Some`; it is written into the attribute as-is.
pub dom_id: Option<String>,
}
impl Header {
pub fn new(level: Level, open: bool) -> Header {
pub fn new(level: Level, open: bool, dom_id: Option<&str>) -> Header {
Header {
level,
open: Some(open),
level,
dom_id: dom_id.map(std::borrow::ToOwned::to_owned),
}
}
pub fn from_u8(level: u8, open: bool) -> Header {
pub fn from_u8(level: u8, open: bool, dom_id: Option<&str>) -> Header {
Header {
level: Level::from_u8(level),
open: Some(open),
dom_id: dom_id.map(std::borrow::ToOwned::to_owned),
}
}
@ -53,15 +56,21 @@ impl Parseable for Header {
}
fn lex(lexeme: &Lexeme) -> Header {
Header::new(lexeme.text().len().into(), true)
Header::new(
lexeme.text().len().into(),
true,
Some(&lexeme.next.to_ascii_lowercase()),
)
}
fn render(&self) -> String {
if let Some(open) = self.open {
if open {
format!("<h{}>", &self.level)
if open && let Some(ref id) = self.dom_id {
format!(r#"<h{} id="{}">"#, self.level, id)
} else if open {
format!("<h{}>", self.level)
} else {
format!("</h{}>", &self.level)
format!("</h{}>", self.level)
}
} else {
panic!("Attempt to render a header tag while open state is unknown")

View file

@ -92,6 +92,8 @@ pub struct Config {
pub index_root_node: bool,
#[serde(default = "mkfalse")]
pub tree_node_text: bool,
#[serde(default = "mkfalse")]
pub ascii_dom_ids: bool,
}
// See: https://github.com/serde-rs/serde/issues/368
@ -130,6 +132,7 @@ impl Graph {
index_node_count: 8,
index_root_node: true,
tree_node_text: false,
ascii_dom_ids: false,
},
version: (0, 1, 0),
messages: message.map_or(vec![], |m| vec![m]),