Automatic header IDs

This commit is contained in:
Juno Takano 2025-12-21 14:50:29 -03:00
commit 1f4a9faa75
3 changed files with 64 additions and 17 deletions

View file

@ -1,3 +1,7 @@
use std::collections::{HashMap, hash_map::Entry};
use crate::{formats::populate_graph, types::Config};
use super::{Parseable as _, Token, LexMap};
use token::{
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -25,34 +29,65 @@ enum Context {
PreFormat,
}
/// Mutable state carried by `lex` while it walks the lexemes.
struct State {
/// The context the lexer is currently in; `lex` matches on this for every
/// lexeme and reassigns it when a block opens or closes.
context: Context,
/// Header DOM IDs handed out so far, keyed by base ID. Each value lists every
/// ID already issued for that base (the bare base first, then the suffixed
/// variants); its length supplies the numeric suffix for the next duplicate.
dom_ids: HashMap<String, Vec<String>>,
}
fn lex(text: &str, map: LexMap) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut state = Context::None;
let mut state = State {
context: Context::None,
dom_ids: HashMap::new(),
};
let config: Config = populate_graph().meta.config;
let splits = cluster::cluster(text);
let lexemes = Lexeme::collect(&splits);
let iter = lexemes.iter().peekable();
for lexeme in iter {
match state {
match state.context {
Context::None => {
if PreFormat::probe(lexeme) {
tokens.push(Token::PreFormat(PreFormat::new(true)));
state = Context::PreFormat;
state.context = Context::PreFormat;
continue;
} else if Header::probe(lexeme) {
let header = Header::lex(lexeme);
state = Context::Header(header.get_level());
let base_id =
if config.ascii_dom_ids && !lexeme.next.is_ascii() {
String::from("h")
} else {
lexeme.next.clone().to_lowercase()
};
let id = match state.dom_ids.entry(base_id.clone()) {
Entry::Occupied(mut occupied) => {
let ids = occupied.get_mut();
let suffix: u8 =
ids.len().try_into().unwrap_or_default();
let id_with_suffix = format!("{base_id}-{suffix}");
ids.push(id_with_suffix.clone());
id_with_suffix
},
Entry::Vacant(vacant) => {
vacant.insert(vec![base_id.clone()]);
base_id
},
};
let mut header = Header::lex(lexeme);
header.dom_id = Some(id);
state.context = Context::Header(header.get_level());
tokens.push(Token::Header(header));
continue;
} else if Paragraph::probe(lexeme) {
tokens.push(Token::Paragraph(Paragraph::new(true)));
state = Context::Paragraph;
state.context = Context::Paragraph;
}
},
Context::PreFormat => {
if PreFormat::probe(lexeme) {
tokens.push(Token::PreFormat(PreFormat::new(false)));
state = Context::None;
state.context = Context::None;
} else {
tokens.push(Token::Literal(Literal::lex(lexeme)));
}
@ -61,13 +96,13 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
Context::Paragraph => {
if lexeme.text() == "\n" {
tokens.push(Token::Paragraph(Paragraph::new(false)));
state = Context::None;
state.context = Context::None;
}
},
Context::Header(n) => {
if lexeme.text() == "\n" {
tokens.push(Token::Header(Header::from_u8(n, false)));
state = Context::None;
tokens.push(Token::Header(Header::from_u8(n, false, None)));
state.context = Context::None;
}
},
}