Concentrate configuration content parsing in syntax::serial

Fixes some pages not having parsed input by making it much harder to
construct a config with unparsed text, which is something you basically
never want. The parsing now happens much earlier and consumers don't
need to remember to parse the configuration anymore.

Fixes a possible stack overflow due to parsing and configuration
depending on each other.

This commit also includes dependency updates and minor justfile tweaks.
This commit is contained in:
Juno Takano 2025-12-28 05:16:22 -03:00
commit 7300a29b67
12 changed files with 142 additions and 132 deletions

View file

@ -1,5 +1,7 @@
use parser::{token::Token, lexeme::Lexeme};
use crate::types::Config;
pub mod parser;
pub trait Parseable {
@ -12,6 +14,9 @@ type Probe = fn(&Lexeme) -> bool;
type Lexer = fn(&Lexeme) -> Token;
type LexMap<'lm> = &'lm [(Probe, Lexer)];
pub fn parse(text: &str) -> String {
parser::read(text)
pub fn parse(text: &str, config: &Config) -> String {
if text.is_empty() {
return String::new();
}
parser::read(text, config)
}

View file

@ -1,6 +1,6 @@
use std::collections::{HashMap};
use crate::{syntax::serial::populate_graph, types::Config};
use crate::types::Config;
use super::{Parseable as _, Token, LexMap};
use token::{
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -19,10 +19,9 @@ const LEXMAP: LexMap = &[
(Literal::probe, |word| Token::Literal(Literal::lex(word))),
];
fn lex(text: &str, map: LexMap) -> Vec<Token> {
fn lex(text: &str, map: LexMap, config: &Config) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut state = State::new();
let config: Config = populate_graph().meta.config;
let segments = segment::segment(text);
let lexemes = Lexeme::collect(&segments);
@ -38,7 +37,7 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
} else if Header::probe(lexeme) {
let mut header = Header::lex(lexeme);
header.dom_id = Some(Header::make_id(
&config,
config,
iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
&mut state.dom_ids,
));
@ -247,19 +246,23 @@ fn parse(tokens: &[Token]) -> String {
tokens.iter().map(Token::render).collect::<String>()
}
pub(super) fn read(text: &str) -> String {
parse(&lex(text, LEXMAP))
pub(super) fn read(text: &str, config: &Config) -> String {
parse(&lex(text, LEXMAP, config))
}
#[cfg(test)]
mod tests {
use crate::syntax::content::parser::token::header::Level;
use crate::{types::Graph, syntax::content::parser::token::header::Level};
use super::*;
/// Test helper: renders `input` with `read`, passing the configuration taken
/// from a graph created via `Graph::new(None)`, so individual tests do not
/// have to build a config themselves.
fn read_noconfig(input: &str) -> String {
read(input, &Graph::new(None).meta.config)
}
#[test]
fn empty_render_is_empty() {
assert_eq!(read(""), "");
assert_eq!(read_noconfig(""), "");
}
#[test]
@ -267,18 +270,21 @@ mod tests {
let en = "`this |test|` tries ## to |brea|k|: things";
let html = r#"<p><code>this |test|</code> tries ## to <a href="/node/k">brea</a>: things</p>"#;
assert_eq!(read(en), html);
assert_eq!(read_noconfig(en), html);
}
#[test]
fn force_flanking() {
assert_eq!(read("|Node||"), r#"<p><a href="/node/Node">Node</a></p>"#);
assert_eq!(
read_noconfig("|Node||"),
r#"<p><a href="/node/Node">Node</a></p>"#
);
}
#[test]
fn flanking_with_trailing_pipe() {
assert_eq!(
read("|Node|Destination|"),
read_noconfig("|Node|Destination|"),
r#"<p><a href="/node/Destination">Node</a></p>"#
);
}
@ -286,7 +292,7 @@ mod tests {
#[test]
fn nonleading_second_pipe() {
assert_eq!(
read("Go to Node|Destination|, here"),
read_noconfig("Go to Node|Destination|, here"),
r#"<p>Go to <a href="/node/Destination">Node</a>, here</p>"#,
);
}
@ -294,7 +300,7 @@ mod tests {
#[test]
fn clear_anchor_buffer() {
assert_eq!(
read("|SomeAnchor|\n|SomeOtherAnchor|"),
read_noconfig("|SomeAnchor|\n|SomeOtherAnchor|"),
concat!(
r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
"\n",

View file

@ -182,10 +182,8 @@ mod tests {
#[test]
fn ascii_ids_set() {
let config = Config {
ascii_dom_ids: true,
..Config::default()
};
let mut config = Config::new();
config.ascii_dom_ids = true;
let id = Header::make_id(
&config,
@ -197,10 +195,8 @@ mod tests {
#[test]
fn ascii_ids_unset() {
let config = Config {
ascii_dom_ids: false,
..Config::default()
};
let mut config = Config::new();
config.ascii_dom_ids = false;
let id = Header::make_id(
&config,

View file

@ -2,7 +2,7 @@ use std::collections::HashMap;
use crate::{
syntax::command::Arguments,
types::{Edge, Graph, Node},
types::{Edge, Graph, Meta, Node},
};
pub fn populate_graph() -> Graph {
@ -11,29 +11,26 @@ pub fn populate_graph() -> Graph {
Ok(s) => s,
Err(e) => format!("Error: {e}"),
};
let graph = deserialize_graph(&Format::TOML, &toml_source);
let graph = deserialize_graph(&Format::TOML, &toml_source);
modulate_graph(&graph)
}
fn modulate_graph(graph: &Graph) -> Graph {
let nodes = modulate_nodes(&graph.nodes);
Graph {
nodes: nodes.clone(),
incoming: make_incoming(&nodes),
lowercase_keymap: map_lowercase_keys(&nodes),
..graph
nodes,
meta: Meta {
config: graph.meta.config.clone().parse_text(),
..graph.meta.clone()
},
..graph.to_owned()
}
}
/// Maps the lowercased form of every key in `source_map` back to the key as
/// it was originally written.
///
/// If several keys lowercase to the same string, the one visited last wins;
/// `HashMap` iteration order is unspecified, so which key that is is not
/// defined.
fn map_lowercase_keys(
    source_map: &HashMap<String, Node>,
) -> HashMap<String, String> {
    source_map
        .keys()
        .map(|original| (original.to_lowercase(), original.clone()))
        .collect()
}
fn modulate_nodes(old_nodes: &HashMap<String, Node>) -> HashMap<String, Node> {
let mut nodes: HashMap<String, Node> = HashMap::new();
@ -89,23 +86,6 @@ fn modulate_nodes(old_nodes: &HashMap<String, Node>) -> HashMap<String, Node> {
nodes
}
/// Constructs a `HashMap` of incoming connections (reversed edges).
///
/// Every edge found in any node's `connections` is filed under the key it
/// points to (`edge.to`). Nodes whose `connections` is `None` or empty
/// contribute nothing, and a key only appears in the result when at least one
/// edge targets it. Edges coming from the same node keep their relative
/// order; the order between different nodes follows the iteration order of
/// `nodes`, which is unspecified for a `HashMap`.
fn make_incoming(nodes: &HashMap<String, Node>) -> HashMap<String, Vec<Edge>> {
    let mut incoming: HashMap<String, Vec<Edge>> = HashMap::new();

    // Borrow the nodes and their edge lists; only the edge that is actually
    // stored (and its target key) needs to be cloned.
    for node in nodes.values() {
        if let Some(edges) = &node.connections {
            for edge in edges {
                // The entry API appends in place, instead of copying the
                // accumulated vector out of the map and back in per edge.
                incoming
                    .entry(edge.to.clone())
                    .or_default()
                    .push(edge.clone());
            }
        }
    }

    incoming
}
pub enum Format {
TOML,
JSON,
@ -137,6 +117,34 @@ pub fn deserialize_graph(in_format: &Format, serial: &str) -> Graph {
}
}
/// Constructs a `HashMap` of incoming connections (reversed edges).
///
/// Every edge found in any node's `connections` is filed under the key it
/// points to (`edge.to`). Nodes whose `connections` is `None` or empty
/// contribute nothing, and a key only appears in the result when at least one
/// edge targets it. Edges coming from the same node keep their relative
/// order; the order between different nodes follows the iteration order of
/// `nodes`, which is unspecified for a `HashMap`.
fn make_incoming(nodes: &HashMap<String, Node>) -> HashMap<String, Vec<Edge>> {
    let mut incoming: HashMap<String, Vec<Edge>> = HashMap::new();

    // Borrow the nodes and their edge lists; only the edge that is actually
    // stored (and its target key) needs to be cloned.
    for node in nodes.values() {
        if let Some(edges) = &node.connections {
            for edge in edges {
                // The entry API appends in place, instead of copying the
                // accumulated vector out of the map and back in per edge.
                incoming
                    .entry(edge.to.clone())
                    .or_default()
                    .push(edge.clone());
            }
        }
    }

    incoming
}
/// Maps the lowercased form of every key in `source_map` back to the key as
/// it was originally written.
///
/// If several keys lowercase to the same string, the one visited last wins;
/// `HashMap` iteration order is unspecified, so which key that is is not
/// defined.
fn map_lowercase_keys(
    source_map: &HashMap<String, Node>,
) -> HashMap<String, String> {
    source_map
        .keys()
        .map(|original| (original.to_lowercase(), original.clone()))
        .collect()
}
#[cfg(test)]
mod tests {
use super::*;