Implement nested lists

This commit is contained in:
Juno Takano 2026-01-07 15:11:50 -03:00
commit e42c67676d
11 changed files with 475 additions and 131 deletions

View file

@ -1,14 +1,12 @@
use crate::syntax::content::parser::{
state::State,
token::{
Token, paragraph::Paragraph, preformat::PreFormat, list::List,
item::Item,
},
token::{Token, paragraph::Paragraph, preformat::PreFormat},
};
pub mod anchor;
pub mod block;
pub mod inline;
pub mod anchor;
pub mod list;
#[derive(Clone, Debug)]
pub struct Context {
@ -19,9 +17,8 @@ pub struct Context {
#[derive(Clone, Debug)]
pub enum Block {
Paragraph,
Header(u8),
Item(bool),
List(bool),
Header(u8), // level
List,
PreFormat,
None,
}
@ -43,12 +40,8 @@ pub fn close(state: &State, tokens: &mut Vec<Token>) {
Block::Paragraph => {
tokens.push(Token::Paragraph(Paragraph::new(false)));
},
Block::Item(ordered) => {
tokens.push(Token::Item(Item::new(false)));
tokens.push(Token::List(List::new(false, ordered)));
},
Block::List(ordered) => {
tokens.push(Token::List(List::new(false, ordered)));
Block::List => {
panic!("End of input with open list")
},
Block::Header(_) => panic!("End of input with open header"),
Block::None => (),

View file

@ -9,7 +9,8 @@ use crate::{
lexeme::Lexeme,
state::State,
token::{
Token, checkbox::CheckBox, header::Header, item::Item, list::List, literal::Literal, paragraph::Paragraph, preformat::PreFormat
Token, header::Header, list::List, literal::Literal,
paragraph::Paragraph, preformat::PreFormat,
},
},
},
@ -42,15 +43,10 @@ pub fn parse(
tokens.push(Token::Header(header));
return true;
} else if List::probe(lexeme) {
let ordered = lexeme.match_as_char('+');
log!("Block Context: None -> Item on {lexeme}");
state.context.block = Block::Item(ordered);
tokens.push(Token::List(List::new(true, ordered)));
tokens.push(Token::Item(Item::new(true)));
// List::probe implies a dash followed by a space,
// both of which should not be rendered literally
iterator.next();
return true;
log!("Block Context: None -> List on {lexeme}");
state.context.block = Block::List;
state.buffers.list.candidate.ordered = lexeme.match_char('+');
return super::list::parse(lexeme, state, tokens, iterator);
} else if Paragraph::probe(lexeme) {
log!("Block Context: None -> Paragraph on {lexeme}");
state.context.block = Block::Paragraph;
@ -81,33 +77,8 @@ pub fn parse(
state.context.block = Block::None;
}
},
Block::List(ordered) => {
if List::probe_end(lexeme) {
tokens.push(Token::List(List::new(false, ordered)));
log!("Block Context: List -> None on {lexeme}");
state.context.block = Block::None;
} else if Item::probe(lexeme) {
tokens.push(Token::Item(Item::new(true)));
log!("Block Context: List -> Item on {lexeme}");
state.context.block = Block::Item(ordered);
// Item::probe implies a dash followed by a space,
// both of which should not be rendered literally
iterator.next();
return true;
}
},
Block::Item(ordered) => {
if CheckBox::probe(lexeme) {
log!("Probed CheckBox: {lexeme}");
tokens.push(Token::CheckBox(CheckBox::lex(lexeme)));
iterator.next();
iterator.next();
return true
} else if Item::probe_end(lexeme) {
tokens.push(Token::Item(Item::new(false)));
log!("Block Context: Item -> List on {lexeme}");
state.context.block = Block::List(ordered);
}
Block::List => {
return super::list::parse(lexeme, state, tokens, iterator);
},
}
false
@ -176,28 +147,4 @@ mod tests {
let level = Level::from(u);
assert_eq!(level.to_string(), "6");
}
#[test]
fn unordered_list_at_eoi() {
assert_eq!(
read("- a\n- b\n- c"),
"<ul><li>a</li>\n<li>b</li>\n<li>c</li></ul>"
);
}
#[test]
fn unordered_list_with_content_before() {
assert_eq!(
read("_e e_\n\n- a\n- b\n- c"),
"<p><em>e e</em></p>\n\n<ul><li>a</li>\n<li>b</li>\n<li>c</li></ul>",
);
}
#[test]
fn unordered_list_with_content_after() {
assert_eq!(
read("- a\n- b\n- c\n\nd",),
"<ul><li>a</li>\n<li>b</li>\n<li>c</li>\n</ul>\n<p>d</p>"
);
}
}

View file

@ -0,0 +1,258 @@
use std::{iter::Peekable, slice::Iter};
use crate::{
prelude::*,
syntax::content::parser::{
context::Block,
lexeme::Lexeme,
state::{ListBuffer, State},
token::{Token, item::Item},
},
};
/// Handles open list contexts until a list is fully parsed.
///
/// Each call folds one lexeme into the list buffer in `state.buffers.list`:
///
/// - a space seen before the current item's bullet raises the running depth;
/// - a `-` or `+` seen while the item's depth is unknown fixes that depth,
///   and a space directly after the bullet is skipped;
/// - the last lexeme, or a `'\n'`, `'\n'` sequence, ends the list: the list
///   is pushed to `tokens` as a [`Token::List`], the buffer is reset and the
///   block context goes back to [`Block::None`];
/// - a single `'\n'` ends the current item and starts a fresh one;
/// - anything else is appended to the current item's text.
///
/// Always returns `true`, which triggers a continue in the outer parser,
/// skipping any further parsing of the current lexeme.
///
/// # Panics
/// This parser can handle only the List context, and will panic if passed an
/// unrelated context since it has no knowledge on how to handle them.
pub fn parse(
    lexeme: &Lexeme,
    state: &mut State,
    tokens: &mut Vec<Token>,
    iterator: &mut Peekable<Iter<'_, Lexeme>>,
) -> bool {
    let buffer = &mut state.buffers.list;
    let candidate = &mut buffer.candidate;
    let item_candidate = &mut buffer.item_candidate;

    match state.context.block {
        Block::List => {
            if lexeme.match_char(' ') && item_candidate.depth.is_none() {
                // found space, unknown increasing depth
                buffer.depth = buffer.depth.saturating_add(1);
            } else if item_candidate.depth.is_none()
                && lexeme.match_either_char('-', '+')
            {
                // found bullet, depth now known
                item_candidate.depth = Some(buffer.depth);
                if lexeme.match_next_char(' ') {
                    // the space after the bullet is not part of the text
                    iterator.next();
                }
            } else if lexeme.last() || lexeme.match_char_sequence('\n', '\n') {
                // found end of list
                if !lexeme.match_char('\n') {
                    // no trailing break, last item's text wouldn't be pushed
                    item_candidate.text.push_str(&lexeme.text());
                }
                if item_candidate.depth.is_some() {
                    // if the current item candidate has a known depth, push
                    // it; moved rather than cloned since the buffer is reset
                    // right below
                    candidate.items.push(std::mem::take(item_candidate));
                }
                // push list candidate, reset state and exit context
                log!("Accepting list candidate {candidate}");
                tokens.push(Token::List(std::mem::take(candidate)));
                state.context.block = Block::None;
                iterator.next();
                *buffer = ListBuffer::default();
            } else if lexeme.match_char('\n') {
                // found end of item: move it into the list, which leaves a
                // default (empty, unknown depth) item candidate behind
                log!("Accepting item candidate {item_candidate}");
                candidate.items.push(std::mem::take(item_candidate));
                buffer.depth = 0;
            } else {
                // anything else is pushed into the candidate item's text
                item_candidate.text.push_str(&lexeme.text());
            }
        },
        Block::None
        | Block::Paragraph
        | Block::Header(_)
        | Block::PreFormat => {
            panic!("List context parser called to handle non-list context")
        },
    }

    true
}
#[cfg(test)]
mod tests {
    //! End-to-end checks of list parsing and rendering through
    //! `parser::read`.
    //!
    //! Leading spaces inside the input strings are significant: they are what
    //! sets an item's depth. Item labels encode the intended indentation
    //! (`4Ba` sits behind four spaces, `12Da` behind twelve, and so on).

    use crate::{syntax::content::parser, types::Graph};

    /// Renders `input` using the configuration of a fresh `Graph`.
    fn read(input: &str) -> String {
        parser::read(input, &Graph::new(None).meta.config)
    }

    #[test]
    fn unordered_list() {
        assert_eq!(
            read("- a\n- b\n- c"),
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            <li>c</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn minimally_nested_unordered_list() {
        // a single space of indentation is enough to nest
        assert_eq!(
            read("- a\n - b"),
            "\n<ul>\n\
            <li>a<ul>\n\
            <li>b</li></ul></li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_nested_list() {
        assert_eq!(
            read(concat!(
                "- 0Aa\n",
                "    - 4Ba\n",
                "- 0Ca\n",
                "    - 4Da\n",
                "    - 4Db\n",
                "- 0Ea\n",
                "- 0Eb"
            )),
            "\n<ul>\n\
            <li>0Aa<ul>\n\
            <li>4Ba</li></ul></li>\n\
            <li>0Ca<ul>\n\
            <li>4Da</li>\n\
            <li>4Db</li></ul></li>\n\
            <li>0Ea</li>\n\
            <li>0Eb</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_as_eoi() {
        assert_eq!(
            read("some\n\n- a\n- b"),
            "<p>some</p>\n\n\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_as_soi() {
        assert_eq!(
            read("- a\n- b\n\nsome"),
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n<p>some</p>"
        );
    }

    #[test]
    fn unordered_list_after_newline() {
        assert_eq!(
            read("\n- a\n- b"),
            "\n\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_after_two_newlines() {
        assert_eq!(
            read("\n\n- a\n- b"),
            "\n\n\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_after_three_newlines() {
        assert_eq!(
            read("\n\n\n- a\n- b"),
            "\n\n\n\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_before_newline() {
        assert_eq!(
            read("- a\n- b\n"),
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_before_two_newlines() {
        assert_eq!(
            read("- a\n- b\n\n"),
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_before_three_newlines() {
        assert_eq!(
            read("- a\n- b\n\n\n"),
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn unordered_list_before_three_newlines_then_literals() {
        assert_eq!(
            read("- a\n- b\n\n\nw w"),
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            </ul>\n\n\n<p>w w</p>"
        );
    }

    #[test]
    fn unordered_nested_list_multilevel_depth_drop() {
        // depth climbs one level at a time, then drops two levels (16 -> 8)
        // and finally all the way back to the top level
        assert_eq!(
            read(concat!(
                "- 0Aa\n",
                "    - 4Ba\n",
                "        - 8Ca\n",
                "            - 12Da\n",
                "                - 16Ea\n",
                "        - 8Fa\n",
                "- 0Ga\n\n",
            )),
            "\n<ul>\n\
            <li>0Aa<ul>\n\
            <li>4Ba<ul>\n\
            <li>8Ca<ul>\n\
            <li>12Da<ul>\n\
            <li>16Ea</li></ul></li></ul></li>\n\
            <li>8Fa</li></ul></li></ul></li>\n\
            <li>0Ga</li>\n\
            </ul>\n\n"
        );
    }
}

View file

@ -77,8 +77,8 @@ impl Lexeme {
/// Checks all three characters of `t` in one call.
///
/// Returns `true` only when `match_as_char(t.0)`, `match_next_as_char(t.1)`
/// and `match_third_as_char(t.2)` all succeed; evaluation stops at the first
/// check that fails.
pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool {
    self.match_as_char(t.0)
        && self.match_next_as_char(t.1)
        && self.match_third_as_char(t.2)
}
pub fn contains_as_char(&self, slice: &[char]) -> bool {
@ -171,7 +171,9 @@ impl Lexeme {
let mut out_vector = Vec::with_capacity(segments.len());
let mut vec = segments.to_vec();
let Some(mut third) = vec.pop() else { return vec![] };
let Some(mut third) = vec.pop() else {
return vec![];
};
let last_lexeme = Lexeme {
text: third.clone(),
next: String::default(),
@ -179,7 +181,9 @@ impl Lexeme {
last: true,
};
let Some(mut next) = vec.pop() else { return vec![last_lexeme] };
let Some(mut next) = vec.pop() else {
return vec![last_lexeme];
};
let penultimate_lexeme = Lexeme {
text: next.clone(),
next: third.clone(),

View file

@ -1,8 +1,8 @@
use std::collections::{HashMap};
use std::collections::HashMap;
use crate::syntax::content::parser::{
token::{anchor::Anchor},
context::{Context, Block, Inline},
context::{Block, Context, Inline},
token::{anchor::Anchor, item::Item, list::List},
};
#[derive(Clone, Debug)]
@ -24,6 +24,14 @@ pub struct Switches {
/// Per-construct scratch buffers carried in the parser state.
#[derive(Clone, Debug)]
pub struct Buffers {
/// Buffer for anchors.
/// NOTE(review): presumably holds the anchor's text and destination while
/// it is being read — confirm against `AnchorBuffer`.
pub anchor: AnchorBuffer,
/// List, and current item, being assembled while in the List block context.
pub list: ListBuffer,
}
/// Working state of the list context parser.
///
/// The `Default` value is an empty list candidate, an empty item candidate
/// with unknown depth, and a depth counter of zero.
#[derive(Default, Clone, Debug)]
pub struct ListBuffer {
/// List being assembled; items are pushed onto it as they are completed.
pub candidate: List,
/// Item currently being read; its depth stays `None` until a bullet is seen.
pub item_candidate: Item,
/// Spaces counted so far before the current item's bullet.
pub depth: u8,
}
#[derive(Default, Clone, Debug)]
@ -77,6 +85,11 @@ impl Default for State {
text: String::default(),
destination: String::default(),
},
list: ListBuffer {
candidate: List::default(),
item_candidate: Item::default(),
depth: 0,
},
},
}
}

View file

@ -16,7 +16,7 @@ impl CheckBox {
impl Parseable for CheckBox {
/// Returns `true` when the lexeme matches either checkbox spelling:
/// `[`, space, `]` (unchecked) or `[`, `x`, `]` (checked).
fn probe(lexeme: &Lexeme) -> bool {
    lexeme.match_triple_as_char(('[', ' ', ']'))
        || lexeme.match_triple_as_char(('[', 'x', ']'))
}
fn lex(lexeme: &Lexeme) -> CheckBox {
@ -30,11 +30,7 @@ impl Parseable for CheckBox {
}
/// Renders the checkbox as an HTML `<input type="checkbox"/>` element,
/// adding the `checked` attribute when `self.checked` is set.
fn render(&self) -> String {
    // surrounding spaces keep the attribute apart from `type=...` and `/>`
    let toggle = if self.checked { " checked " } else { "" };
    format!(r#"<input type="checkbox"{toggle}/>"#)
}
}

View file

@ -232,16 +232,25 @@ mod tests {
/// Requesting an id for the same header text repeatedly, against the same
/// map, must yield the bare id first and then `-1`, `-2` suffixed variants.
fn id_deduplication() {
    let mut map: HashMap<String, Vec<String>> = HashMap::default();
    let config = Config::default();

    // exactly one `make_id` call per assertion: every call goes through the
    // shared map, so an extra call would shift the expected suffixes
    let id = Header::make_id(
        &config,
        &Lexeme::new("##", "UVrcCUjoQ", ""),
        &mut map,
    );
    assert_eq!(id, "UVrcCUjoQ");

    let double = Header::make_id(
        &config,
        &Lexeme::new("##", "UVrcCUjoQ", ""),
        &mut map,
    );
    assert_eq!(double, "UVrcCUjoQ-1");

    let double2 = Header::make_id(
        &config,
        &Lexeme::new("##", "UVrcCUjoQ", ""),
        &mut map,
    );
    assert_eq!(double2, "UVrcCUjoQ-2");
}

View file

@ -1,41 +1,45 @@
use crate::syntax::content::{Parseable, Lexeme};
#[derive(Debug, Clone, Eq, PartialEq)]
#[derive(Default, Debug, Clone, Eq, PartialEq)]
pub struct Item {
open: bool,
pub text: String,
pub depth: Option<u8>,
}
impl Parseable for Item {
fn probe(lexeme: &Lexeme) -> bool {
(lexeme.match_as_char('-') || lexeme.match_as_char('+'))
&& lexeme.match_next_as_char(' ')
fn probe(_: &Lexeme) -> bool {
false
}
fn lex(_lexeme: &Lexeme) -> Item {
Item { open: false }
fn lex(_: &Lexeme) -> Item {
panic!("Attempt to lex an item directly from a lexeme")
}
fn render(&self) -> String {
if self.open {
String::from("<li>")
} else {
String::from("</li>")
}
panic!("Items should only be rendered by a list's render method")
}
}
impl Item {
pub fn new(open: bool) -> Item {
Item { open }
}
pub fn probe_end(lexeme: &Lexeme) -> bool {
lexeme.match_as_char('\n')
pub fn new(text: &str, depth: Option<u8>) -> Item {
Item {
text: String::from(text),
depth,
}
}
}
impl std::fmt::Display for Item {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Item [{}]", if self.open { "open" } else { "closed" })
write!(
f,
"Item [{}] {}",
if let Some(depth) = self.depth {
format!("D{depth}")
} else {
"<unknown>".to_string()
},
self.text,
)
}
}

View file

@ -1,15 +1,19 @@
use crate::syntax::content::{Parseable, Lexeme};
use std::fmt::Write as _;
#[derive(Debug, Clone, Eq, PartialEq)]
use crate::{
prelude::*,
syntax::content::{Lexeme, Parseable, parser::token::item::Item},
};
#[derive(Default, Debug, Clone, Eq, PartialEq)]
pub struct List {
open: bool,
ordered: bool,
pub ordered: bool,
pub items: Vec<Item>,
}
impl Parseable for List {
fn probe(lexeme: &Lexeme) -> bool {
(lexeme.match_as_char('-') || lexeme.match_as_char('+'))
&& lexeme.match_next_as_char(' ')
lexeme.match_either_char('-', '+') && lexeme.match_next_char(' ')
}
fn lex(_lexeme: &Lexeme) -> List {
@ -17,20 +21,57 @@ impl Parseable for List {
}
fn render(&self) -> String {
let bar = if self.open { "" } else { "/" };
let tag = if self.ordered { "ol" } else { "ul" };
let mut output = String::new();
format!("<{bar}{tag}>")
let indent_width = self
.items
.windows(2)
.find_map(|pair| {
let a = pair[0].depth?;
let b = pair[1].depth?;
(b > a).then_some(b - a)
})
.unwrap_or(1);
let mut iterator = self.items.iter().peekable();
while let Some(item) = iterator.next() {
let current_level = item.depth.unwrap_or(0) / indent_width;
let next_level = iterator.peek().and_then(|n| n.depth).unwrap_or(0)
/ indent_width;
output.push_str("<li>");
output.push_str(&item.text);
if next_level > current_level {
// Open nested list(s), keep <li> open
for _ in 0..(next_level - current_level) {
output.push_str(&format!("<{tag}>\n"));
}
} else {
// close current <li>
output.push_str("</li>");
// close nested lists inline
for _ in 0..(current_level - next_level) {
output.push_str(&format!("</{tag}></li>"));
}
output.push('\n');
}
}
format!("\n<{tag}>\n{output}</{tag}>\n\n")
}
}
impl List {
pub fn new(open: bool, ordered: bool) -> List {
List { open, ordered }
}
pub fn probe_end(lexeme: &Lexeme) -> bool {
lexeme.match_as_char('\n')
pub fn new(ordered: bool) -> List {
List {
ordered,
items: vec![],
}
}
}
@ -38,9 +79,87 @@ impl std::fmt::Display for List {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"List [{} {}]",
if self.open { "open" } else { "closed" },
"List [{} {} items]",
self.items.len(),
if self.ordered { "ordered" } else { "unordered" },
)
}
}
#[cfg(test)]
mod tests {
    //! Rendering checks that build `List` values directly, bypassing the
    //! parser.

    use super::*;

    /// Builds an unordered list whose items are the given `(text, depth)`
    /// pairs, every depth being known.
    fn unordered(entries: &[(&str, u8)]) -> List {
        List {
            items: entries
                .iter()
                .map(|&(text, depth)| Item::new(text, Some(depth)))
                .collect(),
            ..List::new(false)
        }
    }

    #[test]
    fn render_flat_list() {
        let rendered = unordered(&[("a", 0), ("b", 0), ("c", 0)]).render();

        assert_eq!(
            rendered,
            "\n<ul>\n\
            <li>a</li>\n\
            <li>b</li>\n\
            <li>c</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn render_nested_list() {
        let rendered = unordered(&[
            ("0Aa", 0),
            ("4Ba", 4),
            ("0Ca", 0),
            ("4Da", 4),
            ("4Db", 4),
            ("0Ea", 0),
            ("0Eb", 0),
        ])
        .render();

        assert_eq!(
            rendered,
            "\n<ul>\n\
            <li>0Aa<ul>\n\
            <li>4Ba</li></ul></li>\n\
            <li>0Ca<ul>\n\
            <li>4Da</li>\n\
            <li>4Db</li></ul></li>\n\
            <li>0Ea</li>\n\
            <li>0Eb</li>\n\
            </ul>\n\n"
        );
    }

    #[test]
    fn render_multilevel_depth_drop() {
        // depth rises in steps of four, then falls by two levels and finally
        // back to the top level
        let rendered = unordered(&[
            ("0Aa", 0),
            ("4Ba", 4),
            ("8Ca", 8),
            ("12Da", 12),
            ("16Ea", 16),
            ("8Fa", 8),
            ("0Ga", 0),
        ])
        .render();

        assert_eq!(
            rendered,
            "\n<ul>\n\
            <li>0Aa<ul>\n\
            <li>4Ba<ul>\n\
            <li>8Ca<ul>\n\
            <li>12Da<ul>\n\
            <li>16Ea</li></ul></li></ul></li>\n\
            <li>8Fa</li></ul></li></ul></li>\n\
            <li>0Ga</li>\n\
            </ul>\n\n"
        );
    }
}