From e42c67676daae565488001033374d6c3fb4a1d38 Mon Sep 17 00:00:00 2001 From: jutty Date: Wed, 7 Jan 2026 15:11:50 -0300 Subject: [PATCH] Implement nested lists --- .justfile | 2 +- Cargo.toml | 3 +- src/syntax/content/parser/context.rs | 21 +- src/syntax/content/parser/context/block.rs | 69 +----- src/syntax/content/parser/context/list.rs | 258 ++++++++++++++++++++ src/syntax/content/parser/lexeme.rs | 12 +- src/syntax/content/parser/state.rs | 19 +- src/syntax/content/parser/token/checkbox.rs | 8 +- src/syntax/content/parser/token/header.rs | 21 +- src/syntax/content/parser/token/item.rs | 42 ++-- src/syntax/content/parser/token/list.rs | 151 ++++++++++-- 11 files changed, 475 insertions(+), 131 deletions(-) create mode 100644 src/syntax/content/parser/context/list.rs diff --git a/.justfile b/.justfile index 40e4949..8f71fc5 100644 --- a/.justfile +++ b/.justfile @@ -10,7 +10,7 @@ alias u := update # Build and serve [group: 'develop'] run host='::1' port='3003' *args: - {{ debug_vars }} cargo run -- \ + DEBUG=${DEBUG:-1} {{ debug_vars }} cargo run -- \ --hostname {{ host }} --port {{ port }} {{ args }} alias r := run diff --git a/Cargo.toml b/Cargo.toml index 27f01e5..89f471f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,8 @@ keyword-idents = "warn" # levels: allow, warn, deny, forbid manual_non_exhaustive = "allow" +collapsible_if = "allow" +collapsible_else_if = "allow" # pedantic assigning_clones = "warn" @@ -49,7 +51,6 @@ cast_ptr_alignment = "warn" cast_sign_loss = "warn" checked_conversions = "warn" cloned_instead_of_copied = "warn" -comparison_chain = "warn" copy_iterator = "warn" default_trait_access = "warn" doc_broken_link = "warn" diff --git a/src/syntax/content/parser/context.rs b/src/syntax/content/parser/context.rs index 0ae3197..a94c2fc 100644 --- a/src/syntax/content/parser/context.rs +++ b/src/syntax/content/parser/context.rs @@ -1,14 +1,12 @@ use crate::syntax::content::parser::{ state::State, - token::{ - Token, paragraph::Paragraph, preformat::PreFormat, list::List, - item::Item, - }, + token::{Token, paragraph::Paragraph, preformat::PreFormat}, }; -pub mod anchor; pub mod block; pub mod inline; +pub mod anchor; +pub mod list; #[derive(Clone, Debug)] pub struct Context { @@ -19,9 +17,8 @@ pub struct Context { #[derive(Clone, Debug)] pub enum Block { Paragraph, - Header(u8), - Item(bool), - List(bool), + Header(u8), // level + List, PreFormat, None, } @@ -43,12 +40,8 @@ pub fn close(state: &State, tokens: &mut Vec) { Block::Paragraph => { tokens.push(Token::Paragraph(Paragraph::new(false))); }, - Block::Item(ordered) => { - tokens.push(Token::Item(Item::new(false))); - tokens.push(Token::List(List::new(false, ordered))); - }, - Block::List(ordered) => { - tokens.push(Token::List(List::new(false, ordered))); + Block::List => { + panic!("End of input with open list") }, Block::Header(_) => panic!("End of input with open header"), Block::None => (), diff --git a/src/syntax/content/parser/context/block.rs b/src/syntax/content/parser/context/block.rs index 11fa54f..b1fad4c 100644 --- a/src/syntax/content/parser/context/block.rs +++ b/src/syntax/content/parser/context/block.rs @@ -9,7 +9,8 @@ use crate::{ lexeme::Lexeme, state::State, token::{ - Token, checkbox::CheckBox, header::Header, item::Item, list::List, literal::Literal, paragraph::Paragraph, preformat::PreFormat + Token, header::Header, list::List, literal::Literal, + paragraph::Paragraph, preformat::PreFormat, }, }, }, @@ -42,15 +43,10 @@ pub fn parse( tokens.push(Token::Header(header)); return true; } else if List::probe(lexeme) { - let ordered = lexeme.match_as_char('+'); - log!("Block Context: None -> Item on {lexeme}"); - state.context.block = Block::Item(ordered); - tokens.push(Token::List(List::new(true, ordered))); - tokens.push(Token::Item(Item::new(true))); - // List::probe implies a dash followed by a space, - // both of which sould not be rendered literally - iterator.next(); - return true; + log!("Block Context: None -> List on {lexeme}"); + state.context.block = Block::List; + state.buffers.list.candidate.ordered = lexeme.match_char('+'); + return super::list::parse(lexeme, state, tokens, iterator); } else if Paragraph::probe(lexeme) { log!("Block Context: None -> Paragraph on {lexeme}"); state.context.block = Block::Paragraph; @@ -81,33 +77,8 @@ pub fn parse( state.context.block = Block::None; } }, - Block::List(ordered) => { - if List::probe_end(lexeme) { - tokens.push(Token::List(List::new(false, ordered))); - log!("Block Context: List -> None on {lexeme}"); - state.context.block = Block::None; - } else if Item::probe(lexeme) { - tokens.push(Token::Item(Item::new(true))); - log!("Block Context: List -> Item on {lexeme}"); - state.context.block = Block::Item(ordered); - // Item::probe implies a dash followed by a space, - // both of which sould not be rendered literally - iterator.next(); - return true; - } - }, - Block::Item(ordered) => { - if CheckBox::probe(lexeme) { - log!("Probed CheckBox: {lexeme}"); - tokens.push(Token::CheckBox(CheckBox::lex(lexeme))); - iterator.next(); - iterator.next(); - return true - } else if Item::probe_end(lexeme) { - tokens.push(Token::Item(Item::new(false))); - log!("Block Context: Item -> List on {lexeme}"); - state.context.block = Block::List(ordered); - } + Block::List => { + return super::list::parse(lexeme, state, tokens, iterator); }, } false @@ -176,28 +147,4 @@ mod tests { let level = Level::from(u); assert_eq!(level.to_string(), "6"); } - - #[test] - fn unordered_list_at_eoi() { - assert_eq!( - read("- a\n- b\n- c"), - "" - ); - } - - #[test] - fn unordered_list_with_content_before() { - assert_eq!( - read("_e e_\n\n- a\n- b\n- c"), - "

e e

\n\n", - ); - } - - #[test] - fn unordered_list_with_content_after() { - assert_eq!( - read("- a\n- b\n- c\n\nd",), - "\n

d

" - ); - } } diff --git a/src/syntax/content/parser/context/list.rs b/src/syntax/content/parser/context/list.rs new file mode 100644 index 0000000..e44b7f2 --- /dev/null +++ b/src/syntax/content/parser/context/list.rs @@ -0,0 +1,258 @@ +use std::{iter::Peekable, slice::Iter}; + +use crate::{ + prelude::*, + syntax::content::parser::{ + context::Block, + lexeme::Lexeme, + state::{ListBuffer, State}, + token::{Token, item::Item}, + }, +}; + +/// Handles open list contexts until a list is fully parsed. +/// +/// A return of `true` will trigger a continue in the outer parser, +/// skipping any further parsing of the current lexeme. +/// +/// # Panics +/// This parser can handle only the List context, and will panic if passed an +/// unrelated context since it has no knowledge on how to handle them. +pub fn parse( + lexeme: &Lexeme, + state: &mut State, + tokens: &mut Vec, + iterator: &mut Peekable>, +) -> bool { + let buffer = &mut state.buffers.list; + let candidate = &mut buffer.candidate; + let item_candidate = &mut buffer.item_candidate; + + match state.context.block { + Block::List => { + if lexeme.match_char(' ') && item_candidate.depth.is_none() { + // found space, unknown increasing depth + buffer.depth = buffer.depth.saturating_add(1); + } else if item_candidate.depth.is_none() + && lexeme.match_either_char('-', '+') + { + // found bullet, depth now known + item_candidate.depth = Some(buffer.depth); + if lexeme.match_next_char(' ') { + iterator.next(); + } + } else if lexeme.last() || lexeme.match_char_sequence('\n', '\n') { + // found end of list + if !lexeme.match_char('\n') { + // no trailing break, last item's text wouldn't be pushed + item_candidate.text.push_str(&lexeme.text()); + } + if item_candidate.depth.is_some() { + // if the current item candidate has a known depth, push it + candidate.items.push(item_candidate.clone()); + } + // push list candidate, reset state and exit context + log!("Accepting list candidate {candidate}"); + tokens.push(Token::List(candidate.clone())); + state.context.block = Block::None; + iterator.next(); + *buffer = ListBuffer::default(); + } else if lexeme.match_char('\n') { + // found end of item, push it and reset state + log!("Accepting item candidate {item_candidate}"); + candidate.items.push(item_candidate.clone()); + *item_candidate = Item::default(); + buffer.depth = 0; + } else { + // anything else is pushed into the candidate item's text + item_candidate.text.push_str(&lexeme.text()); + } + }, + Block::None + | Block::Paragraph + | Block::Header(_) + | Block::PreFormat => { + panic!("List context parser called to handle non-list context") + }, + } + true +} + +#[cfg(test)] +mod tests { + use crate::{syntax::content::parser, types::Graph}; + + fn read(input: &str) -> String { + parser::read(input, &Graph::new(None).meta.config) + } + + #[test] + fn unordered_list() { + assert_eq!( + read("- a\n- b\n- c"), + "\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
  • c
  • \n\ +
\n\n" + ); + } + + #[test] + fn minimally_nested_unordered_list() { + assert_eq!( + read("- a\n - b"), + "\n
    \n\ +
  • a
      \n\ +
    • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_nested_list() { + assert_eq!( + read(concat!( + "- 0Aa\n", + " - 4Ba\n", + "- 0Ca\n", + " - 4Da\n", + " - 4Db\n", + "- 0Ea\n", + "- 0Eb" + )), + "\n
    \n\ +
  • 0Aa
      \n\ +
    • 4Ba
  • \n\ +
  • 0Ca
      \n\ +
    • 4Da
    • \n\ +
    • 4Db
  • \n\ +
  • 0Ea
  • \n\ +
  • 0Eb
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_as_eoi() { + assert_eq!( + read("some\n\n- a\n- b"), + "

some

\n\n\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_as_soi() { + assert_eq!( + read("- a\n- b\n\nsome"), + "\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n

some

" + ); + } + + #[test] + fn unordered_list_after_newline() { + assert_eq!( + read("\n- a\n- b"), + "\n\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_after_two_newlines() { + assert_eq!( + read("\n\n- a\n- b"), + "\n\n\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_after_three_newlines() { + assert_eq!( + read("\n\n\n- a\n- b"), + "\n\n\n\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_before_newline() { + assert_eq!( + read("- a\n- b\n"), + "\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_before_two_newlines() { + assert_eq!( + read("- a\n- b\n\n"), + "\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_before_three_newlines() { + assert_eq!( + read("- a\n- b\n\n\n"), + "\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n" + ); + } + + #[test] + fn unordered_list_before_three_newlines_then_literals() { + assert_eq!( + read("- a\n- b\n\n\nw w"), + "\n
    \n\ +
  • a
  • \n\ +
  • b
  • \n\ +
\n\n\n

w w

" + ); + } + + #[test] + fn unordered_nested_list_multilevel_depth_drop() { + assert_eq!( + read(concat!( + "- 0Aa\n", + " - 4Ba\n", + " - 8Ca\n", + " - 12Da\n", + " - 16Ea\n", + " - 8Fa\n", + "- 0Ga\n\n", + )), + "\n
    \n\ +
  • 0Aa
      \n\ +
    • 4Ba
        \n\ +
      • 8Ca
          \n\ +
        • 12Da
            \n\ +
          • 16Ea
      • \n\ +
      • 8Fa
  • \n\ +
  • 0Ga
  • \n\ +
\n\n" + ); + } +} diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs index 78de9f8..2e5496c 100644 --- a/src/syntax/content/parser/lexeme.rs +++ b/src/syntax/content/parser/lexeme.rs @@ -77,8 +77,8 @@ impl Lexeme { pub fn match_triple_as_char(&self, t: (char, char, char)) -> bool { self.match_as_char(t.0) - && self.match_next_as_char(t.1) - && self.match_third_as_char(t.2) + && self.match_next_as_char(t.1) + && self.match_third_as_char(t.2) } pub fn contains_as_char(&self, slice: &[char]) -> bool { @@ -171,7 +171,9 @@ impl Lexeme { let mut out_vector = Vec::with_capacity(segments.len()); let mut vec = segments.to_vec(); - let Some(mut third) = vec.pop() else { return vec![] }; + let Some(mut third) = vec.pop() else { + return vec![]; + }; let last_lexeme = Lexeme { text: third.clone(), next: String::default(), @@ -179,7 +181,9 @@ impl Lexeme { last: true, }; - let Some(mut next) = vec.pop() else { return vec![last_lexeme] }; + let Some(mut next) = vec.pop() else { + return vec![last_lexeme]; + }; let penultimate_lexeme = Lexeme { text: next.clone(), next: third.clone(), diff --git a/src/syntax/content/parser/state.rs b/src/syntax/content/parser/state.rs index 7228b9c..69318a7 100644 --- a/src/syntax/content/parser/state.rs +++ b/src/syntax/content/parser/state.rs @@ -1,8 +1,8 @@ -use std::collections::{HashMap}; +use std::collections::HashMap; use crate::syntax::content::parser::{ - token::{anchor::Anchor}, - context::{Context, Block, Inline}, + context::{Block, Context, Inline}, + token::{anchor::Anchor, item::Item, list::List}, }; #[derive(Clone, Debug)] @@ -24,6 +24,14 @@ pub struct Switches { #[derive(Clone, Debug)] pub struct Buffers { pub anchor: AnchorBuffer, + pub list: ListBuffer, +} + +#[derive(Default, Clone, Debug)] +pub struct ListBuffer { + pub candidate: List, + pub item_candidate: Item, + pub depth: u8, } #[derive(Default, Clone, Debug)] @@ -77,6 +85,11 @@ impl Default for State { text: String::default(), destination: String::default(), }, + list: ListBuffer { + candidate: List::default(), + item_candidate: Item::default(), + depth: 0, + }, }, } } diff --git a/src/syntax/content/parser/token/checkbox.rs b/src/syntax/content/parser/token/checkbox.rs index ebdafcd..23cb994 100644 --- a/src/syntax/content/parser/token/checkbox.rs +++ b/src/syntax/content/parser/token/checkbox.rs @@ -16,7 +16,7 @@ impl CheckBox { impl Parseable for CheckBox { fn probe(lexeme: &Lexeme) -> bool { lexeme.match_triple_as_char(('[', ' ', ']')) - || lexeme.match_triple_as_char(('[', 'x', ']')) + || lexeme.match_triple_as_char(('[', 'x', ']')) } fn lex(lexeme: &Lexeme) -> CheckBox { @@ -30,11 +30,7 @@ impl Parseable for CheckBox { } fn render(&self) -> String { - let toggle = if self.checked { - " checked " - } else { - "" - }; + let toggle = if self.checked { " checked " } else { "" }; format!(r#""#) } } diff --git a/src/syntax/content/parser/token/header.rs b/src/syntax/content/parser/token/header.rs index 660f659..390a22a 100644 --- a/src/syntax/content/parser/token/header.rs +++ b/src/syntax/content/parser/token/header.rs @@ -232,16 +232,25 @@ mod tests { fn id_deduplication() { let mut map: HashMap> = HashMap::default(); let config = Config::default(); - let id = - Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map); + let id = Header::make_id( + &config, + &Lexeme::new("##", "UVrcCUjoQ", ""), + &mut map, + ); assert_eq!(id, "UVrcCUjoQ"); - let double = - Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map); + let double = Header::make_id( + &config, + &Lexeme::new("##", "UVrcCUjoQ", ""), + &mut map, + ); assert_eq!(double, "UVrcCUjoQ-1"); - let double2 = - Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ", ""), &mut map); + let double2 = Header::make_id( + &config, + &Lexeme::new("##", "UVrcCUjoQ", ""), + &mut map, + ); assert_eq!(double2, "UVrcCUjoQ-2"); } diff --git a/src/syntax/content/parser/token/item.rs b/src/syntax/content/parser/token/item.rs index a4aa0d1..1c94318 100644 --- a/src/syntax/content/parser/token/item.rs +++ b/src/syntax/content/parser/token/item.rs @@ -1,41 +1,45 @@ use crate::syntax::content::{Parseable, Lexeme}; -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Default, Debug, Clone, Eq, PartialEq)] pub struct Item { - open: bool, + pub text: String, + pub depth: Option, } impl Parseable for Item { - fn probe(lexeme: &Lexeme) -> bool { - (lexeme.match_as_char('-') || lexeme.match_as_char('+')) - && lexeme.match_next_as_char(' ') + fn probe(_: &Lexeme) -> bool { + false } - fn lex(_lexeme: &Lexeme) -> Item { - Item { open: false } + fn lex(_: &Lexeme) -> Item { + panic!("Attempt to lex an item directly from a lexeme") } fn render(&self) -> String { - if self.open { - String::from("
  • ") - } else { - String::from("
  • ") - } + panic!("Items should only be rendered by a list's render method") } } impl Item { - pub fn new(open: bool) -> Item { - Item { open } - } - - pub fn probe_end(lexeme: &Lexeme) -> bool { - lexeme.match_as_char('\n') + pub fn new(text: &str, depth: Option) -> Item { + Item { + text: String::from(text), + depth, + } } } impl std::fmt::Display for Item { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "Item [{}]", if self.open { "open" } else { "closed" }) + write!( + f, + "Item [{}] {}", + if let Some(depth) = self.depth { + format!("D{depth}") + } else { + "".to_string() + }, + self.text, + ) } } diff --git a/src/syntax/content/parser/token/list.rs b/src/syntax/content/parser/token/list.rs index ae2d130..dcb2bc7 100644 --- a/src/syntax/content/parser/token/list.rs +++ b/src/syntax/content/parser/token/list.rs @@ -1,15 +1,19 @@ -use crate::syntax::content::{Parseable, Lexeme}; +use std::fmt::Write as _; -#[derive(Debug, Clone, Eq, PartialEq)] +use crate::{ + prelude::*, + syntax::content::{Lexeme, Parseable, parser::token::item::Item}, +}; + +#[derive(Default, Debug, Clone, Eq, PartialEq)] pub struct List { - open: bool, - ordered: bool, + pub ordered: bool, + pub items: Vec, } impl Parseable for List { fn probe(lexeme: &Lexeme) -> bool { - (lexeme.match_as_char('-') || lexeme.match_as_char('+')) - && lexeme.match_next_as_char(' ') + lexeme.match_either_char('-', '+') && lexeme.match_next_char(' ') } fn lex(_lexeme: &Lexeme) -> List { @@ -17,20 +21,57 @@ impl Parseable for List { } fn render(&self) -> String { - let bar = if self.open { "" } else { "/" }; let tag = if self.ordered { "ol" } else { "ul" }; + let mut output = String::new(); - format!("<{bar}{tag}>") + let indent_width = self + .items + .windows(2) + .find_map(|pair| { + let a = pair[0].depth?; + let b = pair[1].depth?; + (b > a).then_some(b - a) + }) + .unwrap_or(1); + + let mut iterator = self.items.iter().peekable(); + + while let Some(item) = iterator.next() { + let current_level = item.depth.unwrap_or(0) / indent_width; + let next_level = iterator.peek().and_then(|n| n.depth).unwrap_or(0) + / indent_width; + + output.push_str("
  • "); + output.push_str(&item.text); + + if next_level > current_level { + // Open nested list(s), keep
  • open + for _ in 0..(next_level - current_level) { + output.push_str(&format!("<{tag}>\n")); + } + } else { + // close current
  • + output.push_str("
  • "); + + // close nested lists inline + for _ in 0..(current_level - next_level) { + output.push_str(&format!("")); + } + + output.push('\n'); + } + } + + format!("\n<{tag}>\n{output}\n\n") } } impl List { - pub fn new(open: bool, ordered: bool) -> List { - List { open, ordered } - } - - pub fn probe_end(lexeme: &Lexeme) -> bool { - lexeme.match_as_char('\n') + pub fn new(ordered: bool) -> List { + List { + ordered, + items: vec![], + } } } @@ -38,9 +79,87 @@ impl std::fmt::Display for List { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "List [{} {}]", - if self.open { "open" } else { "closed" }, + "List [{} {} items]", + self.items.len(), if self.ordered { "ordered" } else { "unordered" }, ) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn render_flat_list() { + let mut list = List::new(false); + list.items = vec![ + Item::new("a", Some(0)), + Item::new("b", Some(0)), + Item::new("c", Some(0)), + ]; + + assert_eq!( + list.render(), + "\n
      \n\ +
    • a
    • \n\ +
    • b
    • \n\ +
    • c
    • \n\ +
    \n\n" + ); + } + + #[test] + fn render_nested_list() { + let mut list = List::new(false); + list.items = vec![ + Item::new("0Aa", Some(0)), + Item::new("4Ba", Some(4)), + Item::new("0Ca", Some(0)), + Item::new("4Da", Some(4)), + Item::new("4Db", Some(4)), + Item::new("0Ea", Some(0)), + Item::new("0Eb", Some(0)), + ]; + + assert_eq!( + list.render(), + "\n
      \n\ +
    • 0Aa
        \n\ +
      • 4Ba
    • \n\ +
    • 0Ca
        \n\ +
      • 4Da
      • \n\ +
      • 4Db
    • \n\ +
    • 0Ea
    • \n\ +
    • 0Eb
    • \n\ +
    \n\n" + ); + } + + #[test] + fn render_multilevel_depth_drop() { + let mut list = List::new(false); + list.items = vec![ + Item::new("0Aa", Some(0)), + Item::new("4Ba", Some(4)), + Item::new("8Ca", Some(8)), + Item::new("12Da", Some(12)), + Item::new("16Ea", Some(16)), + Item::new("8Fa", Some(8)), + Item::new("0Ga", Some(0)), + ]; + + assert_eq!( + list.render(), + "\n
      \n\ +
    • 0Aa
        \n\ +
      • 4Ba
          \n\ +
        • 8Ca
            \n\ +
          • 12Da
              \n\ +
            • 16Ea
        • \n\ +
        • 8Fa
    • \n\ +
    • 0Ga
    • \n\ +
    \n\n" + ); + } +}