From e50bbd468dc4805dd4410f7c410fc65f96d4667c Mon Sep 17 00:00:00 2001 From: jutty Date: Fri, 26 Dec 2025 04:19:21 -0300 Subject: [PATCH] Add tests for the content syntax parser --- src/syntax/content.rs | 2 +- src/syntax/content/parser.rs | 99 ++++++++++++++- src/syntax/content/parser/lexeme.rs | 58 ++++++++- src/syntax/content/parser/token.rs | 49 +------- src/syntax/content/parser/token/anchor.rs | 33 ++++- src/syntax/content/parser/token/code.rs | 22 ++++ src/syntax/content/parser/token/header.rs | 122 +++++++++++++++---- src/syntax/content/parser/token/linebreak.rs | 7 -- src/syntax/content/parser/token/literal.rs | 7 -- src/syntax/content/parser/token/paragraph.rs | 29 +++-- src/syntax/content/parser/token/preformat.rs | 26 ++++ src/syntax/content/parser/token/span.rs | 49 ++++++-- 12 files changed, 383 insertions(+), 120 deletions(-) diff --git a/src/syntax/content.rs b/src/syntax/content.rs index d885392..353af63 100644 --- a/src/syntax/content.rs +++ b/src/syntax/content.rs @@ -2,7 +2,7 @@ use parser::{token::Token, lexeme::Lexeme}; pub mod parser; -pub trait Parseable: Into { +pub trait Parseable { fn probe(lexeme: &Lexeme) -> bool; fn lex(lexeme: &Lexeme) -> Self; fn render(&self) -> String; diff --git a/src/syntax/content/parser.rs b/src/syntax/content/parser.rs index ea9744b..a57b9ce 100644 --- a/src/syntax/content/parser.rs +++ b/src/syntax/content/parser.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap}; -use crate::{prelude::*, syntax::serial::populate_graph, types::Config}; +use crate::{syntax::serial::populate_graph, types::Config}; use super::{Parseable as _, Token, LexMap}; use token::{ anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header, @@ -39,7 +39,7 @@ fn lex(text: &str, map: LexMap) -> Vec { let mut header = Header::lex(lexeme); header.dom_id = Some(Header::make_id( &config, - &mut iterator, + iterator.peek().map_or(&Lexeme::new("", ""), |l| l), &mut state.dom_ids, )); state.context.block = BlockContext::Header(header.level()); @@ -154,8 +154,6 @@ fn lex(text: &str, map: LexMap) -> Vec { buffer.destination.push_str(&lexeme.text()); } continue; - } else { - unreachable!("Anchor is already fully parsed"); } }, } @@ -241,11 +239,10 @@ fn close(state: &State, tokens: &mut Vec) { }, BlockContext::Header(_) => panic!("End of file with open header"), BlockContext::PreFormat => panic!("End of file with open preformat"), - BlockContext::None => log!("End of file on None block context"), + BlockContext::None => (), } } - fn parse(tokens: &[Token]) -> String { tokens.iter().map(Token::render).collect::() } @@ -256,10 +253,100 @@ pub(super) fn read(text: &str) -> String { #[cfg(test)] mod tests { + use crate::syntax::content::parser::token::header::Level; + use super::*; + #[test] + fn empty_render_is_empty() { + assert_eq!(read(""), ""); + } + + #[test] + fn mixed_sample() { + let en = "`this |test|` tries ## to |brea|k|: things"; + let html = r#"

this |test| tries ## to brea: things

"#; + + assert_eq!(read(en), html); + } + #[test] fn force_flanking() { assert_eq!(read("|Node||"), r#"

Node

"#); } + + #[test] + fn flanking_with_trailing_pipe() { + assert_eq!( + read("|Node|Destination|"), + r#"

Node

"# + ); + } + + #[test] + fn nonleading_second_pipe() { + assert_eq!( + read("Go to Node|Destination|, here"), + r#"

Go to Node, here

"#, + ); + } + + #[test] + fn clear_anchor_buffer() { + assert_eq!( + read("|SomeAnchor|\n|SomeOtherAnchor|"), + concat!( + r#"

SomeAnchor

"#, + "\n", + r#"

SomeOtherAnchor

"# + ), + ); + } + + #[test] + #[should_panic(expected = "End of file with open header")] + fn end_with_open_header() { + let default_state = State::new(); + let state = State { + context: Context { + block: BlockContext::Header(1), + ..default_state.context + }, + ..default_state + }; + + close(&state, &mut vec![]); + } + + #[test] + #[should_panic(expected = "End of file with open preformat")] + fn end_with_open_preformat() { + let default_state = State::new(); + let state = State { + context: Context { + block: BlockContext::PreFormat, + ..default_state.context + }, + ..default_state + }; + + close(&state, &mut vec![]); + } + + #[test] + fn truncated_header_level() { + let u: usize = 999; + let level = Level::from(u); + assert_eq!(level.to_string(), "6"); + } + + #[test] + fn display_level() { + assert_eq!(format!("{}", Level::One), "1"); + assert_eq!(format!("{}", Level::Two), "2"); + assert_eq!(format!("{}", Level::Three), "3"); + assert_eq!(format!("{}", Level::Four), "4"); + assert_eq!(format!("{}", Level::Five), "5"); + assert_eq!(format!("{}", Level::Six), "6"); + } } diff --git a/src/syntax/content/parser/lexeme.rs b/src/syntax/content/parser/lexeme.rs index 14e94e5..ae23202 100644 --- a/src/syntax/content/parser/lexeme.rs +++ b/src/syntax/content/parser/lexeme.rs @@ -50,7 +50,7 @@ impl Lexeme { } /// # Panics - /// Panics if number of chars for a single lexeme exceeds `i2::MAX` + /// Panics if number of chars for a single lexeme exceeds `i32::MAX` pub fn count_char(&self, c: char) -> i32 { let count = self.text().chars().filter(|&n| n == c).count(); match i32::try_from(count) { @@ -90,3 +90,59 @@ impl Lexeme { out_vector } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_lexeme() { + let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu"; + let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa"; + let lexeme = Lexeme::new(raw, next); + assert_eq!(lexeme.text, raw); + assert_eq!(lexeme.next, next); + } + + #[test] + fn next_first_char() { + let payload = "4IU"; + let lexeme = Lexeme::new(payload, payload); + assert_eq!(lexeme.next_first_char().unwrap(), '4'); + } + + #[test] + fn match_first_char() { + let payload = "MKY"; + let lexeme = Lexeme::new(payload, payload); + assert!(lexeme.match_first_char('M')); + } + + #[test] + fn match_absent_first_char() { + let payload = ""; + let lexeme = Lexeme::new(payload, payload); + assert!(!lexeme.match_first_char('x')); + } + + #[test] + fn first_word() { + let payload = "nhNc fGev QnGW E4hj ExyZ"; + let lexeme = Lexeme::new(payload, payload); + assert_eq!(lexeme.first(), Some(String::from("nhNc"))); + } + + #[test] + fn count_char() { + let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY"; + let lexeme = Lexeme::new(payload, payload); + assert_eq!(lexeme.count_char('j'), 3); + } + + #[test] + fn count_char_huge_number() { + let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY"; + let lexeme = Lexeme::new(payload, payload); + assert_eq!(lexeme.count_char('j'), 3); + } +} diff --git a/src/syntax/content/parser/token.rs b/src/syntax/content/parser/token.rs index 85ed1bf..8484fff 100644 --- a/src/syntax/content/parser/token.rs +++ b/src/syntax/content/parser/token.rs @@ -36,50 +36,9 @@ impl Token { } } -impl From for Token { - fn from(d: paragraph::Paragraph) -> Token { - Token::Paragraph(d) - } -} +#[cfg(test)] +mod tests { -impl From for Token { - fn from(d: header::Header) -> Token { - Token::Header(d) - } -} - -impl From for Token { - fn from(d: span::Span) -> Token { - Token::Span(d) - } -} - -impl From for Token { - fn from(d: literal::Literal) -> Token { - Token::Literal(d) - } -} - -impl From for Token { - fn from(d: anchor::Anchor) -> Token { - Token::Anchor(d) - } -} - -impl From for Token { - fn from(d: linebreak::LineBreak) -> Token { - Token::LineBreak(d) - } -} - -impl From for Token { - fn from(d: preformat::PreFormat) -> Token { - Token::PreFormat(d) - } -} - -impl From for Token { - fn from(d: code::Code) -> Token { - Token::Code(d) - } + #[test] + fn smoke() {} } diff --git a/src/syntax/content/parser/token/anchor.rs b/src/syntax/content/parser/token/anchor.rs index d50e99b..bad10d6 100644 --- a/src/syntax/content/parser/token/anchor.rs +++ b/src/syntax/content/parser/token/anchor.rs @@ -1,5 +1,3 @@ -use std::fmt::Display; - use crate::syntax::content::{Parseable, parser::lexeme::Lexeme}; #[derive(Debug, Clone)] @@ -65,8 +63,33 @@ impl Anchor { } } -impl Display for Anchor { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "Anchor: <{}> to <{:?}>", &self.text, &self.destination) +#[cfg(test)] +mod tests { + + use super::*; + use crate::syntax::content::parser::read; + + #[test] + fn render_anchor() { + let anchor = Anchor::new("AnchorText", "AnchorDest", true); + assert_eq!( + anchor.render(), + r#"AnchorText"# + ); + } + + #[test] + #[should_panic( + expected = "Attempt to lex an anchor directly from a lexeme" + )] + fn lex() { + Anchor::lex(&Lexeme::new("", "")); + } + + #[test] + #[should_panic(expected = "without knowing its destination")] + fn unknown_destination_render() { + let anchor = Anchor::empty(); + drop(anchor.render()); } } diff --git a/src/syntax/content/parser/token/code.rs b/src/syntax/content/parser/token/code.rs index 549f60e..f3a5f70 100644 --- a/src/syntax/content/parser/token/code.rs +++ b/src/syntax/content/parser/token/code.rs @@ -30,3 +30,25 @@ impl Parseable for Code { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn render() { + let code_open = Code::new(true); + assert_eq!(code_open.render(), ""); + + let code_closed = Code::new(false); + assert_eq!(code_closed.render(), ""); + } + + #[test] + #[should_panic( + expected = "Attempt to lex a code tag directly from a lexeme" + )] + fn lex() { + Code::lex(&Lexeme::new("", "")); + } +} diff --git a/src/syntax/content/parser/token/header.rs b/src/syntax/content/parser/token/header.rs index cb3620f..72be7e6 100644 --- a/src/syntax/content/parser/token/header.rs +++ b/src/syntax/content/parser/token/header.rs @@ -1,7 +1,5 @@ use std::{ collections::{HashMap, hash_map::Entry}, - iter::Peekable, - slice, }; use crate::{ @@ -30,16 +28,13 @@ impl Header { pub fn make_id( config: &Config, - iterator: &mut Peekable>, + next_lexeme: &Lexeme, ids: &mut HashMap>, ) -> String { - let base_id = match iterator.peek() { - Some(next_lexeme) - if !config.ascii_dom_ids || next_lexeme.next.is_ascii() => - { - next_lexeme.next.clone() - }, - _ => String::from("h"), + let base_id = if !config.ascii_dom_ids || next_lexeme.next.is_ascii() { + next_lexeme.next.clone() + } else { + String::from("h") }; match ids.entry(base_id.clone()) { @@ -116,20 +111,6 @@ impl Parseable for Header { } } -impl Display for Header { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if let Some(open) = self.open { - if open { - write!(f, "Level {} Open Header", self.level) - } else { - write!(f, "Level {} Closed Header", self.level) - } - } else { - write!(f, "Level {} Header (Unknown open state)", self.level) - } - } -} - #[derive(Debug)] pub enum Level { One, @@ -183,3 +164,96 @@ impl Display for Level { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn make_id() { + let mut map: HashMap> = HashMap::new(); + let id = Header::make_id( + &Config::default(), + &Lexeme::new("##", "Title"), + &mut map, + ); + assert_eq!(id, "Title"); + } + + #[test] + fn ascii_ids_set() { + let config = Config { + ascii_dom_ids: true, + ..Config::default() + }; + + let id = Header::make_id( + &config, + &Lexeme::new("##", "駄目!"), + &mut HashMap::new(), + ); + assert_eq!(id, "h"); + } + + #[test] + fn ascii_ids_unset() { + let config = Config { + ascii_dom_ids: false, + ..Config::default() + }; + + let id = Header::make_id( + &config, + &Lexeme::new("##", "駄目!"), + &mut HashMap::new(), + ); + assert_eq!(id, "駄目!"); + } + + #[test] + fn id_deduplication() { + let mut map: HashMap> = HashMap::new(); + let config = Config::default(); + let id = + Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); + assert_eq!(id, "UVrcCUjoQ"); + + let double = + Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); + assert_eq!(double, "UVrcCUjoQ-1"); + + let double2 = + Header::make_id(&config, &Lexeme::new("##", "UVrcCUjoQ"), &mut map); + assert_eq!(double2, "UVrcCUjoQ-2"); + } + + #[test] + fn get_level() { + for l in 1..=6 { + let header = Header::from_u8(l, true, None); + assert_eq!(header.level(), l); + } + } + + #[test] + fn no_id_render() { + let open_header = Header::from_u8(2, true, None); + let closed_header = Header::from_u8(2, false, None); + assert_eq!(open_header.render(), "

"); + assert_eq!(closed_header.render(), "

"); + } + + #[test] + #[should_panic( + expected = "Attempt to render a header tag while open state is unknown" + )] + fn unknown_open_state_render() { + let header = Header { + level: Level::Two, + open: None, + dom_id: None, + }; + + header.render(); + } +} diff --git a/src/syntax/content/parser/token/linebreak.rs b/src/syntax/content/parser/token/linebreak.rs index d56b49c..9105dcc 100644 --- a/src/syntax/content/parser/token/linebreak.rs +++ b/src/syntax/content/parser/token/linebreak.rs @@ -1,4 +1,3 @@ -use std::fmt::Display; use crate::{ syntax::content::{Parseable, parser::lexeme::Lexeme}, }; @@ -19,9 +18,3 @@ impl Parseable for LineBreak { "\n".to_owned() } } - -impl Display for LineBreak { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "Line Break") - } -} diff --git a/src/syntax/content/parser/token/literal.rs b/src/syntax/content/parser/token/literal.rs index 723b152..5c1c292 100644 --- a/src/syntax/content/parser/token/literal.rs +++ b/src/syntax/content/parser/token/literal.rs @@ -1,4 +1,3 @@ -use std::fmt::Display; use crate::syntax::content::{Parseable, parser::lexeme::Lexeme}; #[derive(Debug)] @@ -21,9 +20,3 @@ impl Parseable for Literal { self.text.clone() } } - -impl Display for Literal { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "Literal: <{}>", &self.text) - } -} diff --git a/src/syntax/content/parser/token/paragraph.rs b/src/syntax/content/parser/token/paragraph.rs index 2348286..c26a397 100644 --- a/src/syntax/content/parser/token/paragraph.rs +++ b/src/syntax/content/parser/token/paragraph.rs @@ -1,4 +1,3 @@ -use std::fmt::Display; use crate::syntax::content::{Parseable, parser::lexeme::Lexeme}; #[derive(Debug)] @@ -37,16 +36,22 @@ impl Parseable for Paragraph { } } -impl Display for Paragraph { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if let Some(open) = self.open { - if open { - write!(f, "Open Paragraph") - } else { - write!(f, "Closed Paragraph") - } - } else { - write!(f, "Unitialized Paragraph (Unknown open state)") - } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lex() { + let p = Paragraph::lex(&Lexeme::new("", "")); + assert!(p.open.is_none()); + } + + #[test] + #[should_panic( + expected = "Attempt to render a paragraph tag while open state is unknown" + )] + fn render_state_unknown() { + let p = Paragraph::lex(&Lexeme::new("", "")); + drop(p.render()); } } diff --git a/src/syntax/content/parser/token/preformat.rs b/src/syntax/content/parser/token/preformat.rs index 195636d..9bb88bb 100644 --- a/src/syntax/content/parser/token/preformat.rs +++ b/src/syntax/content/parser/token/preformat.rs @@ -36,3 +36,29 @@ impl Parseable for PreFormat { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lex() { + let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", "")); + assert!(from_empty_lexeme.open.is_none()); + + let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`")); + assert!(from_non_empty_lexeme.open.is_none()); + } + + #[test] + #[should_panic( + expected = "Attempt to render a preformat tag while open state is unknown" + )] + fn render() { + let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", "")); + from_empty_lexeme.render(); + + let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`")); + from_non_empty_lexeme.render(); + } +} diff --git a/src/syntax/content/parser/token/span.rs b/src/syntax/content/parser/token/span.rs index b312a28..252b83e 100644 --- a/src/syntax/content/parser/token/span.rs +++ b/src/syntax/content/parser/token/span.rs @@ -1,4 +1,3 @@ -use std::fmt::Display; use crate::syntax::content::{Parseable, parser::lexeme::Lexeme}; #[derive(Debug)] @@ -35,16 +34,42 @@ impl Parseable for Span { } } -impl Display for Span { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if let Some(open) = self.open { - if open { - write!(f, "Open Span") - } else { - write!(f, "Closed Span") - } - } else { - write!(f, "Span (Unknown open state)") - } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn probe() { + assert!(!Span::probe(&Lexeme::new( + &crate::ONSET.elapsed().as_nanos().to_string(), + "", + ))); + } + + #[test] + fn lex() { + let span = Span::lex(&Lexeme::new( + &crate::ONSET.elapsed().as_nanos().to_string(), + "", + )); + assert!(span.open.is_none()); + } + + #[test] + fn render() { + let open_span = Span::new(true); + assert_eq!(open_span.render(), ""); + + let closed_span = Span::new(false); + assert_eq!(closed_span.render(), ""); + } + + #[test] + #[should_panic( + expected = "Attempt to render a span tag while open state is unknown" + )] + fn render_unknown_open_state() { + let open_span = Span::lex(&Lexeme::new("", "")); + drop(open_span.render()); } }