Add tests for the content syntax parser

This commit is contained in:
Juno Takano 2025-12-26 04:19:21 -03:00
commit e50bbd468d
12 changed files with 383 additions and 120 deletions

View file

@ -2,7 +2,7 @@ use parser::{token::Token, lexeme::Lexeme};
pub mod parser;
pub trait Parseable: Into<Token> {
pub trait Parseable {
fn probe(lexeme: &Lexeme) -> bool;
fn lex(lexeme: &Lexeme) -> Self;
fn render(&self) -> String;

View file

@ -1,6 +1,6 @@
use std::collections::{HashMap};
use crate::{prelude::*, syntax::serial::populate_graph, types::Config};
use crate::{syntax::serial::populate_graph, types::Config};
use super::{Parseable as _, Token, LexMap};
use token::{
anchor::Anchor, linebreak::LineBreak, paragraph::Paragraph, header::Header,
@ -39,7 +39,7 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
let mut header = Header::lex(lexeme);
header.dom_id = Some(Header::make_id(
&config,
&mut iterator,
iterator.peek().map_or(&Lexeme::new("", ""), |l| l),
&mut state.dom_ids,
));
state.context.block = BlockContext::Header(header.level());
@ -154,8 +154,6 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
buffer.destination.push_str(&lexeme.text());
}
continue;
} else {
unreachable!("Anchor is already fully parsed");
}
},
}
@ -241,11 +239,10 @@ fn close(state: &State, tokens: &mut Vec<Token>) {
},
BlockContext::Header(_) => panic!("End of file with open header"),
BlockContext::PreFormat => panic!("End of file with open preformat"),
BlockContext::None => log!("End of file on None block context"),
BlockContext::None => (),
}
}
fn parse(tokens: &[Token]) -> String {
tokens.iter().map(Token::render).collect::<String>()
}
@ -256,10 +253,100 @@ pub(super) fn read(text: &str) -> String {
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::content::parser::token::header::Level;

    /// Builds a fresh parser state whose block context is `block`; every
    /// other field keeps the value `State::new()` gave it.
    fn state_in_block(block: BlockContext) -> State {
        let mut state = State::new();
        state.context.block = block;
        state
    }

    /// Reading an empty document renders an empty string.
    #[test]
    fn empty_render_is_empty() {
        let rendered = read("");
        assert_eq!(rendered, "");
    }

    /// Code, a stray `##` and an anchor with a trailing pipe all in one line.
    #[test]
    fn mixed_sample() {
        let source = "`this |test|` tries ## to |brea|k|: things";
        let expected = r#"<p><code>this |test|</code> tries ## to <a href="/node/k">brea</a>: things</p>"#;
        assert_eq!(read(source), expected);
    }

    /// A doubled closing pipe yields an anchor whose destination equals its
    /// text.
    #[test]
    fn force_flanking() {
        let expected = r#"<p><a href="/node/Node">Node</a></p>"#;
        assert_eq!(read("|Node||"), expected);
    }

    /// `|text|destination|` links `text` to `destination`.
    #[test]
    fn flanking_with_trailing_pipe() {
        let expected = r#"<p><a href="/node/Destination">Node</a></p>"#;
        assert_eq!(read("|Node|Destination|"), expected);
    }

    /// An anchor without a leading pipe takes the preceding word as its text.
    #[test]
    fn nonleading_second_pipe() {
        let expected =
            r#"<p>Go to <a href="/node/Destination">Node</a>, here</p>"#;
        assert_eq!(read("Go to Node|Destination|, here"), expected);
    }

    /// Two anchors on consecutive lines must not leak text or destination
    /// into one another.
    #[test]
    fn clear_anchor_buffer() {
        let expected = [
            r#"<p><a href="/node/SomeAnchor">SomeAnchor</a></p>"#,
            r#"<p><a href="/node/SomeOtherAnchor">SomeOtherAnchor</a></p>"#,
        ]
        .join("\n");
        assert_eq!(read("|SomeAnchor|\n|SomeOtherAnchor|"), expected);
    }

    /// Closing while a header block is still open panics.
    #[test]
    #[should_panic(expected = "End of file with open header")]
    fn end_with_open_header() {
        let state = state_in_block(BlockContext::Header(1));
        let mut tokens = vec![];
        close(&state, &mut tokens);
    }

    /// Closing while a preformat block is still open panics.
    #[test]
    #[should_panic(expected = "End of file with open preformat")]
    fn end_with_open_preformat() {
        let state = state_in_block(BlockContext::PreFormat);
        let mut tokens = vec![];
        close(&state, &mut tokens);
    }

    /// An out-of-range level number is displayed as level 6.
    #[test]
    fn truncated_header_level() {
        let oversized: usize = 999;
        assert_eq!(Level::from(oversized).to_string(), "6");
    }

    /// Each level is displayed as its plain digit.
    #[test]
    fn display_level() {
        let cases = [
            (Level::One, "1"),
            (Level::Two, "2"),
            (Level::Three, "3"),
            (Level::Four, "4"),
            (Level::Five, "5"),
            (Level::Six, "6"),
        ];
        for (level, digit) in cases {
            assert_eq!(format!("{}", level), digit);
        }
    }
}

View file

@ -50,7 +50,7 @@ impl Lexeme {
}
/// # Panics
/// Panics if number of chars for a single lexeme exceeds `i2::MAX`
/// Panics if number of chars for a single lexeme exceeds `i32::MAX`
pub fn count_char(&self, c: char) -> i32 {
let count = self.text().chars().filter(|&n| n == c).count();
match i32::try_from(count) {
@ -90,3 +90,59 @@ impl Lexeme {
out_vector
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Lexeme::new` stores its two arguments in `text` and `next`.
    #[test]
    fn new_lexeme() {
        let raw = "3PKK4RzfGgUL58rU2NZbAiGN1o5dOfNu";
        let next = "wAcZe8iVEEcZLp20PP9KKf07zJbeZafa";
        let lexeme = Lexeme::new(raw, next);
        assert_eq!(lexeme.text, raw);
        assert_eq!(lexeme.next, next);
    }

    /// `next_first_char` yields the leading character of the payload.
    #[test]
    fn next_first_char() {
        let payload = "4IU";
        let lexeme = Lexeme::new(payload, payload);
        assert_eq!(lexeme.next_first_char().unwrap(), '4');
    }

    /// `match_first_char` accepts the payload's leading character.
    #[test]
    fn match_first_char() {
        let payload = "MKY";
        let lexeme = Lexeme::new(payload, payload);
        assert!(lexeme.match_first_char('M'));
    }

    /// With an empty payload there is no first character to match.
    #[test]
    fn match_absent_first_char() {
        let payload = "";
        let lexeme = Lexeme::new(payload, payload);
        assert!(!lexeme.match_first_char('x'));
    }

    /// `first` returns the first space-separated word of the payload.
    #[test]
    fn first_word() {
        let payload = "nhNc fGev QnGW E4hj ExyZ";
        let lexeme = Lexeme::new(payload, payload);
        assert_eq!(lexeme.first(), Some(String::from("nhNc")));
    }

    /// `count_char` counts every occurrence of the character in the text.
    #[test]
    fn count_char() {
        let payload = "6Ur3UjnndhENjFNSYWF7bhej2NZKLwdY";
        let lexeme = Lexeme::new(payload, payload);
        assert_eq!(lexeme.count_char('j'), 3);
    }

    /// The count stays exact for a lexeme with far more matches than the
    /// short samples above. The `i32::MAX` panic path itself would need a
    /// multi-gigabyte lexeme, so it is deliberately not exercised here.
    #[test]
    fn count_char_huge_number() {
        let payload = "j".repeat(100_000);
        let lexeme = Lexeme::new(&payload, "");
        assert_eq!(lexeme.count_char('j'), 100_000);
    }
}

View file

@ -36,50 +36,9 @@ impl Token {
}
}
impl From<paragraph::Paragraph> for Token {
fn from(d: paragraph::Paragraph) -> Token {
Token::Paragraph(d)
}
}
#[cfg(test)]
mod tests {
impl From<header::Header> for Token {
fn from(d: header::Header) -> Token {
Token::Header(d)
}
}
impl From<span::Span> for Token {
fn from(d: span::Span) -> Token {
Token::Span(d)
}
}
impl From<literal::Literal> for Token {
fn from(d: literal::Literal) -> Token {
Token::Literal(d)
}
}
impl From<anchor::Anchor> for Token {
fn from(d: anchor::Anchor) -> Token {
Token::Anchor(d)
}
}
impl From<linebreak::LineBreak> for Token {
fn from(d: linebreak::LineBreak) -> Token {
Token::LineBreak(d)
}
}
impl From<preformat::PreFormat> for Token {
fn from(d: preformat::PreFormat) -> Token {
Token::PreFormat(d)
}
}
impl From<code::Code> for Token {
fn from(d: code::Code) -> Token {
Token::Code(d)
}
#[test]
fn smoke() {}
}

View file

@ -1,5 +1,3 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug, Clone)]
@ -65,8 +63,33 @@ impl Anchor {
}
}
impl Display for Anchor {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Anchor: <{}> to <{:?}>", &self.text, &self.destination)
#[cfg(test)]
mod tests {
    use super::*;

    /// An anchor with known text and destination renders as an `<a>` element
    /// pointing at `/node/<destination>`.
    #[test]
    fn render_anchor() {
        let anchor = Anchor::new("AnchorText", "AnchorDest", true);
        assert_eq!(
            anchor.render(),
            r#"<a href="/node/AnchorDest">AnchorText</a>"#
        );
    }

    /// Anchors cannot be lexed straight from a lexeme; trying to do so panics.
    #[test]
    #[should_panic(
        expected = "Attempt to lex an anchor directly from a lexeme"
    )]
    fn lex() {
        Anchor::lex(&Lexeme::new("", ""));
    }

    /// Rendering an empty anchor panics because its destination is unknown.
    #[test]
    #[should_panic(expected = "without knowing its destination")]
    fn unknown_destination_render() {
        let anchor = Anchor::empty();
        drop(anchor.render());
    }
}

View file

@ -30,3 +30,25 @@ impl Parseable for Code {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An opening code token renders `<code>`, a closing one `</code>`.
    #[test]
    fn render() {
        let opening = Code::new(true).render();
        let closing = Code::new(false).render();
        assert_eq!(opening, "<code>");
        assert_eq!(closing, "</code>");
    }

    /// Code tags cannot be lexed straight from a lexeme; trying to panics.
    #[test]
    #[should_panic(
        expected = "Attempt to lex a code tag directly from a lexeme"
    )]
    fn lex() {
        let empty = Lexeme::new("", "");
        Code::lex(&empty);
    }
}

View file

@ -1,7 +1,5 @@
use std::{
collections::{HashMap, hash_map::Entry},
iter::Peekable,
slice,
};
use crate::{
@ -30,16 +28,13 @@ impl Header {
pub fn make_id(
config: &Config,
iterator: &mut Peekable<slice::Iter<'_, Lexeme>>,
next_lexeme: &Lexeme,
ids: &mut HashMap<String, Vec<String>>,
) -> String {
let base_id = match iterator.peek() {
Some(next_lexeme)
if !config.ascii_dom_ids || next_lexeme.next.is_ascii() =>
{
next_lexeme.next.clone()
},
_ => String::from("h"),
let base_id = if !config.ascii_dom_ids || next_lexeme.next.is_ascii() {
next_lexeme.next.clone()
} else {
String::from("h")
};
match ids.entry(base_id.clone()) {
@ -116,20 +111,6 @@ impl Parseable for Header {
}
}
impl Display for Header {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let Some(open) = self.open {
if open {
write!(f, "Level {} Open Header", self.level)
} else {
write!(f, "Level {} Closed Header", self.level)
}
} else {
write!(f, "Level {} Header (Unknown open state)", self.level)
}
}
}
#[derive(Debug)]
pub enum Level {
One,
@ -183,3 +164,96 @@ impl Display for Level {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A `##` lexeme whose following text is `title`.
    fn header_lexeme(title: &str) -> Lexeme {
        Lexeme::new("##", title)
    }

    /// The default configuration with `ascii_dom_ids` overridden.
    fn config_with_ascii_ids(ascii_dom_ids: bool) -> Config {
        Config {
            ascii_dom_ids,
            ..Config::default()
        }
    }

    /// An ASCII title is used verbatim as the id.
    #[test]
    fn make_id() {
        let mut ids: HashMap<String, Vec<String>> = HashMap::new();
        let config = Config::default();
        let id = Header::make_id(&config, &header_lexeme("Title"), &mut ids);
        assert_eq!(id, "Title");
    }

    /// With `ascii_dom_ids` on, a non-ASCII title falls back to the id `h`.
    #[test]
    fn ascii_ids_set() {
        let mut ids = HashMap::new();
        let id = Header::make_id(
            &config_with_ascii_ids(true),
            &header_lexeme("駄目!"),
            &mut ids,
        );
        assert_eq!(id, "h");
    }

    /// With `ascii_dom_ids` off, a non-ASCII title is kept as the id.
    #[test]
    fn ascii_ids_unset() {
        let mut ids = HashMap::new();
        let id = Header::make_id(
            &config_with_ascii_ids(false),
            &header_lexeme("駄目!"),
            &mut ids,
        );
        assert_eq!(id, "駄目!");
    }

    /// Repeating a title appends `-1`, `-2`, ... to keep ids unique.
    #[test]
    fn id_deduplication() {
        let mut ids: HashMap<String, Vec<String>> = HashMap::new();
        let config = Config::default();
        let title = "UVrcCUjoQ";

        let first = Header::make_id(&config, &header_lexeme(title), &mut ids);
        assert_eq!(first, "UVrcCUjoQ");

        let second = Header::make_id(&config, &header_lexeme(title), &mut ids);
        assert_eq!(second, "UVrcCUjoQ-1");

        let third = Header::make_id(&config, &header_lexeme(title), &mut ids);
        assert_eq!(third, "UVrcCUjoQ-2");
    }

    /// `level()` reports the number the header was built from, for 1 to 6.
    #[test]
    fn get_level() {
        for number in 1..=6 {
            let built = Header::from_u8(number, true, None);
            assert_eq!(built.level(), number);
        }
    }

    /// Without a DOM id, a level-two header renders bare `<h2>` / `</h2>`.
    #[test]
    fn no_id_render() {
        let opening = Header::from_u8(2, true, None).render();
        let closing = Header::from_u8(2, false, None).render();
        assert_eq!(opening, "<h2>");
        assert_eq!(closing, "</h2>");
    }

    /// Rendering a header whose open state is `None` panics.
    #[test]
    #[should_panic(
        expected = "Attempt to render a header tag while open state is unknown"
    )]
    fn unknown_open_state_render() {
        let undecided = Header {
            level: Level::Two,
            open: None,
            dom_id: None,
        };
        undecided.render();
    }
}

View file

@ -1,4 +1,3 @@
use std::fmt::Display;
use crate::{
syntax::content::{Parseable, parser::lexeme::Lexeme},
};
@ -19,9 +18,3 @@ impl Parseable for LineBreak {
"\n".to_owned()
}
}
impl Display for LineBreak {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Line Break")
}
}

View file

@ -1,4 +1,3 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug)]
@ -21,9 +20,3 @@ impl Parseable for Literal {
self.text.clone()
}
}
impl Display for Literal {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Literal: <{}>", &self.text)
}
}

View file

@ -1,4 +1,3 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug)]
@ -37,16 +36,22 @@ impl Parseable for Paragraph {
}
}
impl Display for Paragraph {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let Some(open) = self.open {
if open {
write!(f, "Open Paragraph")
} else {
write!(f, "Closed Paragraph")
}
} else {
write!(f, "Unitialized Paragraph (Unknown open state)")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A paragraph lexed from an empty lexeme, as used by both tests.
    fn lexed_paragraph() -> Paragraph {
        Paragraph::lex(&Lexeme::new("", ""))
    }

    /// Lexing leaves the open state undecided.
    #[test]
    fn lex() {
        assert!(lexed_paragraph().open.is_none());
    }

    /// Rendering a paragraph whose open state is undecided panics.
    #[test]
    #[should_panic(
        expected = "Attempt to render a paragraph tag while open state is unknown"
    )]
    fn render_state_unknown() {
        let undecided = lexed_paragraph();
        drop(undecided.render());
    }
}

View file

@ -36,3 +36,29 @@ impl Parseable for PreFormat {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexing leaves the open state undecided, whether or not the lexeme
    /// carries any text.
    #[test]
    fn lex() {
        let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", ""));
        assert!(from_empty_lexeme.open.is_none());
        let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`"));
        assert!(from_non_empty_lexeme.open.is_none());
    }

    // The two render cases live in separate tests: a `should_panic` test
    // stops at its first panic, so a second `render()` call in the same
    // function would never run.

    /// Rendering a tag lexed from an empty lexeme panics because its open
    /// state is unknown.
    #[test]
    #[should_panic(
        expected = "Attempt to render a preformat tag while open state is unknown"
    )]
    fn render_from_empty_lexeme() {
        let from_empty_lexeme = PreFormat::lex(&Lexeme::new("", ""));
        from_empty_lexeme.render();
    }

    /// Rendering a tag lexed from a non-empty lexeme panics the same way.
    #[test]
    #[should_panic(
        expected = "Attempt to render a preformat tag while open state is unknown"
    )]
    fn render_from_non_empty_lexeme() {
        let from_non_empty_lexeme = PreFormat::lex(&Lexeme::new("`", "`"));
        from_non_empty_lexeme.render();
    }
}

View file

@ -1,4 +1,3 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug)]
@ -35,16 +34,42 @@ impl Parseable for Span {
}
}
impl Display for Span {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let Some(open) = self.open {
if open {
write!(f, "Open Span")
} else {
write!(f, "Closed Span")
}
} else {
write!(f, "Span (Unknown open state)")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed all-digit lexeme text. A constant keeps the input identical on
    /// every run, so a failure can always be reproduced.
    const DIGITS: &str = "1234567890";

    /// Plain digit text is not recognised as a span.
    #[test]
    fn probe() {
        assert!(!Span::probe(&Lexeme::new(DIGITS, "")));
    }

    /// Lexing leaves the open state undecided.
    #[test]
    fn lex() {
        let span = Span::lex(&Lexeme::new(DIGITS, ""));
        assert!(span.open.is_none());
    }

    /// An opening span renders `<span>`, a closing one `</span>`.
    #[test]
    fn render() {
        let open_span = Span::new(true);
        assert_eq!(open_span.render(), "<span>");
        let closed_span = Span::new(false);
        assert_eq!(closed_span.render(), "</span>");
    }

    /// Rendering a span whose open state is undecided panics.
    #[test]
    #[should_panic(
        expected = "Attempt to render a span tag while open state is unknown"
    )]
    fn render_unknown_open_state() {
        let undecided_span = Span::lex(&Lexeme::new("", ""));
        drop(undecided_span.render());
    }
}