Add a context parser for PreFormat blocks
This commit is contained in:
parent
29c2beb3ed
commit
d0ca4e6cb3
9 changed files with 145 additions and 94 deletions
10
Cargo.lock
generated
10
Cargo.lock
generated
|
|
@ -86,9 +86,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.11.1"
|
||||
version = "2.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
|
||||
checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
|
|
@ -238,7 +238,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "en"
|
||||
version = "0.4.0-alpha"
|
||||
version = "0.4.1-alpha"
|
||||
dependencies = [
|
||||
"axum",
|
||||
"serde",
|
||||
|
|
@ -531,9 +531,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
|
|||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.30"
|
||||
version = "0.4.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5"
|
||||
checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f"
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "en"
|
||||
version = "0.4.0-alpha"
|
||||
version = "0.4.1-alpha"
|
||||
description = "A non-linear writing instrument."
|
||||
license = "AGPL-3.0-only"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
use crate::syntax::content::parser::{
|
||||
State, Token,
|
||||
token::{Header, Paragraph, PreFormat, Verse},
|
||||
token::{Header, Paragraph, Verse},
|
||||
};
|
||||
|
||||
pub mod anchor;
|
||||
pub mod block;
|
||||
pub mod inline;
|
||||
pub mod list;
|
||||
pub mod preformat;
|
||||
pub mod quote;
|
||||
pub mod table;
|
||||
|
||||
|
|
@ -38,30 +39,32 @@ pub enum Inline {
|
|||
}
|
||||
|
||||
/// # Panics
|
||||
/// Panics if there is an open header or list at end of input.
|
||||
/// Panics if there is an open token at end of input that can't be easily
|
||||
/// closed by simply adding a matching closing token. This normally is handled
|
||||
/// by context parsers and probably indicates an error in one of them.
|
||||
pub fn close(state: &State, tokens: &mut Vec<Token>) {
|
||||
match state.context.block {
|
||||
Block::PreFormat => {
|
||||
tokens.push(Token::PreFormat(PreFormat::new(false)));
|
||||
},
|
||||
Block::Paragraph => {
|
||||
tokens.push(Token::Paragraph(Paragraph::new(false)));
|
||||
},
|
||||
Block::List => {
|
||||
panic!("End of input with open list")
|
||||
},
|
||||
Block::Header(level) => {
|
||||
tokens.push(Token::Header(Header::from_u8(level, false, None)));
|
||||
},
|
||||
Block::Quote => {
|
||||
panic!("End of input with open quote")
|
||||
},
|
||||
Block::Table => {
|
||||
panic!("End of input with open table")
|
||||
},
|
||||
Block::Verse => {
|
||||
tokens.push(Token::Verse(Verse::new(false)));
|
||||
},
|
||||
Block::PreFormat => {
|
||||
panic!("End of input with open preformat: {tokens:#?}")
|
||||
},
|
||||
Block::List => {
|
||||
panic!("End of input with open list: {tokens:#?}")
|
||||
},
|
||||
Block::Quote => {
|
||||
panic!("End of input with open quote: {tokens:#?}")
|
||||
},
|
||||
Block::Table => {
|
||||
panic!("End of input with open table: {tokens:#?}")
|
||||
},
|
||||
Block::None => (),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,15 +6,17 @@ use crate::{
|
|||
syntax::content::{
|
||||
Parseable as _,
|
||||
parser::{
|
||||
Block, Lexeme, State, Token,
|
||||
Block, Lexeme, State, Token, context,
|
||||
token::{
|
||||
Header, LineBreak, List, Literal, Paragraph, PreFormat, Quote,
|
||||
Table, Verse,
|
||||
Header, LineBreak, List, Paragraph, PreFormat, Quote, Table,
|
||||
Verse,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/// A return of `true` will trigger a `continue` on the outer parser, causing
|
||||
/// no further parsing of the current lexeme.
|
||||
pub fn parse(
|
||||
lexeme: &Lexeme,
|
||||
state: &mut State,
|
||||
|
|
@ -27,8 +29,7 @@ pub fn parse(
|
|||
if PreFormat::probe(lexeme) {
|
||||
log!(VERBOSE, "Block Context: None -> PreFormat on {lexeme}");
|
||||
state.context.block = Block::PreFormat;
|
||||
tokens.push(Token::PreFormat(PreFormat::new(true)));
|
||||
return true;
|
||||
return true
|
||||
} else if Header::probe(lexeme) {
|
||||
let mut header = Header::lex(lexeme);
|
||||
header.dom_id = Some(Header::make_id(
|
||||
|
|
@ -44,7 +45,7 @@ pub fn parse(
|
|||
log!(VERBOSE, "Block Context: None -> List on {lexeme}");
|
||||
state.context.block = Block::List;
|
||||
state.buffers.list.candidate.ordered = lexeme.match_char('+');
|
||||
return super::list::parse(
|
||||
return context::list::parse(
|
||||
lexeme, state, tokens, iterator, graph,
|
||||
);
|
||||
} else if Quote::probe(lexeme) {
|
||||
|
|
@ -71,14 +72,7 @@ pub fn parse(
|
|||
}
|
||||
},
|
||||
Block::PreFormat => {
|
||||
if PreFormat::probe(lexeme) {
|
||||
tokens.push(Token::PreFormat(PreFormat::new(false)));
|
||||
log!(VERBOSE, "Block Context: PreFormat -> None on {lexeme}");
|
||||
state.context.block = Block::None;
|
||||
} else {
|
||||
tokens.push(Token::Literal(Literal::lex(lexeme)));
|
||||
}
|
||||
return true;
|
||||
return context::preformat::parse(lexeme, state, tokens, iterator);
|
||||
},
|
||||
Block::Paragraph => {
|
||||
if Paragraph::probe_end(lexeme) {
|
||||
|
|
@ -95,13 +89,17 @@ pub fn parse(
|
|||
}
|
||||
},
|
||||
Block::List => {
|
||||
return super::list::parse(lexeme, state, tokens, iterator, graph);
|
||||
return context::list::parse(lexeme, state, tokens, iterator, graph);
|
||||
},
|
||||
Block::Quote => {
|
||||
return super::quote::parse(lexeme, state, tokens, iterator, graph);
|
||||
return context::quote::parse(
|
||||
lexeme, state, tokens, iterator, graph,
|
||||
);
|
||||
},
|
||||
Block::Table => {
|
||||
return super::table::parse(lexeme, state, tokens, iterator, graph);
|
||||
return context::table::parse(
|
||||
lexeme, state, tokens, iterator, graph,
|
||||
);
|
||||
},
|
||||
Block::Verse => {
|
||||
if Verse::probe_end(lexeme) {
|
||||
|
|
@ -127,7 +125,7 @@ mod tests {
|
|||
graph::Graph,
|
||||
syntax::content::parser::{
|
||||
self, Block, State, Token, context,
|
||||
token::{Header, PreFormat, header::Level},
|
||||
token::{Header, header::Level},
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -161,16 +159,6 @@ mod tests {
|
|||
assert_eq!(vec, vec![Token::Header(Header::from_u8(1, false, None))]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn end_with_open_preformat() {
|
||||
let mut state = State::default();
|
||||
state.context.block = Block::PreFormat;
|
||||
|
||||
let mut vec: Vec<Token> = vec![];
|
||||
context::close(&state, &mut vec);
|
||||
assert_eq!(vec, vec![Token::PreFormat(PreFormat::new(false))]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncated_header_level() {
|
||||
let u: usize = 999;
|
||||
|
|
|
|||
61
src/syntax/content/parser/context/preformat.rs
Normal file
61
src/syntax/content/parser/context/preformat.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
use std::{iter::Peekable, slice::Iter};
|
||||
|
||||
use crate::{
|
||||
prelude::*,
|
||||
syntax::content::{
|
||||
Parseable as _,
|
||||
parser::{Lexeme, State, Token, context::Block, token::PreFormat},
|
||||
},
|
||||
};
|
||||
|
||||
/// Handles open `PreFormat` contexts until a block is fully parsed.
|
||||
///
|
||||
/// A return of `true` will trigger a continue in the outer parser,
|
||||
/// skipping any further parsing of the current lexeme.
|
||||
///
|
||||
/// # Panics
|
||||
/// This parser can handle only the PreFormat context, and will panic if passed an
|
||||
/// unrelated context since it has no knowledge of how to handle them.
|
||||
pub fn parse(
|
||||
lexeme: &Lexeme,
|
||||
state: &mut State,
|
||||
tokens: &mut Vec<Token>,
|
||||
iterator: &mut Peekable<Iter<'_, Lexeme>>,
|
||||
) -> bool {
|
||||
let buffer = &mut state.buffers.preformat;
|
||||
let candidate = &mut buffer.candidate;
|
||||
|
||||
#[expect(clippy::wildcard_enum_match_arm)]
|
||||
match state.context.block {
|
||||
Block::PreFormat => {
|
||||
if lexeme.match_first_char('<') {
|
||||
candidate.text.push_str("<");
|
||||
candidate.text.push_str(
|
||||
lexeme.text().strip_prefix('<').unwrap_or(&lexeme.text()),
|
||||
);
|
||||
} else if lexeme.match_last_char('>') {
|
||||
candidate.text.push_str(
|
||||
lexeme.text().strip_suffix('>').unwrap_or(&lexeme.text()),
|
||||
);
|
||||
candidate.text.push_str(">");
|
||||
} else if lexeme.match_char('\\') {
|
||||
candidate.text.push_str(lexeme.next().as_str());
|
||||
iterator.next();
|
||||
return true;
|
||||
} else if PreFormat::probe(lexeme) {
|
||||
// found end of block, push it and reset state
|
||||
log!(VERBOSE, "Accepting preformat candidate {candidate}");
|
||||
tokens.push(Token::PreFormat(candidate.clone()));
|
||||
state.context.block = Block::None;
|
||||
*candidate = PreFormat::default();
|
||||
} else {
|
||||
// anything else is pushed into the candidate preformat's text
|
||||
candidate.text.push_str(&lexeme.text());
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
panic!("PreFormat context parser called for {:?}", state.context)
|
||||
},
|
||||
}
|
||||
true
|
||||
}
|
||||
|
|
@ -32,6 +32,8 @@ impl Lexeme {
|
|||
|
||||
pub fn mutate_text(&mut self, new: &str) { self.text = new.to_string(); }
|
||||
|
||||
/// Returns an Option containing the character if the raw lexeme text
|
||||
/// is composed of a single character, None if it has multiple characters.
|
||||
pub fn as_char(&self) -> Option<char> {
|
||||
if self.text.chars().count() == 1 {
|
||||
self.text.chars().nth(0)
|
||||
|
|
@ -56,6 +58,7 @@ impl Lexeme {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns true if the raw lexeme text is a single matching character.
|
||||
pub fn match_char(&self, c: char) -> bool {
|
||||
self.as_char().is_some_and(|as_char| as_char == c)
|
||||
}
|
||||
|
|
@ -86,6 +89,8 @@ impl Lexeme {
|
|||
&& self.match_third_char(c3)
|
||||
}
|
||||
|
||||
/// Returns true if the lexeme raw text is composed of a single character
|
||||
/// and this character is in the provided slice.
|
||||
pub fn match_char_in(&self, slice: &[char]) -> bool {
|
||||
self.as_char().is_some_and(|c| slice.contains(&c))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,9 @@ pub(super) fn lex(
|
|||
|
||||
let mut iterator = lexemes.iter().peekable();
|
||||
while let Some(lexeme) = iterator.next() {
|
||||
if lexeme.match_char('\\') {
|
||||
if lexeme.match_char('\\')
|
||||
&& !matches!(state.context.block, context::Block::PreFormat)
|
||||
{
|
||||
if let Some(next) = iterator.next() {
|
||||
tokens.push(Token::Literal(Literal::lex(next)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::collections::HashMap;
|
|||
use crate::syntax::content::parser::{
|
||||
Token,
|
||||
context::Context,
|
||||
token::{Anchor, Item, List, Quote, Table},
|
||||
token::{Anchor, Item, List, PreFormat, Quote, Table},
|
||||
};
|
||||
|
||||
#[derive(Clone, Default, Debug)]
|
||||
|
|
@ -29,6 +29,7 @@ pub struct Buffers {
|
|||
pub list: ListBuffer,
|
||||
pub quote: QuoteBuffer,
|
||||
pub table: TableBuffer,
|
||||
pub preformat: PreFormatBuffer,
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
|
|
@ -59,6 +60,11 @@ pub struct TableBuffer {
|
|||
pub in_header: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct PreFormatBuffer {
|
||||
pub candidate: PreFormat,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AnchorBuffer {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let display_text = if self.text.is_empty() {
|
||||
|
|
|
|||
|
|
@ -1,46 +1,42 @@
|
|||
use crate::syntax::content::{Lexeme, Parseable};
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
#[derive(Debug, Default, Clone, Eq, PartialEq)]
|
||||
pub struct PreFormat {
|
||||
open: Option<bool>,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
impl PreFormat {
|
||||
pub const fn new(open: bool) -> PreFormat { PreFormat { open: Some(open) } }
|
||||
pub fn new(text: &str) -> PreFormat {
|
||||
PreFormat {
|
||||
text: String::from(text),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PreFormat {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let display_open_state = if let Some(open_state) = self.open {
|
||||
if open_state { "open" } else { "closed" }
|
||||
let character_count = self.text.chars().count();
|
||||
let is_whitespace = self.text.trim_ascii().is_empty();
|
||||
let summary = if is_whitespace {
|
||||
"empty"
|
||||
} else {
|
||||
"unknown"
|
||||
&format!("{character_count} chars")
|
||||
};
|
||||
write!(f, "PreFormat [{display_open_state}]")
|
||||
write!(f, "PreFormat [{summary}]")
|
||||
}
|
||||
}
|
||||
|
||||
impl Parseable for PreFormat {
|
||||
fn probe(lexeme: &Lexeme) -> bool {
|
||||
lexeme.match_first_char('`') && (lexeme.next() == "\n" || lexeme.last())
|
||||
lexeme.match_char('`') && (lexeme.next() == "\n" || lexeme.last())
|
||||
}
|
||||
|
||||
fn lex(_lexeme: &Lexeme) -> PreFormat { PreFormat { open: None } }
|
||||
|
||||
fn render(&self) -> String {
|
||||
if let Some(o) = self.open {
|
||||
if o {
|
||||
"<pre>".to_owned()
|
||||
} else {
|
||||
"</pre>".to_owned()
|
||||
}
|
||||
} else {
|
||||
panic!(
|
||||
"Attempt to render a preformat tag while open state is unknown"
|
||||
)
|
||||
}
|
||||
fn lex(_lexeme: &Lexeme) -> PreFormat {
|
||||
panic!("Attempt to lex a preformat directly from a lexeme")
|
||||
}
|
||||
|
||||
fn render(&self) -> String { format!("<pre>{}</pre>", self.text) }
|
||||
|
||||
fn flatten(&self) -> String { String::default() }
|
||||
}
|
||||
|
||||
|
|
@ -50,49 +46,39 @@ mod tests {
|
|||
use crate::syntax::content::parser::Token;
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Attempt to lex a preformat directly from a lexeme"
|
||||
)]
|
||||
fn lex() {
|
||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
assert!(from_empty_lexeme.open.is_none());
|
||||
|
||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
assert!(from_non_empty_lexeme.open.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "Attempt to render a preformat tag while \
|
||||
open state is unknown")]
|
||||
fn render() {
|
||||
let from_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
from_empty_lexeme.render();
|
||||
|
||||
let from_non_empty_lexeme = PreFormat::lex(&Lexeme::default());
|
||||
from_non_empty_lexeme.render();
|
||||
let lexeme = Lexeme::new("a", "b", "c");
|
||||
PreFormat::lex(&lexeme);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_display() {
|
||||
let mut preformat = PreFormat::new(true);
|
||||
let mut preformat = PreFormat::new("");
|
||||
|
||||
assert_eq!(
|
||||
format!("{}", Token::PreFormat(preformat.clone())),
|
||||
"Tk:PreFormat [open]"
|
||||
"Tk:PreFormat [empty]"
|
||||
);
|
||||
|
||||
preformat.open = Some(false);
|
||||
preformat.text = "\n ".to_string();
|
||||
assert_eq!(
|
||||
format!("{}", Token::PreFormat(preformat.clone())),
|
||||
"Tk:PreFormat [closed]"
|
||||
"Tk:PreFormat [empty]"
|
||||
);
|
||||
|
||||
preformat.open = None;
|
||||
preformat.text = "text".to_string();
|
||||
assert_eq!(
|
||||
format!("{}", Token::PreFormat(preformat)),
|
||||
"Tk:PreFormat [unknown]"
|
||||
"Tk:PreFormat [4 chars]"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flatten() {
|
||||
let preformat = PreFormat::new(false);
|
||||
let preformat = PreFormat::new("");
|
||||
assert_eq!(preformat.flatten(), "");
|
||||
|
||||
let token = Token::PreFormat(preformat);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue