Rework token segmentation

This commit is contained in:
Juno Takano 2025-12-23 21:40:57 -03:00
commit 8b782d6d20
16 changed files with 497 additions and 385 deletions

View file

@ -136,12 +136,3 @@ pub fn deserialize_graph(in_format: &Format, serial: &str) -> Graph {
},
}
}
// Placeholder test module, compiled only for test builds.
#[cfg(test)]
mod tests {
// Smoke test: asserts a constant `true`, so it only proves that the test
// harness builds and runs for this file; no function of the file is called.
#[test]
fn smoke() {
let n = true;
assert!(n);
}
}

View file

@ -49,12 +49,3 @@ async fn main() -> io::Result<()> {
Ok(())
}
// Placeholder test module, compiled only for test builds.
#[cfg(test)]
mod tests {
// Smoke test: asserts a constant `true`, so it only proves that the test
// harness builds and runs for this file; no function of the file is called.
#[test]
fn smoke() {
let e = true;
assert!(e);
}
}

View file

@ -1,4 +1,4 @@
use std::collections::{HashMap, hash_map::Entry};
use std::collections::{HashMap};
use crate::{formats::populate_graph, types::Config};
@ -11,98 +11,147 @@ use lexeme::Lexeme;
pub mod token;
pub mod lexeme;
pub mod cluster;
pub mod segment;
const LEXMAP: LexMap = &[
(LineBreak::probe, |word| {
Token::LineBreak(LineBreak::lex(word))
}),
(Code::probe, |word| Token::Code(Code::lex(word))),
(Anchor::probe, |word| Token::Anchor(Anchor::lex(word))),
(Literal::probe, |word| Token::Literal(Literal::lex(word))),
];
enum Context {
None,
Paragraph,
Header(u8),
PreFormat,
}
struct State {
context: Context,
dom_ids: HashMap<String, Vec<String>>,
}
fn lex(text: &str, map: LexMap) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut state = State {
context: Context::None,
dom_ids: HashMap::new(),
};
let mut state = State::new();
let config: Config = populate_graph().meta.config;
let splits = cluster::cluster(text);
let lexemes = Lexeme::collect(&splits);
let iter = lexemes.iter().peekable();
for lexeme in iter {
match state.context {
Context::None => {
let segments = segment::segment(text);
let lexemes = Lexeme::collect(&segments);
let mut iterator = lexemes.iter().peekable();
while let Some(lexeme) = iterator.next() {
match state.context.block {
BlockContext::None => {
if PreFormat::probe(lexeme) {
state.context.block = BlockContext::PreFormat;
tokens.push(Token::PreFormat(PreFormat::new(true)));
state.context = Context::PreFormat;
continue;
} else if Header::probe(lexeme) {
let base_id =
if config.ascii_dom_ids && !lexeme.next.is_ascii() {
String::from("h")
} else {
lexeme.next.clone().to_lowercase()
};
let id = match state.dom_ids.entry(base_id.clone()) {
Entry::Occupied(mut occupied) => {
let ids = occupied.get_mut();
let suffix: u8 =
ids.len().try_into().unwrap_or_default();
let id_with_suffix = format!("{base_id}-{suffix}");
ids.push(id_with_suffix.clone());
id_with_suffix
},
Entry::Vacant(vacant) => {
vacant.insert(vec![base_id.clone()]);
base_id
},
};
let mut header = Header::lex(lexeme);
header.dom_id = Some(id);
state.context = Context::Header(header.get_level());
header.dom_id = Some(Header::make_id(
&config,
&mut iterator,
&mut state.dom_ids,
));
state.context.block = BlockContext::Header(header.level());
tokens.push(Token::Header(header));
continue;
} else if Paragraph::probe(lexeme) {
state.context.block = BlockContext::Paragraph;
tokens.push(Token::Paragraph(Paragraph::new(true)));
state.context = Context::Paragraph;
}
},
Context::PreFormat => {
BlockContext::PreFormat => {
if PreFormat::probe(lexeme) {
tokens.push(Token::PreFormat(PreFormat::new(false)));
state.context = Context::None;
state.context.block = BlockContext::None;
} else {
tokens.push(Token::Literal(Literal::lex(lexeme)));
}
continue;
},
Context::Paragraph => {
BlockContext::Paragraph => {
if lexeme.text() == "\n" {
tokens.push(Token::Paragraph(Paragraph::new(false)));
state.context = Context::None;
state.context.block = BlockContext::None;
}
},
Context::Header(n) => {
BlockContext::Header(n) => {
if lexeme.text() == "\n" {
tokens.push(Token::Header(Header::from_u8(n, false, None)));
state.context = Context::None;
state.context.block = BlockContext::None;
}
},
}
match state.context.inline {
InlineContext::None => {
if Code::probe(lexeme) {
state.context.inline = InlineContext::Code;
tokens.push(Token::Code(Code::new(true)));
continue;
} else if Anchor::probe(lexeme) {
state.context.inline = InlineContext::Anchor;
state.buffers.anchor.clear();
if lexeme.match_first_char('|') {
state.buffers.anchor.candidate.leading = true;
} else {
state.buffers.anchor.candidate.text = lexeme.text();
}
continue;
}
},
InlineContext::Code => {
if Code::probe(lexeme) {
state.context.inline = InlineContext::None;
tokens.push(Token::Code(Code::new(false)));
continue;
}
},
InlineContext::Anchor => {
let buffer = &mut state.buffers.anchor;
let candidate = &mut buffer.candidate;
if candidate.text.is_empty() {
if lexeme.next == "|" {
buffer.text.push_str(&lexeme.text());
candidate.text.clone_from(&buffer.text);
} else {
buffer.text.push_str(&lexeme.text());
}
continue;
} else if candidate.destination.is_none() {
// candidate is leading and we found the second pipe
if candidate.leading && lexeme.text() == "|" {
// whitespace after pipe: flanking node anchor
if lexeme.is_next_whitespace() {
candidate.destination =
Some(candidate.text.clone());
let token = Token::Anchor(candidate.clone());
tokens.push(token);
state.context.inline = InlineContext::None;
// non-whitespace after pipe is the destination
} else {
candidate.destination = Some(lexeme.next.clone());
let token = Token::Anchor(candidate.clone());
tokens.push(token);
state.context.inline = InlineContext::None;
// if there is a trailing pipe, consume it
if let Some(next) = iterator.next()
&& next.next == "|"
{
iterator.next();
}
}
// candidate is nonleading and we found a second pipe
} else if !candidate.leading && lexeme.next == "|" {
candidate.destination = Some(lexeme.text());
tokens.push(Token::Anchor(candidate.clone()));
state.context.inline = InlineContext::None;
iterator.next();
// candidate is nonleading and we found whitespace
} else if lexeme.is_next_whitespace() {
candidate.destination = Some(lexeme.text());
let token = Token::Anchor(candidate.clone());
tokens.push(token);
state.context.inline = InlineContext::None;
// candidate is nonleading and we haven't found whitespace
} else {
buffer.destination.push_str(&lexeme.text());
}
continue;
} else {
unreachable!("Anchor is already fully parsed");
}
},
}
@ -118,6 +167,68 @@ fn lex(text: &str, map: LexMap) -> Vec<Token> {
tokens
}
/// Block-level element the lexer is currently inside of.
enum BlockContext {
// Inside a paragraph; `lex` closes it when it meets a "\n" lexeme.
Paragraph,
// Inside a header; the payload is the value `lex` stores from
// `Header::level()` and later hands to `Header::from_u8` for the closing token.
Header(u8),
// Inside a preformatted block; `lex` emits lexemes as literals until
// `PreFormat::probe` matches again.
PreFormat,
// Not inside any block.
None,
}
/// Inline element the lexer is currently inside of.
enum InlineContext {
// An anchor is being assembled in `State::buffers.anchor`.
Anchor,
// Between an opening and a closing code marker.
Code,
// Not inside any inline element.
None,
}
/// Mutable state carried by `lex` from one lexeme to the next.
struct State {
// Current block and inline contexts.
context: Context,
// Maps a base DOM id to the ids already handed out for it; `lex` passes
// this map to `Header::make_id`.
dom_ids: HashMap<String, Vec<String>>,
// Scratch buffers for tokens that span several lexemes.
buffers: Buffers,
}
/// Scratch buffers for tokens that are assembled across several lexemes.
struct Buffers {
// Buffer used while the inline context is `InlineContext::Anchor`.
anchor: AnchorBuffer,
}
/// Working storage for an anchor that is still being read.
#[derive(Debug)]
struct AnchorBuffer {
// The anchor under construction; cloned into a `Token::Anchor` once its
// destination is known.
candidate: Anchor,
// Text accumulated so far for the anchor's visible part.
text: String,
// Text accumulated so far for the anchor's destination.
destination: String,
}
impl AnchorBuffer {
fn clear(&mut self) {
self.candidate = Anchor::empty();
self.text = String::new();
self.destination = String::new();
}
}
impl State {
    /// Creates the initial lexer state: no block or inline context, no DOM
    /// ids handed out yet, and an empty anchor buffer.
    fn new() -> State {
        let anchor = AnchorBuffer {
            candidate: Anchor::empty(),
            text: String::new(),
            destination: String::new(),
        };
        let context = Context {
            inline: InlineContext::None,
            block: BlockContext::None,
        };

        State {
            context,
            dom_ids: HashMap::new(),
            buffers: Buffers { anchor },
        }
    }
}
/// The lexer's position in the document: one block-level and one
/// inline-level context, tracked independently of each other.
struct Context {
// Enclosing block element, if any.
block: BlockContext,
// Enclosing inline element, if any.
inline: InlineContext,
}
/// Renders every token in order and concatenates the results into a single
/// string.
fn parse(tokens: &[Token]) -> String {
    let rendered = tokens.iter().map(Token::render);
    rendered.collect()
}

View file

@ -1,192 +0,0 @@
use crate::prelude::*;
/// Splits `text` into space-separated words and regroups them into
/// "clusters": a word carrying a delimiter (see `delimiter::match_delimiter`)
/// is either pushed as-is, split after its last delimiter, or joined with the
/// following words (single spaces in between) up to the word that holds the
/// closing delimiter. All other words become one cluster each.
///
/// Returns the clusters in input order.
pub fn cluster(text: &str) -> Vec<String> {
// Pad every newline with spaces so that, after splitting on single spaces,
// each "\n" ends up as a word of its own. Consecutive spaces in the input
// yield empty-string words.
let words: Vec<String> = text
.replace("\n", " \n ")
.split(' ')
.map(str::to_string)
.collect();
let mut clusters: Vec<String> = vec![];
// Toggled by every word that is exactly "`"; while set, words are copied
// through without any delimiter handling.
let mut raw_context = false;
let mut iterator = words.into_iter().peekable();
while let Some(word) = iterator.next() {
log!("Iterating: {word:?}");
// Note that the toggling word itself is not skipped: it falls through to
// the delimiter handling below.
if word == "`" {
raw_context = !raw_context;
log!("Raw context is now {raw_context}");
} else if raw_context {
log!("Skip: In raw context");
clusters.push(word);
continue;
}
// Words without any known delimiter are clusters on their own.
let Some(delimiter) = delimiter::match_delimiter(&word) else {
log!("Skip: {word:?} does not have a delimiter");
clusters.push(word);
continue;
};
// A non-leading delimiter that is not the first character of the word
// does not open a cluster.
if !delimiter.leading && !word.starts_with(delimiter.char) {
clusters.push(word);
continue;
}
// The word already holds a complete delimited unit (exactly two
// delimiters, or two to three for "triple" delimiters): cut it after the
// last delimiter and push both halves.
if (!delimiter.greedy
&& !delimiter.triple
&& word.matches(delimiter.char).count() == 2)
|| (delimiter.triple
&& (2..=3).contains(&word.matches(delimiter.char).count()))
{
log!("Skip: {word:?} is almost atomic, but must be split");
match word.rsplit_once(delimiter.char) {
Some((head, tail)) => {
log!("Pushing head {head:?}, tail {tail:?} into clusters");
clusters.push(format!("{head}{}", delimiter.char));
clusters.push(tail.to_string());
continue;
},
// The count check above guarantees at least one delimiter in `word`.
None => unreachable!(),
}
}
// A greedy delimiter does not reach across a line break.
if let Some(next) = iterator.peek()
&& next == "\n"
&& delimiter.greedy
{
log!("Skip: Next {next:?} is a break, delimiter is greedy");
clusters.push(word);
continue;
}
// The word both starts and ends with the delimiter: nothing to join.
if word.starts_with(&delimiter.string)
&& word.ends_with(&delimiter.string)
{
log!("Skip: {word:?} is atomically-delimited");
clusters.push(word);
continue;
}
// From here on `word` opens a cluster: collect the following words into
// `parts` until one of them contains the delimiter character.
// NOTE(review): if the input ends before such a word is found, the loop
// below finishes without pushing `parts`, so the collected words never
// reach `clusters` -- confirm whether that is intended.
log!("Found cluster from {delimiter:?} in {word:?}");
let mut parts: Vec<String> = vec![word.clone()];
log!("Seeking from a base of {parts:?}");
while let Some(next) = iterator.peek() {
if next.contains(&delimiter.char.to_string()) {
log!("Found end of cluster: {next:?}");
// Greedy triple delimiter with more than one delimiter in the closing
// word: what precedes the last delimiter closes the cluster, the rest
// becomes a cluster of its own.
if delimiter.greedy
&& delimiter.triple
&& next.matches(delimiter.char).count() > 1
{
match next.rsplit_once(delimiter.char) {
Some((head, tail)) => {
log!(
"Pushing head {head:?} of greedy triple EOC \
into parts and tail {tail:?} into clusters"
);
parts.push(format!("{head}{}", delimiter.char));
clusters.push(parts.join(" "));
clusters.push(tail.to_string());
log!("Breaking past clusters {clusters:?}");
// Consume the word that was only peeked so far.
iterator.next();
break;
},
None => unreachable!(),
}
// Other greedy delimiters swallow the closing word whole.
} else if delimiter.greedy {
log!("Pushing end of cluster into parts");
parts.push(
iterator.next().unwrap_or_else(|| unreachable!()),
);
log!("Pushing parts {parts:?} into clusters {clusters:?}");
clusters.push(parts.join(" "));
log!("Breaking past clusters {clusters:?}");
break;
// Non-greedy delimiters end the cluster right after the last delimiter of
// the closing word; whatever follows it is pushed separately.
} else {
match next.rsplit_once(delimiter.char) {
Some((head, tail)) => {
log!(
"Pushing head {head:?} of humble end of \
cluster into parts"
);
parts.push(format!("{head}{}", delimiter.char));
log!("Pushing parts into clusters");
clusters.push(parts.join(" "));
log!("Pushing tail {tail:?} into clusters");
clusters.push(tail.to_string());
log!("Breaking past clusters");
// Consume the word that was only peeked so far.
iterator.next();
break;
},
// is this one really unreachable?
// (`next` was just checked to contain the delimiter character, so
// `rsplit_once` on that same character finds a match.)
None => unreachable!(),
}
}
} else {
// No delimiter in the upcoming word: it belongs to the cluster body.
log!("No delimiter: Pushing {:?} into parts", iterator.peek());
parts.push(iterator.next().unwrap_or_default());
log!("Seeking a boundary for parts {parts:?}");
}
}
}
log!("Returning clusters");
clusters
}
mod delimiter {
    /// One delimiter character together with the flags that steer how the
    /// clustering code treats it.
    #[derive(Debug, Clone)]
    pub struct Delimiter {
        /// The delimiter as a character.
        pub char: char,
        /// The same delimiter as an owned string.
        pub string: String,
        /// Consulted by the clustering code only; not interpreted here.
        pub greedy: bool,
        /// Consulted by the clustering code only; not interpreted here.
        pub triple: bool,
        /// When `true` the delimiter is matched only as the first character
        /// of a word; otherwise it is matched anywhere inside the word.
        pub leading: bool,
    }

    /// Builds the known delimiters, split into `(leading, nonleading)`.
    /// The declaration order is preserved inside each group.
    fn make_delimiters() -> (Vec<Delimiter>, Vec<Delimiter>) {
        vec![
            Delimiter {
                char: '|',
                string: "|".to_string(),
                greedy: true,
                triple: true,
                leading: false,
            },
            Delimiter {
                char: '`',
                string: "`".to_string(),
                greedy: false,
                triple: false,
                leading: true,
            },
        ]
        .into_iter()
        // `partition` moves each delimiter into its group, so nothing has to
        // be cloned.
        .partition(|delimiter| delimiter.leading)
    }

    /// Finds the delimiter that governs `word`.
    ///
    /// A leading delimiter that equals the first character of `word` takes
    /// precedence; otherwise the first nonleading delimiter contained
    /// anywhere in `word` is returned. Returns `None` for an empty word or
    /// when no delimiter applies.
    pub fn match_delimiter(word: &str) -> Option<Delimiter> {
        let first_char = word.chars().next()?;
        let (leading, nonleading) = make_delimiters();

        leading
            .into_iter()
            .find(|delimiter| delimiter.char == first_char)
            .or_else(|| {
                nonleading
                    .into_iter()
                    .find(|delimiter| word.contains(delimiter.char))
            })
    }
}

View file

@ -16,6 +16,26 @@ impl Lexeme {
self.text.clone()
}
/// Returns `true` when this lexeme's text is exactly one space or one
/// newline.
pub fn is_whitespace(&self) -> bool {
    matches!(self.text.as_str(), " " | "\n")
}
/// Returns `true` when the `next` field is exactly one space or one
/// newline.
pub fn is_next_whitespace(&self) -> bool {
    matches!(self.next.as_str(), " " | "\n")
}
/// Returns `true` when the first character of this lexeme's text equals
/// `query`; an empty text never matches.
pub fn match_first_char(&self, query: char) -> bool {
    self.text.starts_with(query)
}
/// Returns the first character of the `next` field, or `None` when that
/// field is empty.
pub fn next_first_char(&self) -> Option<char> {
    self.next.chars().next()
}
/// # Panics
/// Panics if number of chars for a single lexeme exceeds `i32::MAX`
pub fn count_char(&self, c: char) -> i32 {

View file

@ -0,0 +1,199 @@
/// Splits `text` into segments by delegating to `delimiter::atomize`, and
/// returns whatever that function returns.
pub fn segment(text: &str) -> Vec<String> {
delimiter::atomize(text)
}
mod delimiter {
    /// Characters that always form an atom of their own.
    fn make_delimiters() -> Vec<char> {
        vec!['\n', ' ', '`', '|']
    }

    /// Splits `text` into atoms: every delimiter character (newline, space,
    /// backtick, pipe) becomes a one-character atom, and every maximal run of
    /// other characters becomes one atom.
    ///
    /// Nothing is dropped, so concatenating the returned atoms yields `text`
    /// again. An empty input produces an empty vector.
    pub fn atomize(text: &str) -> Vec<String> {
        let delimiters = make_delimiters();
        let mut atoms: Vec<String> = Vec::new();

        for character in text.chars() {
            // A new atom starts on every delimiter, at the very beginning,
            // and right after a delimiter. Delimiter atoms are always a
            // single character, so looking at how the last atom starts is
            // enough to recognise one.
            let starts_new_atom = delimiters.contains(&character)
                || atoms
                    .last()
                    .map_or(true, |last| last.starts_with(delimiters.as_slice()));

            if starts_new_atom {
                atoms.push(character.to_string());
            } else if let Some(last) = atoms.last_mut() {
                last.push(character);
            }
        }

        atoms
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn atomize_words() {
            // The runs of 4, 2, 3, 2, 3 and 2 spaces are significant: each
            // space must come back as an atom of its own.
            let words = "    justification for  the actions   of those  who hold authority   inevitably dwindles  "; // 2
            let actual = atomize(words);
            let expected = vec![
                " ", " ", " ", " ", "justification", " ", "for", " ", " ",
                "the", " ", "actions", " ", " ", " ", "of", " ", "those", " ",
                " ", "who", " ", "hold", " ", "authority", " ", " ", " ",
                "inevitably", " ", "dwindles", " ", " ",
            ];
            assert_eq!(actual, expected);
        }

        #[test]
        fn atomize_ticks_no_spaces() {
            let s = "a`c`adc`dadcdbd`cdb`dcdb`dc`dad`bdc";
            let actual = atomize(s);
            let expected = vec![
                "a", "`", "c", "`", "adc", "`", "dadcdbd", "`", "cdb", "`",
                "dcdb", "`", "dc", "`", "dad", "`", "bdc",
            ]
            .iter()
            .map(std::string::ToString::to_string)
            .collect::<Vec<String>>();
            assert_eq!(actual, expected);
        }

        #[test]
        fn atomize_ticks_with_spaces() {
            let s = "a`c`adc`da dcdb d` cdb` dcdb `dc ` d ad ` bdc";
            let actual = atomize(s);
            let expected = vec![
                "a", "`", "c", "`", "adc", "`", "da", " ", "dcdb", " ", "d",
                "`", " ", "cdb", "`", " ", "dcdb", " ", "`", "dc", " ", "`",
                " ", "d", " ", "ad", " ", "`", " ", "bdc",
            ]
            .iter()
            .map(std::string::ToString::to_string)
            .collect::<Vec<String>>();
            assert_eq!(actual, expected);
        }

        #[test]
        fn atomize_pipes() {
            let s = "every other |time| as it was perceived";
            let actual = atomize(s);
            let expected = vec![
                "every", " ", "other", " ", "|", "time", "|", " ", "as", " ",
                "it", " ", "was", " ", "perceived",
            ];
            assert_eq!(actual, expected);
        }

        #[test]
        fn atomize_pipes_and_ticks() {
            let s = "every other |time| as `it could or |perhaps somehow|then or now| it was` perceived";
            let actual = atomize(s);
            let expected = vec![
                "every", " ", "other", " ", "|", "time", "|", " ", "as", " ",
                "`", "it", " ", "could", " ", "or", " ", "|", "perhaps", " ",
                "somehow", "|", "then", " ", "or", " ", "now", "|", " ", "it",
                " ", "was", "`", " ", "perceived",
            ];
            assert_eq!(actual, expected);
        }

        #[test]
        fn atomize_newlines() {
            let s = "a`c`adc`da \ndcdb d` cdb` dc\ndb `dc ` d ad ` bdc";
            let actual = atomize(s);
            let expected = vec![
                "a", "`", "c", "`", "adc", "`", "da", " ", "\n", "dcdb", " ",
                "d", "`", " ", "cdb", "`", " ", "dc", "\n", "db", " ", "`",
                "dc", " ", "`", " ", "d", " ", "ad", " ", "`", " ", "bdc",
            ]
            .iter()
            .map(std::string::ToString::to_string)
            .collect::<Vec<String>>();
            assert_eq!(actual, expected);
        }
    }
}

View file

@ -9,6 +9,7 @@ pub mod header;
pub mod preformat;
pub mod code;
#[derive(Debug)]
pub enum Token {
Anchor(anchor::Anchor),
Code(code::Code),

View file

@ -1,98 +1,62 @@
use crate::prelude::*;
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug, Clone)]
pub struct Anchor {
text: String,
destination: String,
sticky: bool,
pub text: String,
pub destination: Option<String>,
pub leading: bool,
}
impl Parseable for Anchor {
fn probe(lexeme: &Lexeme) -> bool {
let pipe_count = lexeme.count_char('|');
log!("{lexeme:?} has {pipe_count} pipes");
if !(1..=3).contains(&pipe_count) {
log!("Negative: Bad pipe count {pipe_count} in {lexeme:?}");
return false;
}
if lexeme.text().matches("||").count() > 0 {
log!("Negative: Contiguous pipes in {lexeme:?}");
return false;
}
let parts = Anchor::split_parts(lexeme);
if (1..=2).contains(&parts.len()) {
log!("Positive: Parts {parts:?} with length {}", parts.len());
true
} else {
log!("Negative: {parts:?} have length {}", parts.len());
false
}
lexeme.text() == "|" || (!lexeme.is_whitespace() && lexeme.next == "|")
}
fn lex(lexeme: &Lexeme) -> Anchor {
let parts = Anchor::split_parts(lexeme);
log!("Lexing anchor {parts:?}");
let text = parts.first().unwrap_or_else(|| unreachable!());
fn try_node_anchor(anchor: &str) -> String {
if anchor.contains(":") || anchor.contains("/") {
anchor.to_owned()
} else {
format!("/node/{anchor}")
}
}
let destination = match parts.get(1) {
Some(d) => try_node_anchor(d),
None => try_node_anchor(text),
};
let sticky = [
",", ".", ":", ";", "!", "?", "/", "(", ")", "%", "*", "&", r#"""#,
"'",
];
log!("Lexed anchor: {text} -> {destination}");
Anchor {
text: text.to_owned(),
destination,
sticky: sticky.contains(&lexeme.next.as_str()),
}
fn lex(_lexeme: &Lexeme) -> Anchor {
panic!("Attempt to lex an anchor directly from a lexeme");
}
fn render(&self) -> String {
let space = if self.sticky {
String::new()
} else {
String::from(" ")
let Some(ref destination) = self.destination else {
panic!(
"Attempt to render anchor {self:?} without knowing its destination."
)
};
format!(
r#"<a href="{}">{}</a>{space}"#,
&self.destination, &self.text
)
format!(r#"<a href="{}">{}</a>"#, destination, &self.text)
}
}
impl Anchor {
fn split_parts(lexeme: &Lexeme) -> Vec<String> {
lexeme
.text()
.trim_start_matches('|')
.trim_end_matches('|')
.split('|')
.filter(|s| !s.is_empty())
.map(str::to_string)
.collect()
pub fn new(text: &str, destination: &str, spaced: bool) -> Anchor {
Anchor {
text: text.to_owned(),
destination: Some(Anchor::resolve_destination(destination)),
leading: spaced,
}
}
fn resolve_destination(raw: &str) -> String {
if raw.contains(":") || raw.contains("/") {
raw.to_owned()
} else {
format!("/node/{raw}")
}
}
pub fn empty() -> Anchor {
Anchor {
text: String::new(),
destination: None,
leading: false,
}
}
}
impl Display for Anchor {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Anchor: <{}> to <{}>", &self.text, &self.destination)
write!(f, "Anchor: <{}> to <{:?}>", &self.text, &self.destination)
}
}

View file

@ -2,42 +2,31 @@ use crate::{
syntax::content::{Parseable, Lexeme},
};
#[derive(Debug)]
pub struct Code {
text: String,
sticky: bool,
open: bool,
}
impl Code {
pub fn new(open: bool) -> Code {
Code { open }
}
}
impl Parseable for Code {
fn probe(lexeme: &Lexeme) -> bool {
let chars = lexeme.split_chars();
if let Some(first_char) = chars.first()
&& let Some(last_char) = chars.last()
{
*first_char == '`' && *last_char == '`'
} else {
false
}
lexeme.text() == "`"
}
fn lex(lexeme: &Lexeme) -> Code {
let sticky = [
",", ".", ":", ";", "!", "?", "/", "(", ")", "%", "*", "&", r#"""#,
"'",
];
Code {
text: lexeme.text().replace("`", ""),
sticky: sticky.contains(&lexeme.next.as_str()),
}
fn lex(_lexeme: &Lexeme) -> Code {
panic!("Attempt to lex a code tag directly from a lexeme")
}
fn render(&self) -> String {
let space = if self.sticky {
String::new()
if self.open {
String::from("<code>")
} else {
String::from(" ")
};
format!("<code>{}</code>{space}", self.text)
String::from("</code>")
}
}
}

View file

@ -1,9 +1,18 @@
use std::{
collections::{HashMap, hash_map::Entry},
iter::Peekable,
slice,
};
use crate::{
prelude::*,
types::Config,
syntax::content::{Parseable, Lexeme},
};
use std::fmt::Display;
#[derive(Debug)]
pub struct Header {
open: Option<bool>,
level: Level,
@ -19,6 +28,35 @@ impl Header {
}
}
/// Produces a DOM id for a header and records it in `ids`.
///
/// The base id is the lowercased `next` field of the lexeme the iterator
/// would yield next (it is only peeked, never consumed). The fallback `"h"`
/// is used when there is no such lexeme, or when `config.ascii_dom_ids` is
/// set and that field is not pure ASCII.
///
/// The first request for a base id returns it unchanged; later requests
/// return `"{base}-{n}"`, where `n` is the number of ids already recorded
/// for that base.
pub fn make_id(
    config: &Config,
    iterator: &mut Peekable<slice::Iter<'_, Lexeme>>,
    ids: &mut HashMap<String, Vec<String>>,
) -> String {
    let base_id = iterator
        .peek()
        .filter(|upcoming| !config.ascii_dom_ids || upcoming.next.is_ascii())
        .map(|upcoming| upcoming.next.to_lowercase())
        .unwrap_or_else(|| String::from("h"));

    match ids.entry(base_id.clone()) {
        Entry::Vacant(slot) => {
            slot.insert(vec![base_id.clone()]);
            base_id
        },
        Entry::Occupied(mut slot) => {
            let issued = slot.get_mut();
            let unique_id = format!("{base_id}-{}", issued.len());
            issued.push(unique_id.clone());
            unique_id
        },
    }
}
pub fn from_u8(level: u8, open: bool, dom_id: Option<&str>) -> Header {
Header {
level: Level::from_u8(level),
@ -27,7 +65,7 @@ impl Header {
}
}
pub fn get_level(&self) -> u8 {
pub fn level(&self) -> u8 {
match self.level {
Level::One => 1,
Level::Two => 2,
@ -92,6 +130,7 @@ impl Display for Header {
}
}
#[derive(Debug)]
pub enum Level {
One,
Two,

View file

@ -3,6 +3,7 @@ use crate::{
syntax::content::{Parseable, parser::lexeme::Lexeme},
};
#[derive(Debug)]
pub struct LineBreak {}
impl Parseable for LineBreak {

View file

@ -1,6 +1,7 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug)]
pub struct Literal {
text: String,
}
@ -17,12 +18,7 @@ impl Parseable for Literal {
}
fn render(&self) -> String {
let non_sticky = [" ", "\n"];
if non_sticky.contains(&self.text.as_str()) {
self.text.clone()
} else {
format!("{} ", self.text.clone())
}
self.text.clone()
}
}

View file

@ -1,6 +1,7 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug)]
pub struct Paragraph {
open: Option<bool>,
}
@ -14,9 +15,7 @@ impl Paragraph {
impl Parseable for Paragraph {
fn probe(lexeme: &Lexeme) -> bool {
// lexeme for paragraph is any non-whitespace, parser knows the context
let raw = lexeme.text();
let trimmed = raw.trim();
!trimmed.is_empty() && trimmed != "\n"
!lexeme.is_whitespace()
}
fn lex(_lexeme: &Lexeme) -> Paragraph {

View file

@ -2,6 +2,7 @@ use crate::{
syntax::content::{Parseable, Lexeme},
};
#[derive(Debug)]
pub struct PreFormat {
open: Option<bool>,
}

View file

@ -1,6 +1,7 @@
use std::fmt::Display;
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
#[derive(Debug)]
pub struct Span {
open: Option<bool>,
}

View file

@ -132,7 +132,7 @@ For example:
docs|/node/Documentation
`
If the left side contains spaces, you need a leading `|` character. In this case, the space on the left side is mandatory:
If the left side contains spaces, you need a leading `|` character:
`
|en docs|https://en.jutty.dev/node/Documentation
@ -141,12 +141,12 @@ If the left side contains spaces, you need a leading `|` character. In this case
If you have a trailing character that you don't want to be considered as part of the destination, you can separate it with a third `|`:
`
This |gem|PreciousStone|, though green, was not an emerald.
This gem|PreciousStone|, though green, was not an emerald.
`
Which renders as:
This |gem|PreciousStone|, though green, was not an emerald.
This gem|PreciousStone|, though green, was not an emerald.
### Node anchors
@ -169,14 +169,15 @@ Because en can resolve IDs case insensitively (with priority to case-sensitive m
In summary, all of the anchors below are valid and lead to the same page:
`
|en Syntax|https://en.jutty.dev/node/Syntax|
|en Syntax|https://en.jutty.dev/node/Syntax
Syntax|https://en.jutty.dev/node/Syntax
|en Syntax|/node/Syntax
Syntax|/node/Syntax
Syntax|/node/syntax
Syntax|Syntax
syntax|syntax
|syntax|Syntax
Syntax|syntax
Syntax|syntax|
|Syntax|
|syntax|
@ -312,23 +313,23 @@ We saw example `docs|/node/Documentation`, but shorter syntax exists.
#### Epistēmē
#### Epistēmē
|en Syntax|https://en.jutty.dev/node/Syntax|
|en Syntax|https://en.jutty.dev/node/Syntax
Syntax|https://en.jutty.dev/node/Syntax
|en Syntax|/node/Syntax
Syntax|/node/Syntax
Syntax|/node/syntax
Syntax|Syntax
syntax|syntax
|syntax|Syntax
Syntax|syntax
Syntax|syntax|
|Syntax|
|syntax|
"""
[meta.config]
content_language = "en"
footer_credits = false
footer_text = """
made by jutty|https://jutty.dev acknowledgements|Acknowledgments |source code|https://codeberg.org/jutty/en
"""