Rework token segmentation
This commit is contained in:
parent
a33d9cb1e1
commit
8b782d6d20
16 changed files with 497 additions and 385 deletions
|
|
@ -1,98 +1,62 @@
|
|||
use crate::prelude::*;
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Anchor {
|
||||
text: String,
|
||||
destination: String,
|
||||
sticky: bool,
|
||||
pub text: String,
|
||||
pub destination: Option<String>,
|
||||
pub leading: bool,
|
||||
}
|
||||
|
||||
impl Parseable for Anchor {
|
||||
fn probe(lexeme: &Lexeme) -> bool {
|
||||
let pipe_count = lexeme.count_char('|');
|
||||
log!("{lexeme:?} has {pipe_count} pipes");
|
||||
|
||||
if !(1..=3).contains(&pipe_count) {
|
||||
log!("Negative: Bad pipe count {pipe_count} in {lexeme:?}");
|
||||
return false;
|
||||
}
|
||||
if lexeme.text().matches("||").count() > 0 {
|
||||
log!("Negative: Contiguous pipes in {lexeme:?}");
|
||||
return false;
|
||||
}
|
||||
|
||||
let parts = Anchor::split_parts(lexeme);
|
||||
if (1..=2).contains(&parts.len()) {
|
||||
log!("Positive: Parts {parts:?} with length {}", parts.len());
|
||||
true
|
||||
} else {
|
||||
log!("Negative: {parts:?} have length {}", parts.len());
|
||||
false
|
||||
}
|
||||
lexeme.text() == "|" || (!lexeme.is_whitespace() && lexeme.next == "|")
|
||||
}
|
||||
|
||||
fn lex(lexeme: &Lexeme) -> Anchor {
|
||||
let parts = Anchor::split_parts(lexeme);
|
||||
log!("Lexing anchor {parts:?}");
|
||||
|
||||
let text = parts.first().unwrap_or_else(|| unreachable!());
|
||||
|
||||
fn try_node_anchor(anchor: &str) -> String {
|
||||
if anchor.contains(":") || anchor.contains("/") {
|
||||
anchor.to_owned()
|
||||
} else {
|
||||
format!("/node/{anchor}")
|
||||
}
|
||||
}
|
||||
|
||||
let destination = match parts.get(1) {
|
||||
Some(d) => try_node_anchor(d),
|
||||
None => try_node_anchor(text),
|
||||
};
|
||||
|
||||
let sticky = [
|
||||
",", ".", ":", ";", "!", "?", "/", "(", ")", "%", "*", "&", r#"""#,
|
||||
"'",
|
||||
];
|
||||
|
||||
log!("Lexed anchor: {text} -> {destination}");
|
||||
Anchor {
|
||||
text: text.to_owned(),
|
||||
destination,
|
||||
sticky: sticky.contains(&lexeme.next.as_str()),
|
||||
}
|
||||
fn lex(_lexeme: &Lexeme) -> Anchor {
|
||||
panic!("Attempt to lex an anchor directly from a lexeme");
|
||||
}
|
||||
|
||||
fn render(&self) -> String {
|
||||
let space = if self.sticky {
|
||||
String::new()
|
||||
} else {
|
||||
String::from(" ")
|
||||
let Some(ref destination) = self.destination else {
|
||||
panic!(
|
||||
"Attempt to render anchor {self:?} without knowing its destination."
|
||||
)
|
||||
};
|
||||
format!(
|
||||
r#"<a href="{}">{}</a>{space}"#,
|
||||
&self.destination, &self.text
|
||||
)
|
||||
|
||||
format!(r#"<a href="{}">{}</a>"#, destination, &self.text)
|
||||
}
|
||||
}
|
||||
|
||||
impl Anchor {
|
||||
fn split_parts(lexeme: &Lexeme) -> Vec<String> {
|
||||
lexeme
|
||||
.text()
|
||||
.trim_start_matches('|')
|
||||
.trim_end_matches('|')
|
||||
.split('|')
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(str::to_string)
|
||||
.collect()
|
||||
pub fn new(text: &str, destination: &str, spaced: bool) -> Anchor {
|
||||
Anchor {
|
||||
text: text.to_owned(),
|
||||
destination: Some(Anchor::resolve_destination(destination)),
|
||||
leading: spaced,
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalises a raw link target into the value used for an anchor's `href`.
///
/// * A target containing `:` or `/` is returned unchanged (presumably it is
///   already a URL or a path).
/// * Any other target is treated as a bare name and prefixed with `/node/`.
///
/// An empty `raw` contains neither character and therefore yields `"/node/"`.
fn resolve_destination(raw: &str) -> String {
    // One pass with a char-set pattern instead of two separate substring scans.
    if raw.contains([':', '/']) {
        raw.to_owned()
    } else {
        format!("/node/{raw}")
    }
}
|
||||
|
||||
pub fn empty() -> Anchor {
|
||||
Anchor {
|
||||
text: String::new(),
|
||||
destination: None,
|
||||
leading: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Anchor {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "Anchor: <{}> to <{}>", &self.text, &self.destination)
|
||||
write!(f, "Anchor: <{}> to <{:?}>", &self.text, &self.destination)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,42 +2,31 @@ use crate::{
|
|||
syntax::content::{Parseable, Lexeme},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Code {
|
||||
text: String,
|
||||
sticky: bool,
|
||||
open: bool,
|
||||
}
|
||||
|
||||
impl Code {
|
||||
/// Creates a code tag marker that stores the given `open` flag.
// NOTE(review): `open` appears to select between emitting `<code>` (true)
// and `</code>` (false) in `render` — confirm against the final file.
pub fn new(open: bool) -> Code {
|
||||
Code { open }
|
||||
}
|
||||
}
|
||||
|
||||
impl Parseable for Code {
|
||||
fn probe(lexeme: &Lexeme) -> bool {
|
||||
let chars = lexeme.split_chars();
|
||||
|
||||
if let Some(first_char) = chars.first()
|
||||
&& let Some(last_char) = chars.last()
|
||||
{
|
||||
*first_char == '`' && *last_char == '`'
|
||||
} else {
|
||||
false
|
||||
}
|
||||
lexeme.text() == "`"
|
||||
}
|
||||
|
||||
fn lex(lexeme: &Lexeme) -> Code {
|
||||
let sticky = [
|
||||
",", ".", ":", ";", "!", "?", "/", "(", ")", "%", "*", "&", r#"""#,
|
||||
"'",
|
||||
];
|
||||
|
||||
Code {
|
||||
text: lexeme.text().replace("`", ""),
|
||||
sticky: sticky.contains(&lexeme.next.as_str()),
|
||||
}
|
||||
fn lex(_lexeme: &Lexeme) -> Code {
|
||||
panic!("Attempt to lex a code tag directly from a lexeme")
|
||||
}
|
||||
|
||||
fn render(&self) -> String {
|
||||
let space = if self.sticky {
|
||||
String::new()
|
||||
if self.open {
|
||||
String::from("<code>")
|
||||
} else {
|
||||
String::from(" ")
|
||||
};
|
||||
format!("<code>{}</code>{space}", self.text)
|
||||
String::from("</code>")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,18 @@
|
|||
use std::{
|
||||
collections::{HashMap, hash_map::Entry},
|
||||
iter::Peekable,
|
||||
slice,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
prelude::*,
|
||||
types::Config,
|
||||
syntax::content::{Parseable, Lexeme},
|
||||
};
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Header {
|
||||
open: Option<bool>,
|
||||
level: Level,
|
||||
|
|
@ -19,6 +28,35 @@ impl Header {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn make_id(
|
||||
config: &Config,
|
||||
iterator: &mut Peekable<slice::Iter<'_, Lexeme>>,
|
||||
ids: &mut HashMap<String, Vec<String>>,
|
||||
) -> String {
|
||||
let base_id = match iterator.peek() {
|
||||
Some(next_lexeme)
|
||||
if !config.ascii_dom_ids || next_lexeme.next.is_ascii() =>
|
||||
{
|
||||
next_lexeme.next.to_lowercase()
|
||||
},
|
||||
_ => String::from("h"),
|
||||
};
|
||||
|
||||
match ids.entry(base_id.clone()) {
|
||||
Entry::Occupied(mut occupied) => {
|
||||
let ids_vec = occupied.get_mut();
|
||||
let suffix = ids_vec.len();
|
||||
let id_with_suffix = format!("{base_id}-{suffix}");
|
||||
ids_vec.push(id_with_suffix.clone());
|
||||
id_with_suffix
|
||||
},
|
||||
Entry::Vacant(vacant) => {
|
||||
vacant.insert(vec![base_id.clone()]);
|
||||
base_id
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_u8(level: u8, open: bool, dom_id: Option<&str>) -> Header {
|
||||
Header {
|
||||
level: Level::from_u8(level),
|
||||
|
|
@ -27,7 +65,7 @@ impl Header {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn get_level(&self) -> u8 {
|
||||
pub fn level(&self) -> u8 {
|
||||
match self.level {
|
||||
Level::One => 1,
|
||||
Level::Two => 2,
|
||||
|
|
@ -92,6 +130,7 @@ impl Display for Header {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Level {
|
||||
One,
|
||||
Two,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use crate::{
|
|||
syntax::content::{Parseable, parser::lexeme::Lexeme},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LineBreak {}
|
||||
|
||||
impl Parseable for LineBreak {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::fmt::Display;
|
||||
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Literal {
|
||||
text: String,
|
||||
}
|
||||
|
|
@ -17,12 +18,7 @@ impl Parseable for Literal {
|
|||
}
|
||||
|
||||
fn render(&self) -> String {
|
||||
let non_sticky = [" ", "\n"];
|
||||
if non_sticky.contains(&self.text.as_str()) {
|
||||
self.text.clone()
|
||||
} else {
|
||||
format!("{} ", self.text.clone())
|
||||
}
|
||||
self.text.clone()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::fmt::Display;
|
||||
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Paragraph {
|
||||
open: Option<bool>,
|
||||
}
|
||||
|
|
@ -14,9 +15,7 @@ impl Paragraph {
|
|||
impl Parseable for Paragraph {
|
||||
fn probe(lexeme: &Lexeme) -> bool {
|
||||
// lexeme for paragraph is any non-whitespace, parser knows the context
|
||||
let raw = lexeme.text();
|
||||
let trimmed = raw.trim();
|
||||
!trimmed.is_empty() && trimmed != "\n"
|
||||
!lexeme.is_whitespace()
|
||||
}
|
||||
|
||||
fn lex(_lexeme: &Lexeme) -> Paragraph {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use crate::{
|
|||
syntax::content::{Parseable, Lexeme},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PreFormat {
|
||||
open: Option<bool>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::fmt::Display;
|
||||
use crate::syntax::content::{Parseable, parser::lexeme::Lexeme};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Span {
|
||||
open: Option<bool>,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue