2018-10-22 15:30:04 +02:00
|
|
|
|
use openssl::rand::rand_bytes;
|
2020-12-28 14:15:45 +01:00
|
|
|
|
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag};
|
2020-04-29 15:13:31 +02:00
|
|
|
|
use regex_syntax::is_word_character;
|
2022-01-11 20:18:13 +01:00
|
|
|
|
use rocket::http::uri::Uri;
|
2018-10-27 20:44:42 +02:00
|
|
|
|
use std::collections::HashSet;
|
2021-01-01 23:10:10 +01:00
|
|
|
|
use syntect::html::{ClassStyle, ClassedHTMLGenerator};
|
2019-12-30 14:35:27 +01:00
|
|
|
|
use syntect::parsing::SyntaxSet;
|
2018-04-23 12:54:37 +02:00
|
|
|
|
|
2018-10-22 15:30:04 +02:00
|
|
|
|
/// Generates an hexadecimal representation of 32 bytes of random data
|
|
|
|
|
pub fn random_hex() -> String {
|
2019-03-20 17:56:17 +01:00
|
|
|
|
let mut bytes = [0; 32];
|
2018-10-22 15:30:04 +02:00
|
|
|
|
rand_bytes(&mut bytes).expect("Error while generating client id");
|
2019-03-20 17:56:17 +01:00
|
|
|
|
bytes
|
|
|
|
|
.iter()
|
|
|
|
|
.fold(String::new(), |res, byte| format!("{}{:x}", res, byte))
|
2018-10-22 15:30:04 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-09-06 10:49:10 +02:00
|
|
|
|
/**
|
|
|
|
|
* Percent-encode characters which are not allowed in IRI path segments.
|
|
|
|
|
*
|
|
|
|
|
* Intended to be used for generating Post ap_url.
|
|
|
|
|
*/
|
|
|
|
|
pub fn iri_percent_encode_seg(segment: &str) -> String {
|
|
|
|
|
segment.chars().map(iri_percent_encode_seg_char).collect()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Encodes a single character for use inside an IRI path segment.
///
/// Alphanumeric characters, the RFC 3986 unreserved marks (`- . _ ~`),
/// the sub-delims (`! $ & ' ( ) * + , ; =`), `:`/`@` (allowed in `pchar`),
/// and a set of wide Unicode ranges are passed through unchanged; anything
/// else is percent-encoded via Rocket's `Uri::percent_encode`.
///
/// NOTE(review): the pass-through ranges look like RFC 3987 `ucschar`
/// (private-use and supplementary planes), but some planes from the RFC
/// (e.g. `U+10000..U+1FFFD`, `U+F0000..`) are absent here — those characters
/// simply get percent-encoded, which is still a valid (if non-minimal)
/// encoding. Confirm against RFC 3987 before extending.
pub fn iri_percent_encode_seg_char(c: char) -> String {
    if c.is_alphanumeric() {
        c.to_string()
    } else {
        match c {
            // Unreserved marks and wide Unicode ranges: emitted as-is.
            '-'
            | '.'
            | '_'
            | '~'
            | '\u{A0}'..='\u{D7FF}'
            | '\u{20000}'..='\u{2FFFD}'
            | '\u{30000}'..='\u{3FFFD}'
            | '\u{40000}'..='\u{4FFFD}'
            | '\u{50000}'..='\u{5FFFD}'
            | '\u{60000}'..='\u{6FFFD}'
            | '\u{70000}'..='\u{7FFFD}'
            | '\u{80000}'..='\u{8FFFD}'
            | '\u{90000}'..='\u{9FFFD}'
            | '\u{A0000}'..='\u{AFFFD}'
            | '\u{B0000}'..='\u{BFFFD}'
            | '\u{C0000}'..='\u{CFFFD}'
            | '\u{D0000}'..='\u{DFFFD}'
            | '\u{E0000}'..='\u{EFFFD}'
            // sub-delims plus ':' and '@', all legal in a path segment.
            | '!'
            | '$'
            | '&'
            | '\''
            | '('
            | ')'
            | '*'
            | '+'
            | ','
            | ';'
            | '='
            | ':'
            | '@' => c.to_string(),
            // Everything else is delegated to Rocket's percent-encoder.
            _ => {
                let s = c.to_string();
                Uri::percent_encode(&s).to_string()
            }
        }
    }
}
|
|
|
|
|
|
2018-10-20 16:38:16 +02:00
|
|
|
|
/// States of the character-level scanner used by `md_to_html` to find
/// mentions (`@user`) and hashtags (`#tag`) inside Markdown text events.
#[derive(Debug)]
enum State {
    // Currently accumulating a mention (text after a leading '@').
    Mention,
    // Currently accumulating a hashtag (text after a leading '#').
    Hashtag,
    // Inside an ordinary word; '@'/'#' here must not start a mention/hashtag
    // (e.g. "not_a@mention").
    Word,
    // Between words: the next '@' or '#' may start a mention or hashtag.
    Ready,
}
|
|
|
|
|
|
2020-01-21 07:02:03 +01:00
|
|
|
|
fn to_inline(tag: Tag<'_>) -> Tag<'_> {
|
2019-03-22 19:51:36 +01:00
|
|
|
|
match tag {
|
2020-12-27 20:38:23 +01:00
|
|
|
|
Tag::Heading(_) | Tag::Table(_) | Tag::TableHead | Tag::TableRow | Tag::TableCell => {
|
2019-03-22 19:51:36 +01:00
|
|
|
|
Tag::Paragraph
|
|
|
|
|
}
|
2020-12-27 20:38:23 +01:00
|
|
|
|
Tag::Image(typ, url, title) => Tag::Link(typ, url, title),
|
2019-03-22 19:51:36 +01:00
|
|
|
|
t => t,
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-12-30 14:35:27 +01:00
|
|
|
|
/// Accumulator used by `highlight_code` while it is inside a fenced code
/// block with a language tag: text events are buffered here until the block
/// ends and can be highlighted as a whole.
struct HighlighterContext {
    // One entry per `Event::Text` received inside the code block.
    content: Vec<String>,
}
|
2021-01-15 16:59:07 +01:00
|
|
|
|
#[allow(clippy::unnecessary_wraps)]
/// Scanner step (for `Iterator::scan`) that syntax-highlights fenced code
/// blocks with syntect.
///
/// While `context` is `Some`, we are inside a fenced code block that has a
/// language tag: text events are buffered instead of emitted. When the block
/// ends, the buffered lines are highlighted and emitted as one `Event::Html`.
/// Always returns `Some` (required by `scan`); the inner `Vec` may be empty.
fn highlight_code<'a>(
    context: &mut Option<HighlighterContext>,
    evt: Event<'a>,
) -> Option<Vec<Event<'a>>> {
    match evt {
        Event::Start(Tag::CodeBlock(kind)) => {
            match &kind {
                // Only fenced blocks with a non-empty language tag are
                // highlighted; indented/untagged blocks pass through.
                CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
                    *context = Some(HighlighterContext { content: vec![] });
                }
                _ => {}
            }
            Some(vec![Event::Start(Tag::CodeBlock(kind))])
        }
        Event::End(Tag::CodeBlock(kind)) => {
            let mut result = vec![];
            if let Some(ctx) = context.take() {
                // `context` is only ever set for a non-empty fenced block
                // (see the Start arm), so any other kind here is a bug.
                let lang = if let CodeBlockKind::Fenced(lang) = &kind {
                    if lang.is_empty() {
                        unreachable!();
                    } else {
                        lang
                    }
                } else {
                    unreachable!();
                };
                // NOTE(review): this loads the full default syntax set for
                // every highlighted block, which is expensive — consider
                // caching it in a lazy static.
                let syntax_set = SyntaxSet::load_defaults_newlines();
                // Try the short token ("rs"), then the full name ("Rust"),
                // then fall back to plain text.
                let syntax = syntax_set.find_syntax_by_token(lang).unwrap_or_else(|| {
                    syntax_set
                        .find_syntax_by_name(lang)
                        .unwrap_or_else(|| syntax_set.find_syntax_plain_text())
                });
                let mut html = ClassedHTMLGenerator::new_with_class_style(
                    syntax,
                    &syntax_set,
                    ClassStyle::Spaced,
                );
                for line in ctx.content {
                    html.parse_html_for_line_which_includes_newline(&line);
                }
                let q = html.finalize();
                result.push(Event::Html(q.into()));
            }
            result.push(Event::End(Tag::CodeBlock(kind)));
            *context = None;
            Some(result)
        }
        Event::Text(t) => {
            if let Some(mut c) = context.take() {
                // Inside a highlighted block: buffer the text, emit nothing.
                c.content.push(t.to_string());
                *context = Some(c);
                Some(vec![])
            } else {
                Some(vec![Event::Text(t)])
            }
        }
        // All other events pass through unchanged.
        _ => Some(vec![evt]),
    }
}
|
2021-01-15 16:59:07 +01:00
|
|
|
|
#[allow(clippy::unnecessary_wraps)]
|
2019-04-06 19:20:33 +02:00
|
|
|
|
fn flatten_text<'a>(state: &mut Option<String>, evt: Event<'a>) -> Option<Vec<Event<'a>>> {
|
|
|
|
|
let (s, res) = match evt {
|
|
|
|
|
Event::Text(txt) => match state.take() {
|
|
|
|
|
Some(mut prev_txt) => {
|
|
|
|
|
prev_txt.push_str(&txt);
|
|
|
|
|
(Some(prev_txt), vec![])
|
|
|
|
|
}
|
2020-12-27 20:38:23 +01:00
|
|
|
|
None => (Some(txt.into_string()), vec![]),
|
2019-04-06 19:20:33 +02:00
|
|
|
|
},
|
|
|
|
|
e => match state.take() {
|
2020-12-27 20:38:23 +01:00
|
|
|
|
Some(prev) => (None, vec![Event::Text(CowStr::Boxed(prev.into())), e]),
|
2019-04-06 19:20:33 +02:00
|
|
|
|
None => (None, vec![e]),
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
*state = s;
|
|
|
|
|
Some(res)
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-15 16:59:07 +01:00
|
|
|
|
#[allow(clippy::unnecessary_wraps)]
|
2019-04-06 19:20:33 +02:00
|
|
|
|
fn inline_tags<'a>(
|
|
|
|
|
(state, inline): &mut (Vec<Tag<'a>>, bool),
|
|
|
|
|
evt: Event<'a>,
|
|
|
|
|
) -> Option<Event<'a>> {
|
|
|
|
|
if *inline {
|
|
|
|
|
let new_evt = match evt {
|
|
|
|
|
Event::Start(t) => {
|
|
|
|
|
let tag = to_inline(t);
|
|
|
|
|
state.push(tag.clone());
|
|
|
|
|
Event::Start(tag)
|
|
|
|
|
}
|
|
|
|
|
Event::End(t) => match state.pop() {
|
|
|
|
|
Some(other) => Event::End(other),
|
|
|
|
|
None => Event::End(t),
|
|
|
|
|
},
|
|
|
|
|
e => e,
|
|
|
|
|
};
|
|
|
|
|
Some(new_evt)
|
|
|
|
|
} else {
|
|
|
|
|
Some(evt)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-19 12:52:32 +01:00
|
|
|
|
/// Callback that resolves a media id to its `(url, content_warning)` pair,
/// or `None` if the id is unknown. Used by `process_image` to rewrite
/// `![](<media id>)` images.
pub type MediaProcessor<'a> = Box<dyn 'a + Fn(i32) -> Option<(String, Option<String>)>>;
|
2019-04-06 19:20:33 +02:00
|
|
|
|
|
|
|
|
|
/// Rewrites image events whose "URL" is actually a media id.
///
/// If `processor` is provided and the image URL parses as an `i32` media id,
/// the id is resolved to a real URL (and optional content warning). With a
/// content warning and `inline == false`, the image is wrapped in a
/// checkbox-toggled `<label>` so readers must opt in to see it; otherwise a
/// plain image with the resolved URL is emitted. Events that are not images,
/// or whose id does not resolve, pass through unchanged.
fn process_image<'a, 'b>(
    evt: Event<'a>,
    inline: bool,
    processor: &Option<MediaProcessor<'b>>,
) -> Event<'a> {
    if let Some(ref processor) = *processor {
        match evt {
            Event::Start(Tag::Image(typ, id, title)) => {
                if let Some((url, cw)) = id.parse::<i32>().ok().and_then(processor.as_ref()) {
                    if let (Some(cw), false) = (cw, inline) {
                        // There is a content warning and we are not inline:
                        // emit the opening half of the CW wrapper. The
                        // matching End arm below emits the closing half.
                        // NOTE(review): whitespace inside this raw string was
                        // lost during extraction and is reconstructed here —
                        // it is emitted verbatim in the HTML; confirm against
                        // the original file.
                        Event::Html(CowStr::Boxed(
                            format!(
                                r#"<label for="postcontent-cw-{id}">
<input type="checkbox" id="postcontent-cw-{id}" checked="checked" class="cw-checkbox">
<span class="cw-container">
<span class="cw-text">
{cw}
</span>
<img src="{url}" alt=""#,
                                // Random id so multiple CW images on one
                                // page get distinct checkbox ids.
                                id = random_hex(),
                                cw = cw,
                                url = url
                            )
                            .into(),
                        ))
                    } else {
                        // No CW (or inline rendering): plain image with the
                        // resolved URL.
                        Event::Start(Tag::Image(typ, CowStr::Boxed(url.into()), title))
                    }
                } else {
                    // Not a known media id: leave the event untouched.
                    Event::Start(Tag::Image(typ, id, title))
                }
            }
            Event::End(Tag::Image(typ, id, title)) => {
                if let Some((url, cw)) = id.parse::<i32>().ok().and_then(processor.as_ref()) {
                    if inline || cw.is_none() {
                        Event::End(Tag::Image(typ, CowStr::Boxed(url.into()), title))
                    } else {
                        // Close the wrapper opened in the Start arm.
                        // NOTE(review): raw-string whitespace reconstructed —
                        // see note above.
                        Event::Html(CowStr::Borrowed(
                            r#""/>
</span>
</label>"#,
                        ))
                    }
                } else {
                    Event::End(Tag::Image(typ, id, title))
                }
            }
            e => e,
        }
    } else {
        evt
    }
}
|
|
|
|
|
|
2020-05-03 14:50:32 +02:00
|
|
|
|
/// Tracks where the mention/hashtag scanner currently is in the document, so
/// that `@`/`#` inside code blocks or link texts are not turned into
/// mentions/hashtags.
#[derive(Default, Debug)]
struct DocumentContext {
    // True between Start and End of a code block.
    in_code: bool,
    // True between Start and End of a link.
    in_link: bool,
}
|
|
|
|
|
|
2018-10-20 16:38:16 +02:00
|
|
|
|
/// Renders Markdown to HTML and extracts mentions and hashtags.
///
/// Returns `(HTML, mentions, hashtags)`:
/// - `md`: the Markdown source;
/// - `base_url`: instance domain used to build mention/hashtag links
///   (`https://{base_url}/…`); falls back to `/` when `None`;
/// - `inline`: when true, block-level constructs (headings, tables, images)
///   are downgraded to inline-friendly equivalents;
/// - `media_processor`: optional resolver for media-id image URLs (see
///   `process_image`).
pub fn md_to_html<'a>(
    md: &str,
    base_url: Option<&str>,
    inline: bool,
    media_processor: Option<MediaProcessor<'a>>,
) -> (String, HashSet<String>, HashSet<String>) {
    let base_url = if let Some(base_url) = base_url {
        format!("https://{}/", base_url)
    } else {
        "/".to_owned()
    };
    let parser = Parser::new_ext(md, Options::all());

    // Pipeline of scan/map passes over the pulldown_cmark event stream.
    let (parser, mentions, hashtags): (Vec<Event<'_>>, Vec<String>, Vec<String>) = parser
        // Flatten text because pulldown_cmark break #hashtag in two individual text elements
        .scan(None, flatten_text)
        .flatten()
        // Syntax-highlight fenced code blocks.
        .scan(None, highlight_code)
        .flatten()
        // Resolve media-id images (and add CW wrappers).
        .map(|evt| process_image(evt, inline, &media_processor))
        // Ignore headings, images, and tables if inline = true
        .scan((vec![], inline), inline_tags)
        // Main pass: track code/link context and scan text for @mentions
        // and #hashtags. Each step yields (events, mentions, hashtags).
        .scan(&mut DocumentContext::default(), |ctx, evt| match evt {
            Event::Start(Tag::CodeBlock(_)) => {
                ctx.in_code = true;
                Some((vec![evt], vec![], vec![]))
            }
            Event::End(Tag::CodeBlock(_)) => {
                ctx.in_code = false;
                Some((vec![evt], vec![], vec![]))
            }
            Event::Start(Tag::Link(_, _, _)) => {
                ctx.in_link = true;
                Some((vec![evt], vec![], vec![]))
            }
            Event::End(Tag::Link(_, _, _)) => {
                ctx.in_link = false;
                Some((vec![evt], vec![], vec![]))
            }
            Event::Text(txt) => {
                // Character-level state machine (see `State`). The fold
                // accumulator is (events so far, state, text accumulator,
                // char index, mentions, hashtags).
                // NOTE(review): `txt.chars().count()` is recomputed per
                // character, making this pass quadratic on long text runs.
                let (evts, _, _, _, new_mentions, new_hashtags) = txt.chars().fold(
                    (vec![], State::Ready, String::new(), 0, vec![], vec![]),
                    |(mut events, state, mut text_acc, n, mut mentions, mut hashtags), c| {
                        match state {
                            State::Mention => {
                                // Characters allowed inside a mention
                                // (includes '@' for user@instance.tld).
                                let char_matches = c.is_alphanumeric() || "@.-_".contains(c);
                                if char_matches && (n < (txt.chars().count() - 1)) {
                                    text_acc.push(c);
                                    (events, State::Mention, text_acc, n + 1, mentions, hashtags)
                                } else {
                                    // Mention ended (non-matching char or end
                                    // of text): emit it as a link.
                                    if char_matches {
                                        text_acc.push(c)
                                    }
                                    let mention = text_acc;
                                    let link = Tag::Link(
                                        LinkType::Inline,
                                        format!("{}@/{}/", base_url, &mention).into(),
                                        mention.clone().into(),
                                    );

                                    mentions.push(mention.clone());
                                    events.push(Event::Start(link.clone()));
                                    events.push(Event::Text(format!("@{}", &mention).into()));
                                    events.push(Event::End(link));

                                    // The terminating char starts the next
                                    // text accumulator.
                                    (
                                        events,
                                        State::Ready,
                                        c.to_string(),
                                        n + 1,
                                        mentions,
                                        hashtags,
                                    )
                                }
                            }
                            State::Hashtag => {
                                let char_matches = c == '-' || is_word_character(c);
                                if char_matches && (n < (txt.chars().count() - 1)) {
                                    text_acc.push(c);
                                    (events, State::Hashtag, text_acc, n + 1, mentions, hashtags)
                                } else {
                                    // Hashtag ended: emit it as a tag link.
                                    if char_matches {
                                        text_acc.push(c);
                                    }
                                    let hashtag = text_acc;
                                    let link = Tag::Link(
                                        LinkType::Inline,
                                        format!("{}tag/{}", base_url, &hashtag).into(),
                                        hashtag.to_owned().into(),
                                    );

                                    hashtags.push(hashtag.clone());
                                    events.push(Event::Start(link.clone()));
                                    events.push(Event::Text(format!("#{}", &hashtag).into()));
                                    events.push(Event::End(link));

                                    (
                                        events,
                                        State::Ready,
                                        c.to_string(),
                                        n + 1,
                                        mentions,
                                        hashtags,
                                    )
                                }
                            }
                            State::Ready => {
                                // '@'/'#' only start a mention/hashtag
                                // outside code blocks and link texts.
                                if !ctx.in_code && !ctx.in_link && c == '@' {
                                    events.push(Event::Text(text_acc.into()));
                                    (
                                        events,
                                        State::Mention,
                                        String::new(),
                                        n + 1,
                                        mentions,
                                        hashtags,
                                    )
                                } else if !ctx.in_code && !ctx.in_link && c == '#' {
                                    events.push(Event::Text(text_acc.into()));
                                    (
                                        events,
                                        State::Hashtag,
                                        String::new(),
                                        n + 1,
                                        mentions,
                                        hashtags,
                                    )
                                } else if c.is_alphanumeric() {
                                    text_acc.push(c);
                                    if n >= (txt.chars().count() - 1) {
                                        // Add the text after at the end, even if it is not followed by a mention.
                                        events.push(Event::Text(text_acc.clone().into()))
                                    }
                                    (events, State::Word, text_acc, n + 1, mentions, hashtags)
                                } else {
                                    text_acc.push(c);
                                    if n >= (txt.chars().count() - 1) {
                                        // Add the text after at the end, even if it is not followed by a mention.
                                        events.push(Event::Text(text_acc.clone().into()))
                                    }
                                    (events, State::Ready, text_acc, n + 1, mentions, hashtags)
                                }
                            }
                            State::Word => {
                                text_acc.push(c);
                                if c.is_alphanumeric() {
                                    if n >= (txt.chars().count() - 1) {
                                        // Add the text after at the end, even if it is not followed by a mention.
                                        events.push(Event::Text(text_acc.clone().into()))
                                    }
                                    (events, State::Word, text_acc, n + 1, mentions, hashtags)
                                } else {
                                    if n >= (txt.chars().count() - 1) {
                                        // Add the text after at the end, even if it is not followed by a mention.
                                        events.push(Event::Text(text_acc.clone().into()))
                                    }
                                    (events, State::Ready, text_acc, n + 1, mentions, hashtags)
                                }
                            }
                        }
                    },
                );
                Some((evts, new_mentions, new_hashtags))
            }
            _ => Some((vec![evt], vec![], vec![])),
        })
        // Concatenate per-event results into three flat vectors.
        .fold(
            (vec![], vec![], vec![]),
            |(mut parser, mut mention, mut hashtag), (mut p, mut m, mut h)| {
                parser.append(&mut p);
                mention.append(&mut m);
                hashtag.append(&mut h);
                (parser, mention, hashtag)
            },
        );
    let parser = parser.into_iter();
    let mentions = mentions.into_iter().map(|m| String::from(m.trim()));
    let hashtags = hashtags.into_iter().map(|h| String::from(h.trim()));

    // TODO: fetch mentionned profiles in background, if needed

    let mut buf = String::new();
    html::push_html(&mut buf, parser);
    (buf, mentions.collect(), hashtags.collect())
}
|
2018-07-18 18:35:50 +02:00
|
|
|
|
|
2022-01-06 21:36:39 +01:00
|
|
|
|
/// HTML-escapes `string` for safe interpolation into templates, using
/// askama's HTML escaper (the returned value renders escaped via `Display`).
pub fn escape(string: &str) -> askama_escape::Escaped<askama_escape::Html> {
    askama_escape::escape(string, askama_escape::Html)
}
|
|
|
|
|
|
2018-07-18 18:35:50 +02:00
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Mentions extracted by `md_to_html` (second tuple element) for various
    /// inputs, including cases that must NOT produce a mention (code spans,
    /// code blocks, link texts, mid-word '@').
    #[test]
    fn test_mentions() {
        let tests = vec![
            ("nothing", vec![]),
            ("@mention", vec!["mention"]),
            ("@mention@instance.tld", vec!["mention@instance.tld"]),
            ("@many @mentions", vec!["many", "mentions"]),
            ("@start with a mentions", vec!["start"]),
            ("mention at @end", vec!["end"]),
            ("between parenthesis (@test)", vec!["test"]),
            ("with some punctuation @test!", vec!["test"]),
            (" @spaces ", vec!["spaces"]),
            ("@is_a@mention", vec!["is_a@mention"]),
            ("not_a@mention", vec![]),
            ("`@helo`", vec![]),
            ("```\n@hello\n```", vec![]),
            ("[@atmark in link](https://example.org/)", vec![]),
        ];

        for (md, mentions) in tests {
            assert_eq!(
                md_to_html(md, None, false, None).1,
                mentions
                    .into_iter()
                    .map(|s| s.to_string())
                    .collect::<HashSet<String>>()
            );
        }
    }

    /// Hashtags extracted by `md_to_html` (third tuple element), including
    /// non-ASCII word characters and a zero-width-space terminator.
    #[test]
    fn test_hashtags() {
        let tests = vec![
            ("nothing", vec![]),
            ("#hashtag", vec!["hashtag"]),
            ("#many #hashtags", vec!["many", "hashtags"]),
            ("#start with a hashtag", vec!["start"]),
            ("hashtag at #end", vec!["end"]),
            ("between parenthesis (#test)", vec!["test"]),
            ("with some punctuation #test!", vec!["test"]),
            (" #spaces ", vec!["spaces"]),
            ("not_a#hashtag", vec![]),
            ("#نرم‌افزار_آزاد", vec!["نرم‌افزار_آزاد"]),
            ("[#hash in link](https://example.org/)", vec![]),
            // The zero-width space ends the hashtag.
            ("#zwsp\u{200b}inhash", vec!["zwsp"]),
        ];

        for (md, mentions) in tests {
            assert_eq!(
                md_to_html(md, None, false, None).2,
                mentions
                    .into_iter()
                    .map(|s| s.to_string())
                    .collect::<HashSet<String>>()
            );
        }
    }

    /// Allowed characters pass through (including non-ASCII); disallowed
    /// ones (space, '%') are percent-encoded.
    #[test]
    fn test_iri_percent_encode_seg() {
        assert_eq!(
            &iri_percent_encode_seg("including whitespace"),
            "including%20whitespace"
        );
        // An already-encoded '%' is re-encoded (no double-decode surprises).
        assert_eq!(&iri_percent_encode_seg("%20"), "%2520");
        assert_eq!(&iri_percent_encode_seg("é"), "é");
        assert_eq!(
            &iri_percent_encode_seg("空白入り 日本語"),
            "空白入り%20日本語"
        );
    }

    /// `inline = true` downgrades a heading to a paragraph.
    #[test]
    fn test_inline() {
        assert_eq!(
            md_to_html("# Hello", None, false, None).0,
            String::from("<h1 dir=\"auto\">Hello</h1>\n")
        );
        assert_eq!(
            md_to_html("# Hello", None, true, None).0,
            String::from("<p dir=\"auto\">Hello</p>\n")
        );
    }
}
|