Fixes #757 Allow Unicode hashtags (#758)

* Add test for Persian language hashtags

See https://github.com/Plume-org/Plume/issues/757

* Add regex-syntax with unicode-perl feature to dependencies

* Install regex-syntax

* Allow hashtag to use Unicode word characters

* Run cargo fmt
This commit is contained in:
KITAITI Makoto 2020-04-29 22:13:31 +09:00 committed by GitHub
parent e1bd2eab75
commit edaccd1a31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 1 deletions

1
Cargo.lock generated
View File

@ -2213,6 +2213,7 @@ dependencies = [
"hyper 0.12.35 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.12.35 (registry+https://github.com/rust-lang/crates.io-index)",
"openssl 0.10.29 (registry+https://github.com/rust-lang/crates.io-index)", "openssl 0.10.29 (registry+https://github.com/rust-lang/crates.io-index)",
"pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.17 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"rocket 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "rocket 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.106 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.106 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -22,6 +22,7 @@ serde_json = "1.0"
shrinkwraprs = "0.2.1" shrinkwraprs = "0.2.1"
syntect = "3.3" syntect = "3.3"
tokio = "0.1.22" tokio = "0.1.22"
regex-syntax = { version = "0.6.17", default-features = false, features = ["unicode-perl"] }
[dependencies.chrono] [dependencies.chrono]
features = ["serde"] features = ["serde"]

View File

@ -1,6 +1,7 @@
use heck::CamelCase; use heck::CamelCase;
use openssl::rand::rand_bytes; use openssl::rand::rand_bytes;
use pulldown_cmark::{html, Event, Options, Parser, Tag}; use pulldown_cmark::{html, Event, Options, Parser, Tag};
use regex_syntax::is_word_character;
use rocket::{ use rocket::{
http::uri::Uri, http::uri::Uri,
response::{Flash, Redirect}, response::{Flash, Redirect},
@ -269,7 +270,7 @@ pub fn md_to_html<'a>(
} }
} }
State::Hashtag => { State::Hashtag => {
let char_matches = c.is_alphanumeric() || "-_".contains(c); let char_matches = c == '-' || is_word_character(c);
if char_matches && (n < (txt.chars().count() - 1)) { if char_matches && (n < (txt.chars().count() - 1)) {
text_acc.push(c); text_acc.push(c);
(events, State::Hashtag, text_acc, n + 1, mentions, hashtags) (events, State::Hashtag, text_acc, n + 1, mentions, hashtags)
@ -424,6 +425,7 @@ mod tests {
("with some punctuation #test!", vec!["test"]), ("with some punctuation #test!", vec!["test"]),
(" #spaces ", vec!["spaces"]), (" #spaces ", vec!["spaces"]),
("not_a#hashtag", vec![]), ("not_a#hashtag", vec![]),
("#نرمافزار_آزاد", vec!["نرمافزار_آزاد"]),
]; ];
for (md, mentions) in tests { for (md, mentions) in tests {