From edaccd1a311713c4304a9ee89d5b0e0305a8f65b Mon Sep 17 00:00:00 2001 From: KITAITI Makoto Date: Wed, 29 Apr 2020 22:13:31 +0900 Subject: [PATCH] Fixes #757 Allow Unicode hashtags (#758) * Add test for Persian language hashtags See https://github.com/Plume-org/Plume/issues/757 * Add regex-syntax with unicode-perl feature to dependencies * Install regex-syntax * Allow hashtag to use Unicode word characters * Run cargo fmt --- Cargo.lock | 1 + plume-common/Cargo.toml | 1 + plume-common/src/utils.rs | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index df3b6b73..57b685e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2213,6 +2213,7 @@ dependencies = [ "hyper 0.12.35 (registry+https://github.com/rust-lang/crates.io-index)", "openssl 0.10.29 (registry+https://github.com/rust-lang/crates.io-index)", "pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.17 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)", "rocket 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.106 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/plume-common/Cargo.toml b/plume-common/Cargo.toml index 6c7e02ff..647305ec 100644 --- a/plume-common/Cargo.toml +++ b/plume-common/Cargo.toml @@ -22,6 +22,7 @@ serde_json = "1.0" shrinkwraprs = "0.2.1" syntect = "3.3" tokio = "0.1.22" +regex-syntax = { version = "0.6.17", default-features = false, features = ["unicode-perl"] } [dependencies.chrono] features = ["serde"] diff --git a/plume-common/src/utils.rs b/plume-common/src/utils.rs index eeb0302d..0c5b9ea7 100644 --- a/plume-common/src/utils.rs +++ b/plume-common/src/utils.rs @@ -1,6 +1,7 @@ use heck::CamelCase; use openssl::rand::rand_bytes; use pulldown_cmark::{html, Event, Options, Parser, Tag}; +use regex_syntax::is_word_character; use rocket::{ http::uri::Uri, response::{Flash, Redirect}, @@ -269,7 +270,7 @@ pub fn md_to_html<'a>( } } State::Hashtag => { - let char_matches = c.is_alphanumeric() || "-_".contains(c); + let char_matches = c == '-' || is_word_character(c); if char_matches && (n < (txt.chars().count() - 1)) { text_acc.push(c); (events, State::Hashtag, text_acc, n + 1, mentions, hashtags) @@ -424,6 +425,7 @@ mod tests { ("with some punctuation #test!", vec!["test"]), (" #spaces ", vec!["spaces"]), ("not_a#hashtag", vec![]), + ("#نرم‌افزار_آزاد", vec!["نرم‌افزار_آزاد"]), ]; for (md, mentions) in tests {