From 449641d158e7e194790dcbf76a9d67636bc5b9e1 Mon Sep 17 00:00:00 2001 From: fdb-hiroshima <35889323+fdb-hiroshima@users.noreply.github.com> Date: Sun, 2 Dec 2018 17:37:51 +0100 Subject: [PATCH] Add a search engine into Plume (#324) * Add search engine to the model Add a Tantivy based search engine to the model Implement most required functions for it * Implement indexing and plm subcommands Implement indexation on insert, update and delete Modify func args to get the indexer where required Add subcommand to initialize, refill and unlock search db * Move to a new threadpool engine allowing scheduling * Autocommit search index every half an hour * Implement front part of search Add default fields for search Add new routes and templates for search and result Implement FromFormValue for Page to reuse it on search result pagination Add optional query parameters to paginate template's macro Update to newer rocket_csrf, don't get csrf token on GET forms * Handle process termination to release lock Handle process termination Add tests to search * Add proper support for advanced search Add an advanced search form to /search, in template and route Modify Tantivy schema, add new tokenizer for some properties Create new String query parser Create Tantivy query AST from our own * Split search.rs, add comment and tests Split search.rs into multiple submodules Add comments and tests for Query Make user@domain be treated as one could assume --- .gitignore | 1 + Cargo.lock | 377 ++++++++++++++++++++++++++- Cargo.toml | 9 +- plume-cli/src/main.rs | 5 +- plume-cli/src/search.rs | 111 ++++++++ plume-models/Cargo.toml | 3 + plume-models/src/blogs.rs | 13 +- plume-models/src/lib.rs | 33 +-- plume-models/src/posts.rs | 59 +++-- plume-models/src/search/mod.rs | 167 ++++++++++++ plume-models/src/search/query.rs | 343 ++++++++++++++++++++++++ plume-models/src/search/searcher.rs | 203 +++++++++++++++ plume-models/src/search/tokenizer.rs | 67 +++++ plume-models/src/users.rs | 10 +- src/api/posts.rs | 10 +- src/inbox.rs | 10 +- src/main.rs | 39 ++- src/routes/blogs.rs | 5 +- src/routes/comments.rs | 7 +- src/routes/instance.rs | 9 +- src/routes/likes.rs | 10 +- src/routes/mod.rs | 16 +- src/routes/posts.rs | 27 +- src/routes/reshares.rs | 10 +- src/routes/search.rs | 84 ++++++ src/routes/user.rs | 41 +-- templates/macros.html.tera | 11 +- templates/search/index.html.tera | 50 ++++ templates/search/result.html.tera | 26 ++ 29 files changed, 1613 insertions(+), 143 deletions(-) create mode 100644 plume-cli/src/search.rs create mode 100644 plume-models/src/search/mod.rs create mode 100644 plume-models/src/search/query.rs create mode 100644 plume-models/src/search/searcher.rs create mode 100644 plume-models/src/search/tokenizer.rs create mode 100644 src/routes/search.rs create mode 100644 templates/search/index.html.tera create mode 100644 templates/search/result.html.tera diff --git a/.gitignore b/.gitignore index f84fea6b..a611d075 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ docker-compose.yml *.sqlite *.sqlite3 *.swp +search_index diff --git a/Cargo.lock b/Cargo.lock index 8f95a00c..063ec443 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,11 @@ dependencies = [ "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ascii" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "atom_syndication" version = "0.6.0" @@ -112,6 +117,16 @@ dependencies = [ "quick-xml 0.12.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "atomicwrites" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "nix 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "atty" version = "0.2.11" @@ -175,6 +190,14 @@ dependencies = [ "safemem 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "base64" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "bcrypt" version = "0.2.0" @@ -187,6 +210,19 @@ dependencies = [ "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bit-set" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bit-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "bitflags" version = "0.7.0" @@ -202,6 +238,14 @@ name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "bitpacking" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crunchy 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "block-buffer" version = "0.3.3" @@ -276,6 +320,11 @@ name = "cc" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "census" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cfg-if" version = "0.1.5" @@ -333,6 +382,18 @@ dependencies = [ "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "combine" +version = "3.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ascii 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "conv" version = "0.3.3" @@ -377,6 +438,38 @@ dependencies = [ "build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "crossbeam" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-deque 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-channel" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-deque" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crossbeam-deque" version = "0.6.1" @@ -399,11 +492,37 @@ dependencies = [ "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "crossbeam-epoch" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crossbeam-utils" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "crossbeam-utils" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crunchy" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "csrf" version = "0.3.0" @@ -417,6 +536,15 @@ dependencies = [ "rust-crypto 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ctrlc" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "nix 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "custom_derive" version = "0.1.7" @@ -553,6 +681,11 @@ dependencies = [ "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "downcast" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "dtoa" version = "0.4.3" @@ -563,6 +696,11 @@ name = "either" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "either" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "encoding_rs" version = "0.8.10" @@ -587,6 +725,16 @@ dependencies = [ "backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fail" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "failure" version = "0.1.2" @@ -658,6 +806,25 @@ dependencies = [ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fst" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fst-regex" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fst 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fuchsia-zircon" version = "0.3.3" @@ -821,6 +988,11 @@ dependencies = [ "syn 0.13.11 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "http" version = "0.1.13" @@ -984,6 +1156,14 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "itertools" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "itoa" version = "0.3.4" @@ -1026,6 +1206,14 @@ name = "lazycell" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "levenshtein_automata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fst 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "libc" version = "0.2.43" @@ -1134,6 +1322,15 @@ dependencies = [ "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "memmap" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "memoffset" version = "0.2.1" @@ -1296,6 +1493,18 @@ dependencies = [ "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "nix" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "nodrop" version = "0.1.12" @@ -1374,6 +1583,15 @@ dependencies = [ "vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "owned-read" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rental 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "owning_ref" version = "0.3.3" @@ -1382,6 +1600,14 @@ dependencies = [ "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "owning_ref" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "parking_lot" version = "0.6.4" @@ -1516,6 +1742,7 @@ dependencies = [ "canapi 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "colored 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ctrlc 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "diesel 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "dotenv 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1523,15 +1750,17 @@ dependencies = [ "guid-create 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "heck 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "multipart 0.15.3 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "plume-api 0.1.0", "plume-common 0.2.0", "plume-models 0.2.0", "rocket 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)", "rocket_codegen 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)", "rocket_contrib 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)", - "rocket_csrf 0.1.0 (git+https://github.com/fdb-hiroshima/rocket_csrf?rev=2805ce5dbae4a6441208484426440885a5640a6a)", + "rocket_csrf 0.1.0 (git+https://github.com/fdb-hiroshima/rocket_csrf?rev=0dfb822d5cbf65a5eee698099368b7c0f4c61fa4)", "rocket_i18n 0.1.1 (git+https://github.com/BaptisteGelez/rocket_i18n?rev=75a3bfd7b847324c078a355a7f101f8241a9f59b)", "rpassword 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "scheduled-thread-pool 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.32 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1540,7 +1769,6 @@ dependencies = [ "validator 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "validator_derive 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "webfinger 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "workerpool 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1601,6 +1829,7 @@ dependencies = [ "diesel_migrations 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "guid-create 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "heck 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "openssl 0.10.12 (registry+https://github.com/rust-lang/crates.io-index)", "plume-api 0.1.0", @@ -1610,8 +1839,10 @@ dependencies = [ "serde 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.32 (registry+https://github.com/rust-lang/crates.io-index)", + "tantivy 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "webfinger 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "whatlang 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1809,6 +2040,11 @@ dependencies = [ "utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "regex-syntax" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "regex-syntax" version = "0.5.6" @@ -1841,6 +2077,25 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rental" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rental-impl 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rental-impl" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.20 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "reqwest" version = "0.9.2" @@ -1936,7 +2191,7 @@ dependencies = [ [[package]] name = "rocket_csrf" version = "0.1.0" -source = "git+https://github.com/fdb-hiroshima/rocket_csrf?rev=2805ce5dbae4a6441208484426440885a5640a6a#2805ce5dbae4a6441208484426440885a5640a6a" +source = "git+https://github.com/fdb-hiroshima/rocket_csrf?rev=0dfb822d5cbf65a5eee698099368b7c0f4c61fa4#0dfb822d5cbf65a5eee698099368b7c0f4c61fa4" dependencies = [ "csrf 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "data-encoding 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1995,6 +2250,15 @@ dependencies = [ "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rust-stemmers" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rustc-demangle" version = "0.1.9" @@ -2204,6 +2468,15 @@ dependencies = [ "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "snap" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "stable_deref_trait" version = "1.1.1" @@ -2329,6 +2602,50 @@ name = "take" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "tantivy" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "atomicwrites 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "base64 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bit-set 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bitpacking 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "census 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "combine 3.6.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "downcast 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", + "fail 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "fst 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "fst-regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "futures 0.1.25 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-cpupool 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "owned-read 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "rust-stemmers 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.32 (registry+https://github.com/rust-lang/crates.io-index)", + "snap 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "tempfile 3.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "uuid 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "tempdir" version = "0.3.7" @@ -2796,6 +3113,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2887,6 +3205,14 @@ dependencies = [ "serde_json 1.0.32 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "whatlang" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "winapi" version = "0.2.8" @@ -2924,14 +3250,6 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "workerpool" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "ws2_32-sys" version = "0.2.1" @@ -2959,17 +3277,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum array_tool 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8f8cb5d814eb646a863c4f24978cff2880c4be96ad8cde2c0f0678732902e271" "checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" "checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" +"checksum ascii 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a5fc969a8ce2c9c0c4b0429bb8431544f6658283c8326ba5ff8c762b75369335" "checksum atom_syndication 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0a9a7ab83635ff7a3b04856f4ad95324dccc9b947ab1e790fc5c769ee6d6f60c" +"checksum atomicwrites 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a3420b33cdefd3feb223dddc23739fc05cc034eb0f2be792c763e3d89e1eb6e3" "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" "checksum backtrace 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "150ae7828afa7afb6d474f909d64072d21de1f3365b6e8ad8029bf7b1c6350a0" "checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" +"checksum base64 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "621fc7ecb8008f86d7fb9b95356cd692ce9514b80a86d85b397f32a22da7b9e2" "checksum base64 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96434f987501f0ed4eb336a411e0631ecd1afa11574fe148587adc4ff96143c9" "checksum base64 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "489d6c0ed21b11d038c31b6ceccca973e65d73ba3bd8ecb9a2babf5546164643" "checksum bcrypt 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1a1512813db09170b44a00870b58421876d797b77b085c5205a24db90905f758" +"checksum bit-set 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6f1efcc46c18245a69c38fcc5cc650f16d3a59d034f3106e9ed63748f695730a" +"checksum bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4440d5cb623bb7390ae27fec0bb6c61111969860f8e3ae198bfa0663645e67cf" "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" "checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum bitpacking 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "75c04b83d2b444a22c6a30f4d068597efbe468fe56f068e042e627ded2fb21e7" "checksum block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab" "checksum block-cipher-trait 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "370424437b9459f3dfd68428ed9376ddfe03d8b70ede29cc533b3557df186ab4" "checksum blowfish 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "95ede07672d9f4144c578439aa352604ec5c67a80c940fe8d382ddbeeeb3c6d8" @@ -2980,21 +3304,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum bytes 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0ce55bd354b095246fc34caf4e9e242f5297a7fd938b090cadfea6eee614aa62" "checksum canapi 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff3e02a04f44b531d851d2db62f95aabf65d033a6724767a4bed9732563e9bc4" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" +"checksum census 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e5c044df9888597e4e96610c916ce9d58c653b67c01b5eac5b7abd7405f4fee4" "checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3" "checksum chomp 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f74ad218e66339b11fd23f693fb8f1d621e80ba6ac218297be26073365d163d" "checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum colored 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc0a60679001b62fb628c4da80e574b9645ab4646056d7c9018885efffe45533" +"checksum combine 3.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "db733c5d0f4f52e78d4417959cadf0eecc7476e7f9ece05677912571a4af34e2" "checksum conv 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299" "checksum cookie 0.11.0-dev (git+https://github.com/alexcrichton/cookie-rs?rev=f191ca50)" = "" "checksum core-foundation 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "286e0b41c3a20da26536c6000a280585d519fd07b3956b43aed8a79e9edce980" "checksum core-foundation-sys 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "716c271e8613ace48344f723b60b900a93150271e5be206212d052bbc0883efa" "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +"checksum crossbeam 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d7408247b1b87f480890f28b670c5f8d9a8a4274833433fe74dc0dfd46d33650" +"checksum crossbeam-channel 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7b85741761b7f160bc5e7e0c14986ef685b7f8bf9b7ad081c60c604bb4649827" +"checksum crossbeam-deque 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7792c4a9b5a4222f654e3728a3dd945aacc24d2c3a1a096ed265d80e4929cb9a" "checksum crossbeam-deque 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3486aefc4c0487b9cb52372c97df0a48b8c249514af1ee99703bf70d2f2ceda1" "checksum crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30fecfcac6abfef8771151f8be4abc9e4edc112c2bcb233314cafde2680536e9" +"checksum crossbeam-epoch 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2449aaa4ec7ef96e5fb24db16024b935df718e9ae1cec0a1e68feeca2efca7b8" "checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" +"checksum crossbeam-utils 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c55913cc2799171a550e307918c0a360e8c16004820291bf3b638969b4a01816" +"checksum crunchy 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a2f4a431c5c9f662e1200b7c7f02c34e91361150e382089a8f2dec3ba680cbda" "checksum csrf 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "38f2ee2a7e76740d81de006e61eff53206c56448a30d8017b4ac97b5486682bd" +"checksum ctrlc 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "630391922b1b893692c6334369ff528dcc3a9d8061ccf4c803aa8f83cb13db5e" "checksum custom_derive 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" "checksum data-encoding 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "67df0571a74bf0d97fb8b2ed22abdd9a48475c96bd327db968b7d9cace99655e" "checksum dbghelp-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "97590ba53bcb8ac28279161ca943a924d1fd4a8fb3fa63302591647c4fc5b850" @@ -3011,11 +3344,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum diesel_migrations 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b42c35d1ce9e8d57a3e7001b4127f2bc1b073a89708bb7019f5be27c991c28" "checksum digest 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "03b072242a8cbaf9c145665af9d250c59af3b958f83ed6824e13533cf76d5b90" "checksum dotenv 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d0a1279c96732bc6800ce6337b6a614697b0e74ae058dc03c62ebeb78b4d86" +"checksum downcast 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6c6fe31318b6ef21166c8e839e680238eb16f875849d597544eead7ec882eed3" "checksum dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d301140eb411af13d3115f9a562c85cc6b541ade9dfa314132244aaee7489dd" "checksum either 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a39bffec1e2015c5d8a6773cb0cf48d0d758c842398f624c34969071f5499ea7" +"checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0" "checksum encoding_rs 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "065f4d0c826fdaef059ac45487169d918558e3cf86c9d89f6e81cf52369126e5" "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" "checksum error-chain 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "07e791d3be96241c77c43846b665ef1384606da2cd2a48730abe606a12906e02" +"checksum fail 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bd2e1a22c616c8c8c96b6e07c243014551f3ba77291d24c22e0bfea6830c0b4e" "checksum failure 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7efb22686e4a466b1ec1a15c2898f91fa9cb340452496dca654032de20ff95b9" "checksum failure_derive 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "946d0e98a50d9831f5d589038d2ca7f8f455b1c21028c0db0e84116a12696426" "checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" @@ -3025,6 +3361,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" "checksum fsevent 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "c4bbbf71584aeed076100b5665ac14e3d85eeb31fdbb45fbd41ef9a682b5ec05" "checksum fsevent-sys 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1a772d36c338d07a032d5375a36f15f9a7043bf0cb8ce7cee658e037c6032874" +"checksum fst 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9b0408ab57c1bf7c634b2ac6a165d14f642dc3335a43203090a7f8c78b54577b" +"checksum fst-regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "87aca1d91eed3c128132cee31d291fd4e8492df0b742a5b1453857a4c7cedd88" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" "checksum futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" @@ -3043,6 +3381,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum heck 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ea04fa3ead4e05e51a7c806fc07271fdbde4e246a6c6d1efd52e72230b771b82" "checksum hex 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "805026a5d0141ffc30abb3be3173848ad46a1b1664fe632428479619a3644d77" "checksum html5ever 0.22.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b04478cf718862650a0bf66acaf8f2f8c906fbc703f35c916c1f4211b069a364" +"checksum htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" "checksum http 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "24f58e8c2d8e886055c3ead7b28793e1455270b5fb39650984c224bc538ba581" "checksum httparse 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e8734b0cfd3bc3e101ec59100e101c2eecd19282202e87808b3037b442777a83" "checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" @@ -3057,6 +3396,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum inotify-sys 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e74a1aa87c59aeff6ef2cc2fa62d41bc43f54952f55652656b18a02fd5e356c0" "checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08" "checksum isatty 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e31a8281fc93ec9693494da65fbf28c0c2aa60a2eaec25dc58e2f31952e95edc" +"checksum itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0d47946d458e94a1b7bcabbf6521ea7c037062c81f534615abcad76e84d4970d" "checksum itoa 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8324a32baf01e2ae060e9de58ed0bc2320c9a2833491ee36cd3b4c414de4db8c" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" @@ -3064,6 +3404,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" "checksum lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca488b89a5657b0a2ecd45b95609b3e848cf1755da332a0da46e2b2b1cb371a7" "checksum lazycell 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ddba4c30a78328befecec92fc94970e53b3ae385827d28620f0f5bb2493081e0" +"checksum levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73a004f877f468548d8d0ac4977456a249d8fabbdb8416c36db163dfc8f2e8ca" "checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d" "checksum libflate 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)" = "21138fc6669f438ed7ae3559d5789a5f0ba32f28c1f0608d1e452b0bb06ee936" "checksum libsqlite3-sys 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d3711dfd91a1081d2458ad2d06ea30a8755256e74038be2ad927d94e1c955ca8" @@ -3077,6 +3418,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum markup5ever 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfedc97d5a503e96816d10fedcd5b42f760b2e525ce2f7ec71f6a41780548475" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" "checksum memchr 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4b3629fe9fdbff6daa6c33b90f7c08355c1aca05a3d01fa8063b822fcf185f3b" +"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" "checksum migrations_internals 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8cf7c8c4f83fa9f47440c0b4af99973502de55e6e7b875f693bd263e03f93e7e" "checksum migrations_macros 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "79f12499ef7353bdeca2d081bc61edd8351dac09a33af845952009b5a3d68c1a" @@ -3092,6 +3434,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum native-tls 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8b0a7bd714e83db15676d31caf968ad7318e9cc35f93c85a90231c8f22867549" "checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88" "checksum new_debug_unreachable 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0cdc457076c78ab54d5e0d6fa7c47981757f1e34dc39ff92787f217dede586c4" +"checksum nix 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d37e713a259ff641624b6cb20e3b12b2952313ba36b6823c0f16e6cfd9e5de17" "checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2" "checksum notify 4.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "873ecfd8c174964ae30f401329d140142312c8e5590719cf1199d5f1717d8078" "checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" @@ -3101,7 +3444,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum openssl 0.10.12 (registry+https://github.com/rust-lang/crates.io-index)" = "5e2e79eede055813a3ac52fb3915caf8e1c9da2dec1587871aec9f6f7b48508d" "checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" "checksum openssl-sys 0.9.36 (registry+https://github.com/rust-lang/crates.io-index)" = "409d77eeb492a1aebd6eb322b2ee72ff7c7496b4434d98b3bf8be038755de65e" +"checksum owned-read 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "05d57fab18d627fc4dffbd78d4a25a5b5b5211fda724231f001bee4cef1b2d3b" "checksum owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cdf84f41639e037b484f93433aa3897863b561ed65c6e59c7073d7c561710f37" +"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" "checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" "checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c" "checksum pear 0.1.0 (git+http://github.com/SergioBenitez/Pear?rev=b475140)" = "" @@ -3140,21 +3485,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" "checksum regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2069749032ea3ec200ca51e4a31df41759190a88edca0d2d86ee8bedf7073341" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" "checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" "checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d" "checksum relay 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1576e382688d7e9deecea24417e350d3062d97e32e45d70b1cde65994ff1489a" "checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" +"checksum rental 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ca24bf9b98e3df0bb359f1bbb8ef993a0093d8432500c5eaf3ae724f30b5f754" +"checksum rental-impl 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a269533a9b93bbaa4848260e51b64564cc445d46185979f31974ec703374803a" "checksum reqwest 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1d68c7bf0b1dc3860b80c6d31d05808bf54cdc1bfc70a4680893791becd083ae" "checksum ring 0.13.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe642b9dd1ba0038d78c4a3999d1ee56178b4d415c1e1fbaba83b06dce012f0" "checksum rocket 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)" = "" "checksum rocket_codegen 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)" = "" "checksum rocket_codegen_next 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)" = "" "checksum rocket_contrib 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)" = "" -"checksum rocket_csrf 0.1.0 (git+https://github.com/fdb-hiroshima/rocket_csrf?rev=2805ce5dbae4a6441208484426440885a5640a6a)" = "" +"checksum rocket_csrf 0.1.0 (git+https://github.com/fdb-hiroshima/rocket_csrf?rev=0dfb822d5cbf65a5eee698099368b7c0f4c61fa4)" = "" "checksum rocket_http 0.4.0-dev (git+https://github.com/SergioBenitez/Rocket?rev=55459db7732b9a240826a5c120c650f87e3372ce)" = "" "checksum rocket_i18n 0.1.1 (git+https://github.com/BaptisteGelez/rocket_i18n?rev=75a3bfd7b847324c078a355a7f101f8241a9f59b)" = "" "checksum rpassword 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d127299b02abda51634f14025aec43ae87a7aa7a95202b6a868ec852607d1451" "checksum rust-crypto 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f76d05d3993fd5f4af9434e8e436db163a12a9d40e1a58a726f27a01dfd12a2a" +"checksum rust-stemmers 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fbf06149ec391025664a5634200ced1afb489f0f3f8a140d515ebc0eb04b4bc0" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" @@ -3183,6 +3532,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum slug 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b3bc762e6a4b6c6fcaade73e77f9ebc6991b676f88bb2358bddb56560f073373" "checksum smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c8cbcd6df1e117c2210e13ab5109635ad68a929fcbb8964dc965b76cb5ee013" "checksum smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "153ffa32fd170e9944f7e0838edf824a754ec4c1fc64746fcc9fe1f8fa602e5d" +"checksum snap 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "95d697d63d44ad8b78b8d235bf85b34022a78af292c8918527c5f0cffdde7f43" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" "checksum state 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7345c971d1ef21ffdbd103a75990a15eb03604fc8b8852ca8cb418ee1a099028" "checksum string 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00caf261d6f90f588f8450b8e1230fa0d5be49ee6140fdfbcb55335aff350970" @@ -3198,6 +3548,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" "checksum synstructure 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb9b7550d063ea184027c9b8c20ac167cd36d3e06b3a40bceb9d746dc1a7b7" "checksum take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b157868d8ac1f56b64604539990685fa7611d8fa9e5476cf0c02cf34d32917c5" +"checksum tantivy 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "34fab04422b020c9e6e4b5f4a2eb5d6727ce89d244a9f96434347956c8d9dad6" "checksum tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" "checksum tempfile 3.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "55c1195ef8513f3273d55ff59fe5da6940287a0d7a98331254397f464833675b" "checksum tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9de21546595a0873061940d994bbbc5c35f024ae4fd61ec5c5b159115684f508" @@ -3260,12 +3611,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum want 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a05d9d966753fa4b5c8db73fcab5eed4549cfe0e1e4e66911e5564a0085c35d1" "checksum want 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "797464475f30ddb8830cc529aaaae648d581f99e2036a928877dfde027ddf6b3" "checksum webfinger 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "edc8f298f29f04bf5b6a85d7d448de4f16b7d45807d0a3ec422efcfbf1960519" +"checksum whatlang 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9d6e6c33992562189a3c9a073525c818e8a8b984771e87e126107be7913b3c2" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -"checksum workerpool 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4f49756646617bde19ff95b370cfa5c0f7ead17a90c90d7cb62dc31dfaa8c625" "checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" "checksum yansi 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d60c3b48c9cdec42fb06b3b84b5b087405e1fa1c644a1af3930e4dfafe93de48" diff --git a/Cargo.toml b/Cargo.toml index 16206cef..4ba4d209 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,9 @@ failure = "0.1" gettext-rs = "0.4" guid-create = "0.1" heck = "0.3.0" +num_cpus = "1.0" rpassword = "2.0" +scheduled-thread-pool = "0.2.0" serde = "1.0" serde_derive = "1.0" serde_json = "1.0" @@ -21,7 +23,6 @@ tera = "0.11" validator = "0.7" validator_derive = "0.7" webfinger = "0.3.1" -workerpool = "1.1" [[bin]] name = "plume" @@ -31,6 +32,10 @@ path = "src/main.rs" features = ["serde"] version = "0.4" +[dependencies.ctrlc] +features = ["termination"] +version = "3.1.1" + [dependencies.diesel] features = ["r2d2", "chrono"] version = "*" @@ -64,7 +69,7 @@ rev = "55459db7732b9a240826a5c120c650f87e3372ce" [dependencies.rocket_csrf] git = "https://github.com/fdb-hiroshima/rocket_csrf" -rev = "2805ce5dbae4a6441208484426440885a5640a6a" +rev = "0dfb822d5cbf65a5eee698099368b7c0f4c61fa4" [dependencies.rocket_i18n] git = "https://github.com/BaptisteGelez/rocket_i18n" diff --git a/plume-cli/src/main.rs b/plume-cli/src/main.rs index a060c7c9..ea9c5ccf 100644 --- a/plume-cli/src/main.rs +++ b/plume-cli/src/main.rs @@ -11,6 +11,7 @@ use plume_models::{DATABASE_URL, Connection as Conn}; mod instance; mod users; +mod search; fn main() { let mut app = App::new("Plume CLI") @@ -18,7 +19,8 @@ fn main() { .version(env!("CARGO_PKG_VERSION")) .about("Collection of tools to manage your Plume instance.") .subcommand(instance::command()) - .subcommand(users::command()); + .subcommand(users::command()) + .subcommand(search::command()); let matches = app.clone().get_matches(); dotenv::dotenv().ok(); @@ -27,6 +29,7 @@ fn main() { match matches.subcommand() { ("instance", Some(args)) => instance::run(args, &conn.expect("Couldn't connect to the database.")), ("users", Some(args)) => users::run(args, &conn.expect("Couldn't connect to the database.")), + ("search", Some(args)) => search::run(args, &conn.expect("Couldn't connect to the database.")), _ => app.print_help().expect("Couldn't print help") }; } diff --git a/plume-cli/src/search.rs b/plume-cli/src/search.rs new file mode 100644 index 00000000..9a6e7f20 --- /dev/null +++ b/plume-cli/src/search.rs @@ -0,0 +1,111 @@ +use clap::{Arg, ArgMatches, App, SubCommand}; +use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl}; + +use std::fs::{read_dir, remove_file}; +use std::io::ErrorKind; +use std::path::Path; +use plume_models::{ + Connection, + posts::Post, + schema::posts, + search::Searcher, +}; + +pub fn command<'a, 'b>() -> App<'a, 'b> { + SubCommand::with_name("search") + .about("Manage search index") + .subcommand(SubCommand::with_name("init") + .arg(Arg::with_name("path") + .short("p") + .long("path") + .takes_value(true) + .required(true) + .help("Path to Plume's working directory")) + .arg(Arg::with_name("force") + .short("f") + .long("force") + .help("Ignore already using directory") + ).about("Initialize Plume's internal search engine")) + .subcommand(SubCommand::with_name("refill") + .arg(Arg::with_name("path") + .short("p") + .long("path") + .takes_value(true) + .required(true) + .help("Path to Plume's working directory") + ).about("Regenerate Plume's search index")) + .subcommand(SubCommand::with_name("unlock") + .arg(Arg::with_name("path") + .short("p") + .long("path") + .takes_value(true) + .required(true) + .help("Path to Plume's working directory") + ).about("Release lock on search directory")) +} + +pub fn run<'a>(args: &ArgMatches<'a>, conn: &Connection) { + let conn = conn; + match args.subcommand() { + ("init", Some(x)) => init(x, conn), + ("refill", Some(x)) => refill(x, conn, None), + ("unlock", Some(x)) => unlock(x), + _ => println!("Unknown subcommand"), + } +} + +fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) { + let path = args.value_of("path").unwrap(); + let force = args.is_present("force"); + let path = Path::new(path).join("search_index"); + + let can_do = match read_dir(path.clone()) { // try to read the directory specified + Ok(mut contents) => { + if contents.next().is_none() { + true + } else { + false + } + }, + Err(e) => if e.kind() == ErrorKind::NotFound { + true + } else { + panic!("Error while initialising search index : {}", e); + } + }; + if can_do || force { + let searcher = Searcher::create(&path).unwrap(); + refill(args, conn, Some(searcher)); + } else { + eprintln!("Can't create new index, {} exist and is not empty", path.to_str().unwrap()); + } +} + +fn refill<'a>(args: &ArgMatches<'a>, conn: &Connection, searcher: Option) { + let path = args.value_of("path").unwrap(); + let path = Path::new(path).join("search_index"); + let searcher = searcher.unwrap_or_else(|| Searcher::open(&path).unwrap()); + + let posts = posts::table + .filter(posts::published.eq(true)) + .load::(conn) + .expect("Post::get_recents: loading error"); + + let len = posts.len(); + for (i,post) in posts.iter().enumerate() { + println!("Importing {}/{} : {}", i+1, len, post.title); + searcher.update_document(conn, &post); + } + println!("Commiting result"); + searcher.commit(); +} + + +fn unlock<'a>(args: &ArgMatches<'a>) { + let path = args.value_of("path").unwrap(); + let path = Path::new(path).join("search_index/.tantivy-indexer.lock"); + + remove_file(path).unwrap(); +} + + diff --git a/plume-models/Cargo.toml b/plume-models/Cargo.toml index 66fe0cac..dab240b6 100644 --- a/plume-models/Cargo.toml +++ b/plume-models/Cargo.toml @@ -10,14 +10,17 @@ bcrypt = "0.2" canapi = "0.1" guid-create = "0.1" heck = "0.3.0" +itertools = "0.7.8" lazy_static = "*" openssl = "0.10.11" reqwest = "0.9" serde = "1.0" serde_derive = "1.0" serde_json = "1.0" +tantivy = "0.7.1" url = "1.7" webfinger = "0.3.1" +whatlang = "0.5.0" [dependencies.chrono] features = ["serde"] diff --git a/plume-models/src/blogs.rs b/plume-models/src/blogs.rs index 82a6e0aa..318eb529 100644 --- a/plume-models/src/blogs.rs +++ b/plume-models/src/blogs.rs @@ -24,6 +24,7 @@ use plume_common::activity_pub::{ use posts::Post; use safe_string::SafeString; use schema::blogs; +use search::Searcher; use users::User; use {Connection, BASE_URL, USE_HTTPS}; @@ -411,9 +412,9 @@ impl Blog { json } - pub fn delete(&self, conn: &Connection) { + pub fn delete(&self, conn: &Connection, searcher: &Searcher) { for post in Post::get_for_blog(conn, &self) { - post.delete(conn); + post.delete(&(conn, searcher)); } diesel::delete(self) .execute(conn) @@ -509,6 +510,7 @@ pub(crate) mod tests { use instance::tests as instance_tests; use tests::db; use users::tests as usersTests; + use search::tests::get_searcher; use Connection as Conn; pub(crate) fn fill_database(conn: &Conn) -> Vec { @@ -756,7 +758,7 @@ pub(crate) mod tests { conn.test_transaction::<_, (), _>(|| { let blogs = fill_database(conn); - blogs[0].delete(conn); + blogs[0].delete(conn, &get_searcher()); assert!(Blog::get(conn, blogs[0].id).is_none()); Ok(()) @@ -767,6 +769,7 @@ pub(crate) mod tests { fn delete_via_user() { let conn = &db(); conn.test_transaction::<_, (), _>(|| { + let searcher = get_searcher(); let user = usersTests::fill_database(conn); fill_database(conn); @@ -818,10 +821,10 @@ pub(crate) mod tests { }, ); - user[0].delete(conn); + user[0].delete(conn, &searcher); assert!(Blog::get(conn, blog[0].id).is_some()); assert!(Blog::get(conn, blog[1].id).is_none()); - user[1].delete(conn); + user[1].delete(conn, &searcher); assert!(Blog::get(conn, blog[0].id).is_none()); Ok(()) diff --git a/plume-models/src/lib.rs b/plume-models/src/lib.rs index 2a7189b9..fc5f71a1 100644 --- a/plume-models/src/lib.rs +++ b/plume-models/src/lib.rs @@ -10,6 +10,7 @@ extern crate chrono; extern crate diesel; extern crate guid_create; extern crate heck; +extern crate itertools; #[macro_use] extern crate lazy_static; extern crate openssl; @@ -22,8 +23,11 @@ extern crate serde; extern crate serde_derive; #[macro_use] extern crate serde_json; +#[macro_use] +extern crate tantivy; extern crate url; extern crate webfinger; +extern crate whatlang; #[cfg(test)] #[macro_use] @@ -145,34 +149,6 @@ macro_rules! insert { }; } -/// Adds a function to a model to save changes to a model. -/// The model should derive diesel::AsChangeset. -/// -/// # Usage -/// -/// ```rust -/// impl Model { -/// update!(model_table); -/// } -/// -/// // Update and save changes -/// let m = Model::get(connection, 1); -/// m.foo = 42; -/// m.update(connection); -/// ``` -macro_rules! update { - ($table:ident) => { - pub fn update(&self, conn: &crate::Connection) -> Self { - diesel::update(self) - .set(self) - .execute(conn) - .expect(concat!("macro::update: Error updating ", stringify!($table))); - Self::get(conn, self.id) - .expect(concat!("macro::update: ", stringify!($table), " we just updated doesn't exist anymore???")) - } - }; -} - /// Returns the last row of a table. /// /// # Usage @@ -284,6 +260,7 @@ pub mod post_authors; pub mod posts; pub mod reshares; pub mod safe_string; +pub mod search; pub mod schema; pub mod tags; pub mod users; diff --git a/plume-models/src/posts.rs b/plume-models/src/posts.rs index 773169d5..c7e46b30 100644 --- a/plume-models/src/posts.rs +++ b/plume-models/src/posts.rs @@ -25,6 +25,7 @@ use plume_common::{ use post_authors::*; use reshares::Reshare; use safe_string::SafeString; +use search::Searcher; use schema::posts; use std::collections::HashSet; use tags::Tag; @@ -64,11 +65,11 @@ pub struct NewPost { pub cover_id: Option, } -impl<'a> Provider<(&'a Connection, Option)> for Post { +impl<'a> Provider<(&'a Connection, &'a Searcher, Option)> for Post { type Data = PostEndpoint; fn get( - (conn, user_id): &(&'a Connection, Option), + (conn, _search, user_id): &(&'a Connection, &Searcher, Option), id: i32, ) -> Result { if let Some(post) = Post::get(conn, id) { @@ -90,7 +91,7 @@ impl<'a> Provider<(&'a Connection, Option)> for Post { } fn list( - (conn, user_id): &(&'a Connection, Option), + (conn, _searcher, user_id): &(&'a Connection, &Searcher, Option), filter: PostEndpoint, ) -> Vec { let mut query = posts::table.into_boxed(); @@ -123,37 +124,57 @@ impl<'a> Provider<(&'a Connection, Option)> for Post { } fn create( - (_conn, _user_id): &(&'a Connection, Option), + (_conn, _searcher, _user_id): &(&'a Connection, &Searcher, Option), _query: PostEndpoint, ) -> Result { unimplemented!() } fn update( - (_conn, _user_id): &(&'a Connection, Option), + (_conn, _searcher, _user_id): &(&'a Connection, &Searcher, Option), _id: i32, _new_data: PostEndpoint, ) -> Result { unimplemented!() } - fn delete((conn, user_id): &(&'a Connection, Option), id: i32) { + fn delete((conn, searcher, user_id): &(&'a Connection, &Searcher, Option), id: i32) { let user_id = user_id.expect("Post as Provider::delete: not authenticated"); if let Some(post) = Post::get(conn, id) { if post.is_author(conn, user_id) { - post.delete(conn); + post.delete(&(conn, searcher)); } } } } impl Post { - insert!(posts, NewPost); get!(posts); - update!(posts); find_by!(posts, find_by_slug, slug as &str, blog_id as i32); find_by!(posts, find_by_ap_url, ap_url as &str); + last!(posts); + pub fn insert(conn: &Connection, new: NewPost, searcher: &Searcher) -> Self { + diesel::insert_into(posts::table) + .values(new) + .execute(conn) + .expect("Post::insert: Error saving in posts"); + let post = Self::last(conn); + searcher.add_document(conn, &post); + post + } + pub fn update(&self, conn: &Connection, searcher: &Searcher) -> Self { + diesel::update(self) + .set(self) + .execute(conn) + .expect("Post::update: Error updating posts"); + let post = Self::get(conn, self.id) + .expect("macro::update: posts we just updated doesn't exist anymore???"); + searcher.update_document(conn, &post); + post + } + + pub fn list_by_tag(conn: &Connection, tag: String, (min, max): (i32, i32)) -> Vec { use schema::tags; @@ -560,7 +581,7 @@ impl Post { act } - pub fn handle_update(conn: &Connection, updated: &Article) { + pub fn handle_update(conn: &Connection, updated: &Article, searcher: &Searcher) { let id = updated .object_props .id_string() @@ -620,7 +641,7 @@ impl Post { post.update_hashtags(conn, hashtags); } - post.update(conn); + post.update(conn, searcher); } pub fn update_mentions(&self, conn: &Connection, mentions: Vec) { @@ -765,8 +786,8 @@ impl Post { } } -impl FromActivity for Post { - fn from_activity(conn: &Connection, article: Article, _actor: Id) -> Post { +impl<'a> FromActivity for Post { + fn from_activity((conn, searcher): &(&'a Connection, &'a Searcher), article: Article, _actor: Id) -> Post { if let Some(post) = Post::find_by_ap_url( conn, &article.object_props.id_string().unwrap_or_default(), @@ -838,6 +859,7 @@ impl FromActivity for Post { .content, cover_id: cover, }, + searcher, ); for author in authors { @@ -877,8 +899,8 @@ impl FromActivity for Post { } } -impl Deletable for Post { - fn delete(&self, conn: &Connection) -> Delete { +impl<'a> Deletable<(&'a Connection, &'a Searcher), Delete> for Post { + fn delete(&self, (conn, searcher): &(&Connection, &Searcher)) -> Delete { let mut act = Delete::default(); act.delete_props .set_actor_link(self.get_authors(conn)[0].clone().into_id()) @@ -904,12 +926,13 @@ impl Deletable for Post { m.delete(conn); } diesel::delete(self) - .execute(conn) + .execute(*conn) .expect("Post::delete: DB error"); + searcher.delete_document(self); act } - fn delete_id(id: &str, actor_id: &str, conn: &Connection) { + fn delete_id(id: &str, actor_id: &str, (conn, searcher): &(&Connection, &Searcher)) { let actor = User::find_by_ap_url(conn, actor_id); let post = Post::find_by_ap_url(conn, id); let can_delete = actor @@ -919,7 +942,7 @@ impl Deletable for Post { }) .unwrap_or(false); if can_delete { - post.map(|p| p.delete(conn)); + post.map(|p| p.delete(&(conn, searcher))); } } } diff --git a/plume-models/src/search/mod.rs b/plume-models/src/search/mod.rs new file mode 100644 index 00000000..35776ae8 --- /dev/null +++ b/plume-models/src/search/mod.rs @@ -0,0 +1,167 @@ +mod searcher; +mod query; +mod tokenizer; +pub use self::searcher::*; +pub use self::query::PlumeQuery as Query; + + +#[cfg(test)] +pub(crate) mod tests { + use super::{Query, Searcher}; + use std::env::temp_dir; + use diesel::Connection; + + use plume_common::activity_pub::inbox::Deletable; + use plume_common::utils::random_hex; + use blogs::tests::fill_database; + use posts::{NewPost, Post}; + use post_authors::*; + use safe_string::SafeString; + use tests::db; + + + pub(crate) fn get_searcher() -> Searcher { + let dir = temp_dir().join("plume-test"); + if dir.exists() { + Searcher::open(&dir) + } else { + Searcher::create(&dir) + }.unwrap() + } + + #[test] + fn get_first_token() { + let vector = vec![ + ("+\"my token\" other", ("+\"my token\"", " other")), + ("-\"my token\" other", ("-\"my token\"", " other")), + (" \"my token\" other", ("\"my token\"", " other")), + ("\"my token\" other", ("\"my token\"", " other")), + ("+my token other", ("+my", " token other")), + ("-my token other", ("-my", " token other")), + (" my token other", ("my", " token other")), + ("my token other", ("my", " token other")), + ("+\"my token other", ("+\"my token other", "")), + ("-\"my token other", ("-\"my token other", "")), + (" \"my token other", ("\"my token other", "")), + ("\"my token other", ("\"my token other", "")), + ]; + for (source, res) in vector { + assert_eq!(Query::get_first_token(source), res); + } + } + + #[test] + fn from_str() { + let vector = vec![ + ("", ""), + ("a query", "a query"), + ("\"a query\"", "\"a query\""), + ("+a -\"query\"", "+a -query"), + ("title:\"something\" a query", "a query title:something"), + ("-title:\"something\" a query", "a query -title:something"), + ("author:user@domain", "author:user@domain"), + ("-author:@user@domain", "-author:user@domain"), + ("before:2017-11-05 before:2018-01-01", "before:2017-11-05"), + ("after:2017-11-05 after:2018-01-01", "after:2018-01-01"), + ]; + for (source, res) in vector { + assert_eq!(&Query::from_str(source).to_string(), res); + assert_eq!(Query::new().parse_query(source).to_string(), res); + } + } + + #[test] + fn setters() { + let vector = vec![ + ("something", "title:something"), + ("+something", "+title:something"), + ("-something", "-title:something"), + ("+\"something\"", "+title:something"), + ("+some thing", "+title:\"some thing\""), + ]; + for (source, res) in vector { + assert_eq!(&Query::new().title(source, None).to_string(), res); + } + + let vector = vec![ + ("something", "author:something"), + ("+something", "+author:something"), + ("-something", "-author:something"), + ("+\"something\"", "+author:something"), + ("+@someone@somewhere", "+author:someone@somewhere"), + ]; + for (source, res) in vector { + assert_eq!(&Query::new().author(source, None).to_string(), res); + } + } + + #[test] + fn open() { + {get_searcher()};//make sure $tmp/plume-test-tantivy exist + + let dir = temp_dir().join("plume-test"); + Searcher::open(&dir).unwrap(); + } + + #[test] + fn create() { + let dir = temp_dir().join(format!("plume-test-{}", random_hex())); + + assert!(Searcher::open(&dir).is_err()); + {Searcher::create(&dir).unwrap();} + Searcher::open(&dir).unwrap();//verify it's well created + } + + #[test] + fn search() { + let conn = &db(); + conn.test_transaction::<_, (), _>(|| { + let searcher = get_searcher(); + let blog = &fill_database(conn)[0]; + let author = &blog.list_authors(conn)[0]; + + let title = random_hex()[..8].to_owned(); + + let mut post = Post::insert(conn, NewPost { + blog_id: blog.id, + slug: title.clone(), + title: title.clone(), + content: SafeString::new(""), + published: true, + license: "CC-BY-SA".to_owned(), + ap_url: "".to_owned(), + creation_date: None, + subtitle: "".to_owned(), + source: "".to_owned(), + cover_id: None, + }, &searcher); + PostAuthor::insert(conn, NewPostAuthor { + post_id: post.id, + author_id: author.id, + }); + + searcher.commit(); + assert_eq!(searcher.search_document(conn, Query::from_str(&title), (0,1))[0].id, post.id); + + let newtitle = random_hex()[..8].to_owned(); + post.title = newtitle.clone(); + post.update(conn, &searcher); + searcher.commit(); + assert_eq!(searcher.search_document(conn, Query::from_str(&newtitle), (0,1))[0].id, post.id); + assert!(searcher.search_document(conn, Query::from_str(&title), (0,1)).is_empty()); + + post.delete(&(conn, &searcher)); + searcher.commit(); + assert!(searcher.search_document(conn, Query::from_str(&newtitle), (0,1)).is_empty()); + + Ok(()) + }); + } + + #[test] + fn drop_writer() { + let searcher = get_searcher(); + searcher.drop_writer(); + get_searcher(); + } +} diff --git a/plume-models/src/search/query.rs b/plume-models/src/search/query.rs new file mode 100644 index 00000000..50e49884 --- /dev/null +++ b/plume-models/src/search/query.rs @@ -0,0 +1,343 @@ +use chrono::{Datelike, naive::NaiveDate, offset::Utc}; +use tantivy::{query::*, schema::*, Term}; +use std::{cmp,ops::Bound}; +use search::searcher::Searcher; + + +//Generate functions for advanced search +macro_rules! gen_func { + ( $($field:ident),*; strip: $($strip:ident),* ) => { + $( //most fields go here, it's kinda the "default" way + pub fn $field(&mut self, mut val: &str, occur: Option) -> &mut Self { + if !val.trim_matches(&[' ', '"', '+', '-'][..]).is_empty() { + let occur = if let Some(occur) = occur { + occur + } else { + if val.get(0..1).map(|v| v=="+").unwrap_or(false) { + val = &val[1..]; + Occur::Must + } else if val.get(0..1).map(|v| v=="-").unwrap_or(false) { + val = &val[1..]; + Occur::MustNot + } else { + Occur::Should + } + }; + self.$field.push((occur, val.trim_matches(&[' ', '"'][..]).to_owned())); + } + self + } + )* + $( // blog and author go here, leading @ get dismissed + pub fn $strip(&mut self, mut val: &str, occur: Option) -> &mut Self { + if !val.trim_matches(&[' ', '"', '+', '-'][..]).is_empty() { + let occur = if let Some(occur) = occur { + occur + } else { + if val.get(0..1).map(|v| v=="+").unwrap_or(false) { + val = &val[1..]; + Occur::Must + } else if val.get(0..1).map(|v| v=="-").unwrap_or(false) { + val = &val[1..]; + Occur::MustNot + } else { + Occur::Should + } + }; + self.$strip.push((occur, val.trim_matches(&[' ', '"', '@'][..]).to_owned())); + } + self + } + )* + } +} + +//generate the parser for advanced query from string +macro_rules! gen_parser { + ( $self:ident, $query:ident, $occur:ident; normal: $($field:ident),*; date: $($date:ident),*) => { + $( // most fields go here + if $query.starts_with(concat!(stringify!($field), ':')) { + let new_query = &$query[concat!(stringify!($field), ':').len()..]; + let (token, rest) = Self::get_first_token(new_query); + $query = rest; + $self.$field(token, Some($occur)); + } else + )* + $( // dates (before/after) got here + if $query.starts_with(concat!(stringify!($date), ':')) { + let new_query = &$query[concat!(stringify!($date), ':').len()..]; + let (token, rest) = Self::get_first_token(new_query); + $query = rest; + if let Ok(token) = NaiveDate::parse_from_str(token, "%Y-%m-%d") { + $self.$date(&token); + } + } else + )* // fields without 'fieldname:' prefix are considered bare words, and will be searched in title, subtitle and content + { + let (token, rest) = Self::get_first_token($query); + $query = rest; + $self.text(token, Some($occur)); + } + } +} + +// generate the to_string, giving back a textual query from a PlumeQuery +macro_rules! gen_to_string { + ( $self:ident, $result:ident; normal: $($field:ident),*; date: $($date:ident),*) => { + $( + for (occur, val) in &$self.$field { + if val.contains(' ') { + $result.push_str(&format!("{}{}:\"{}\" ", Self::occur_to_str(&occur), stringify!($field), val)); + } else { + $result.push_str(&format!("{}{}:{} ", Self::occur_to_str(&occur), stringify!($field), val)); + } + } + )* + $( + for val in &$self.$date { + $result.push_str(&format!("{}:{} ", stringify!($date), NaiveDate::from_num_days_from_ce(*val as i32).format("%Y-%m-%d"))); + } + )* + } +} + +// convert PlumeQuery to Tantivy's Query +macro_rules! gen_to_query { + ( $self:ident, $result:ident; normal: $($normal:ident),*; oneoff: $($oneoff:ident),*) => { + $( // classic fields + for (occur, token) in $self.$normal { + $result.push((occur, Self::token_to_query(&token, stringify!($normal)))); + } + )* + $( // fields where having more than on Must make no sense in general, so it's considered a Must be one of these instead. + // Those fields are instance, author, blog, lang and license + let mut subresult = Vec::new(); + for (occur, token) in $self.$oneoff { + match occur { + Occur::Must => subresult.push((Occur::Should, Self::token_to_query(&token, stringify!($oneoff)))), + occur => $result.push((occur, Self::token_to_query(&token, stringify!($oneoff)))), + } + } + if !subresult.is_empty() { + $result.push((Occur::Must, Box::new(BooleanQuery::from(subresult)))); + } + )* + } +} + +#[derive(Default)] +pub struct PlumeQuery { + text: Vec<(Occur, String)>, + title: Vec<(Occur, String)>, + subtitle: Vec<(Occur, String)>, + content: Vec<(Occur, String)>, + tag: Vec<(Occur, String)>, + instance: Vec<(Occur, String)>, + author: Vec<(Occur, String)>, + blog: Vec<(Occur, String)>, + lang: Vec<(Occur, String)>, + license: Vec<(Occur, String)>, + before: Option, + after: Option, +} + +impl PlumeQuery { + + /// Create a new empty Query + pub fn new() -> Self { + Default::default() + } + + /// Create a new Query from &str + /// Same as doing + /// ```rust + /// # extern crate plume_models; + /// # use plume_models::search::Query; + /// let mut q = Query::new(); + /// q.parse_query("some query"); + /// ``` + pub fn from_str(query: &str) -> Self { + let mut res: Self = Default::default(); + + res.from_str_req(&query.trim()); + res + } + + /// Parse a query string into this Query + pub fn parse_query(&mut self, query: &str) -> &mut Self { + self.from_str_req(&query.trim()) + } + + /// Convert this Query to a Tantivy Query + pub fn into_query(self) -> BooleanQuery { + let mut result: Vec<(Occur, Box)> = Vec::new(); + gen_to_query!(self, result; normal: title, subtitle, content, tag; + oneoff: instance, author, blog, lang, license); + + for (occur, token) in self.text { // text entries need to be added as multiple Terms + match occur { + Occur::Must => { // a Must mean this must be in one of title subtitle or content, not in all 3 + let subresult = vec![ + (Occur::Should, Self::token_to_query(&token, "title")), + (Occur::Should, Self::token_to_query(&token, "subtitle")), + (Occur::Should, Self::token_to_query(&token, "content")), + ]; + + result.push((Occur::Must, Box::new(BooleanQuery::from(subresult)))); + }, + occur => { + result.push((occur, Self::token_to_query(&token, "title"))); + result.push((occur, Self::token_to_query(&token, "subtitle"))); + result.push((occur, Self::token_to_query(&token, "content"))); + }, + } + } + + if self.before.is_some() || self.after.is_some() { // if at least one range bound is provided + let after = self.after.unwrap_or_else(|| i64::from(NaiveDate::from_ymd(2000, 1, 1).num_days_from_ce())); + let before = self.before.unwrap_or_else(|| i64::from(Utc::today().num_days_from_ce())); + let field = Searcher::schema().get_field("creation_date").unwrap(); + let range = RangeQuery::new_i64_bounds(field, Bound::Included(after), Bound::Included(before)); + result.push((Occur::Must, Box::new(range))); + } + + result.into() + } + + //generate most setters functions + gen_func!(text, title, subtitle, content, tag, instance, lang, license; strip: author, blog); + + // documents newer than the provided date will be ignored + pub fn before(&mut self, date: &D) -> &mut Self { + let before = self.before.unwrap_or_else(|| i64::from(Utc::today().num_days_from_ce())); + self.before = Some(cmp::min(before, i64::from(date.num_days_from_ce()))); + self + } + + // documents older than the provided date will be ignored + pub fn after(&mut self, date: &D) -> &mut Self { + let after = self.after.unwrap_or_else(|| i64::from(NaiveDate::from_ymd(2000, 1, 1).num_days_from_ce())); + self.after = Some(cmp::max(after, i64::from(date.num_days_from_ce()))); + self + } + + // split a string into a token and a rest + pub fn get_first_token<'a>(mut query: &'a str) -> (&'a str, &'a str) { + query = query.trim(); + if query.is_empty() { + ("", "") + } else { + if query.get(0..1).map(|v| v=="\"").unwrap_or(false) { + if let Some(index) = query[1..].find('"') { + query.split_at(index+2) + } else { + (query, "") + } + } else if query.get(0..2).map(|v| v=="+\"" || v=="-\"").unwrap_or(false) { + if let Some(index) = query[2..].find('"') { + query.split_at(index+3) + } else { + (query, "") + } + } else { + if let Some(index) = query.find(' ') { + query.split_at(index) + } else { + (query, "") + } + } + } + } + + // map each Occur state to a prefix + fn occur_to_str(occur: &Occur) -> &'static str { + match occur { + Occur::Should => "", + Occur::Must => "+", + Occur::MustNot => "-", + } + } + + // recursive parser for query string + fn from_str_req(&mut self, mut query: &str) -> &mut Self { + query = query.trim_left(); + if query.is_empty() { + self + } else { + let occur = if query.get(0..1).map(|v| v=="+").unwrap_or(false) { + query = &query[1..]; + Occur::Must + } else if query.get(0..1).map(|v| v=="-").unwrap_or(false) { + query = &query[1..]; + Occur::MustNot + } else { + Occur::Should + }; + gen_parser!(self, query, occur; normal: title, subtitle, content, tag, + instance, author, blog, lang, license; + date: after, before); + self.from_str_req(query) + } + } + + // map a token and it's field to a query + fn token_to_query(token: &str, field_name: &str) -> Box { + let token = token.to_lowercase(); + let token = token.as_str(); + let field = Searcher::schema().get_field(field_name).unwrap(); + if token.contains('@') && (field_name=="author" || field_name=="blog") { + let pos = token.find('@').unwrap(); + let user_term = Term::from_field_text(field, &token[..pos]); + let instance_term = Term::from_field_text(Searcher::schema().get_field("instance").unwrap(), &token[pos+1..]); + Box::new(BooleanQuery::from(vec![ + (Occur::Must, Box::new(TermQuery::new(user_term, if field_name=="author" { IndexRecordOption::Basic } + else { IndexRecordOption::WithFreqsAndPositions } + )) as Box), + (Occur::Must, Box::new(TermQuery::new(instance_term, IndexRecordOption::Basic))), + ])) + } else if token.contains(' ') { // phrase query + match field_name { + "instance" | "author" | "tag" => // phrase query are not available on these fields, treat it as multiple Term queries + Box::new(BooleanQuery::from(token.split_whitespace() + .map(|token| { + let term = Term::from_field_text(field, token); + (Occur::Should, Box::new(TermQuery::new(term, IndexRecordOption::Basic)) + as Box) + }) + .collect::>())), + _ => Box::new(PhraseQuery::new(token.split_whitespace() + .map(|token| Term::from_field_text(field, token)) + .collect())) + } + } else { // Term Query + let term = Term::from_field_text(field, token); + let index_option = match field_name { + "instance" | "author" | "tag" => IndexRecordOption::Basic, + _ => IndexRecordOption::WithFreqsAndPositions, + }; + Box::new(TermQuery::new(term, index_option)) + } + } +} + + +impl ToString for PlumeQuery { + fn to_string(&self) -> String { + let mut result = String::new(); + for (occur, val) in &self.text { + if val.contains(' ') { + result.push_str(&format!("{}\"{}\" ", Self::occur_to_str(&occur), val)); + } else { + result.push_str(&format!("{}{} ", Self::occur_to_str(&occur), val)); + } + } + + gen_to_string!(self, result; normal: title, subtitle, content, tag, + instance, author, blog, lang, license; + date: before, after); + + result.pop();// remove trailing ' ' + result + } +} + diff --git a/plume-models/src/search/searcher.rs b/plume-models/src/search/searcher.rs new file mode 100644 index 00000000..22bfa20d --- /dev/null +++ b/plume-models/src/search/searcher.rs @@ -0,0 +1,203 @@ +use instance::Instance; +use posts::Post; +use tags::Tag; +use Connection; + +use chrono::Datelike; +use itertools::Itertools; +use tantivy::{ + collector::TopCollector, directory::MmapDirectory, + schema::*, tokenizer::*, Index, IndexWriter, Term +}; +use whatlang::{detect as detect_lang, Lang}; +use std::{cmp, fs::create_dir_all, path::Path, sync::Mutex}; + +use search::query::PlumeQuery; +use super::tokenizer; + +#[derive(Debug)] +pub enum SearcherError{ + IndexCreationError, + WriteLockAcquisitionError, + IndexOpeningError, + IndexEditionError, +} + +pub struct Searcher { + index: Index, + writer: Mutex>, +} + +impl Searcher { + pub fn schema() -> Schema { + let tag_indexing = TextOptions::default() + .set_indexing_options(TextFieldIndexing::default() + .set_tokenizer("whitespace_tokenizer") + .set_index_option(IndexRecordOption::Basic)); + + let content_indexing = TextOptions::default() + .set_indexing_options(TextFieldIndexing::default() + .set_tokenizer("content_tokenizer") + .set_index_option(IndexRecordOption::WithFreqsAndPositions)); + + let property_indexing = TextOptions::default() + .set_indexing_options(TextFieldIndexing::default() + .set_tokenizer("property_tokenizer") + .set_index_option(IndexRecordOption::WithFreqsAndPositions)); + + let mut schema_builder = SchemaBuilder::default(); + + schema_builder.add_i64_field("post_id", INT_STORED | INT_INDEXED); + schema_builder.add_i64_field("creation_date", INT_INDEXED); + + schema_builder.add_text_field("instance", tag_indexing.clone()); + schema_builder.add_text_field("author", tag_indexing.clone());//todo move to a user_indexing with user_tokenizer function + schema_builder.add_text_field("tag", tag_indexing); + + schema_builder.add_text_field("blog", content_indexing.clone()); + schema_builder.add_text_field("content", content_indexing.clone()); + schema_builder.add_text_field("subtitle", content_indexing.clone()); + schema_builder.add_text_field("title", content_indexing); + + schema_builder.add_text_field("lang", property_indexing.clone()); + schema_builder.add_text_field("license", property_indexing); + + schema_builder.build() + } + + + pub fn create(path: &AsRef) -> Result { + let whitespace_tokenizer = tokenizer::WhitespaceTokenizer + .filter(LowerCaser); + + let content_tokenizer = SimpleTokenizer + .filter(RemoveLongFilter::limit(40)) + .filter(LowerCaser); + + let property_tokenizer = NgramTokenizer::new(2, 8, false) + .filter(LowerCaser); + + let schema = Self::schema(); + + create_dir_all(path).map_err(|_| SearcherError::IndexCreationError)?; + let index = Index::create(MmapDirectory::open(path).map_err(|_| SearcherError::IndexCreationError)?, schema).map_err(|_| SearcherError::IndexCreationError)?; + + { + let tokenizer_manager = index.tokenizers(); + tokenizer_manager.register("whitespace_tokenizer", whitespace_tokenizer); + tokenizer_manager.register("content_tokenizer", content_tokenizer); + tokenizer_manager.register("property_tokenizer", property_tokenizer); + }//to please the borrow checker + Ok(Self { + writer: Mutex::new(Some(index.writer(50_000_000).map_err(|_| SearcherError::WriteLockAcquisitionError)?)), + index + }) + } + + pub fn open(path: &AsRef) -> Result { + let whitespace_tokenizer = tokenizer::WhitespaceTokenizer + .filter(LowerCaser); + + let content_tokenizer = SimpleTokenizer + .filter(RemoveLongFilter::limit(40)) + .filter(LowerCaser); + + let property_tokenizer = NgramTokenizer::new(2, 8, false) + .filter(LowerCaser); + + let index = Index::open(MmapDirectory::open(path).map_err(|_| SearcherError::IndexOpeningError)?).map_err(|_| SearcherError::IndexOpeningError)?; + + { + let tokenizer_manager = index.tokenizers(); + tokenizer_manager.register("whitespace_tokenizer", whitespace_tokenizer); + tokenizer_manager.register("content_tokenizer", content_tokenizer); + tokenizer_manager.register("property_tokenizer", property_tokenizer); + }//to please the borrow checker + let mut writer = index.writer(50_000_000).map_err(|_| SearcherError::WriteLockAcquisitionError)?; + writer.garbage_collect_files().map_err(|_| SearcherError::IndexEditionError)?; + Ok(Self { + writer: Mutex::new(Some(writer)), + index, + }) + } + + pub fn add_document(&self, conn: &Connection, post: &Post) { + let schema = self.index.schema(); + + let post_id = schema.get_field("post_id").unwrap(); + let creation_date = schema.get_field("creation_date").unwrap(); + + let instance = schema.get_field("instance").unwrap(); + let author = schema.get_field("author").unwrap(); + let tag = schema.get_field("tag").unwrap(); + + let blog_name = schema.get_field("blog").unwrap(); + let content = schema.get_field("content").unwrap(); + let subtitle = schema.get_field("subtitle").unwrap(); + let title = schema.get_field("title").unwrap(); + + let lang = schema.get_field("lang").unwrap(); + let license = schema.get_field("license").unwrap(); + + let mut writer = self.writer.lock().unwrap(); + let writer = writer.as_mut().unwrap(); + writer.add_document(doc!( + post_id => i64::from(post.id), + author => post.get_authors(conn).into_iter().map(|u| u.get_fqn(conn)).join(" "), + creation_date => i64::from(post.creation_date.num_days_from_ce()), + instance => Instance::get(conn, post.get_blog(conn).instance_id).unwrap().public_domain.clone(), + tag => Tag::for_post(conn, post.id).into_iter().map(|t| t.tag).join(" "), + blog_name => post.get_blog(conn).title, + content => post.content.get().clone(), + subtitle => post.subtitle.clone(), + title => post.title.clone(), + lang => detect_lang(post.content.get()).and_then(|i| if i.is_reliable() { Some(i.lang()) } else {None} ).unwrap_or(Lang::Eng).name(), + license => post.license.clone(), + )); + } + + pub fn delete_document(&self, post: &Post) { + let schema = self.index.schema(); + let post_id = schema.get_field("post_id").unwrap(); + + let doc_id = Term::from_field_i64(post_id, i64::from(post.id)); + let mut writer = self.writer.lock().unwrap(); + let writer = writer.as_mut().unwrap(); + writer.delete_term(doc_id); + } + + pub fn update_document(&self, conn: &Connection, post: &Post) { + self.delete_document(post); + self.add_document(conn, post); + } + + pub fn search_document(&self, conn: &Connection, query: PlumeQuery, (min, max): (i32, i32)) -> Vec{ + let schema = self.index.schema(); + let post_id = schema.get_field("post_id").unwrap(); + + let mut collector = TopCollector::with_limit(cmp::max(1,max) as usize); + + let searcher = self.index.searcher(); + searcher.search(&query.into_query(), &mut collector).unwrap(); + + collector.docs().get(min as usize..).unwrap_or(&[]) + .into_iter() + .filter_map(|doc_add| { + let doc = searcher.doc(*doc_add).ok()?; + let id = doc.get_first(post_id)?; + Post::get(conn, id.i64_value() as i32) + //borrow checker don't want me to use filter_map or and_then here + }) + .collect() + } + + pub fn commit(&self) { + let mut writer = self.writer.lock().unwrap(); + writer.as_mut().unwrap().commit().unwrap(); + self.index.load_searchers().unwrap(); + } + + pub fn drop_writer(&self) { + self.writer.lock().unwrap().take(); + } +} diff --git a/plume-models/src/search/tokenizer.rs b/plume-models/src/search/tokenizer.rs new file mode 100644 index 00000000..87861797 --- /dev/null +++ b/plume-models/src/search/tokenizer.rs @@ -0,0 +1,67 @@ +use std::str::CharIndices; +use tantivy::tokenizer::{Token, TokenStream, Tokenizer}; + +/// Tokenize the text by splitting on whitespaces. Pretty much a copy of Tantivy's SimpleTokenizer, +/// but not splitting on punctuation +#[derive(Clone)] +pub struct WhitespaceTokenizer; + +pub struct WhitespaceTokenStream<'a> { + text: &'a str, + chars: CharIndices<'a>, + token: Token, +} + +impl<'a> Tokenizer<'a> for WhitespaceTokenizer { + type TokenStreamImpl = WhitespaceTokenStream<'a>; + + fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl { + WhitespaceTokenStream { + text, + chars: text.char_indices(), + token: Token::default(), + } + } +} +impl<'a> WhitespaceTokenStream<'a> { + // search for the end of the current token. + fn search_token_end(&mut self) -> usize { + (&mut self.chars) + .filter(|&(_, ref c)| c.is_whitespace()) + .map(|(offset, _)| offset) + .next() + .unwrap_or_else(|| self.text.len()) + } +} + +impl<'a> TokenStream for WhitespaceTokenStream<'a> { + fn advance(&mut self) -> bool { + self.token.text.clear(); + self.token.position = self.token.position.wrapping_add(1); + + loop { + match self.chars.next() { + Some((offset_from, c)) => { + if !c.is_whitespace() { + let offset_to = self.search_token_end(); + self.token.offset_from = offset_from; + self.token.offset_to = offset_to; + self.token.text.push_str(&self.text[offset_from..offset_to]); + return true; + } + } + None => { + return false; + } + } + } + } + + fn token(&self) -> &Token { + &self.token + } + + fn token_mut(&mut self) -> &mut Token { + &mut self.token + } +} diff --git a/plume-models/src/users.rs b/plume-models/src/users.rs index 67a5e119..11c077bc 100644 --- a/plume-models/src/users.rs +++ b/plume-models/src/users.rs @@ -41,6 +41,7 @@ use posts::Post; use reshares::Reshare; use safe_string::SafeString; use schema::users; +use search::Searcher; use {ap_url, Connection, BASE_URL, USE_HTTPS}; pub type CustomPerson = CustomObject; @@ -104,13 +105,13 @@ impl User { .expect("User::one_by_instance: loading error") } - pub fn delete(&self, conn: &Connection) { + pub fn delete(&self, conn: &Connection, searcher: &Searcher) { use schema::post_authors; Blog::find_for_author(conn, self) .iter() .filter(|b| b.list_authors(conn).len() <= 1) - .for_each(|b| b.delete(conn)); + .for_each(|b| b.delete(conn, searcher)); // delete the posts if they is the only author let all_their_posts_ids: Vec = post_authors::table .filter(post_authors::author_id.eq(self.id)) @@ -129,7 +130,7 @@ impl User { if !has_other_authors { Post::get(conn, post_id) .expect("User::delete: post not found error") - .delete(conn); + .delete(&(conn, searcher)); } } @@ -981,6 +982,7 @@ pub(crate) mod tests { use super::*; use diesel::Connection; use instance::{tests as instance_tests, Instance}; + use search::tests::get_searcher; use tests::db; use Connection as Conn; @@ -1077,7 +1079,7 @@ pub(crate) mod tests { let inserted = fill_database(conn); assert!(User::get(conn, inserted[0].id).is_some()); - inserted[0].delete(conn); + inserted[0].delete(conn, &get_searcher()); assert!(User::get(conn, inserted[0].id).is_none()); Ok(()) diff --git a/src/api/posts.rs b/src/api/posts.rs index cf68ce57..15ac3501 100644 --- a/src/api/posts.rs +++ b/src/api/posts.rs @@ -9,18 +9,20 @@ use plume_models::{ Connection, db_conn::DbConn, posts::Post, + search::Searcher as UnmanagedSearcher, }; use api::authorization::*; +use Searcher; #[get("/posts/")] -fn get(id: i32, conn: DbConn, auth: Option>) -> Json { - let post = )>>::get(&(&*conn, auth.map(|a| a.0.user_id)), id).ok(); +fn get(id: i32, conn: DbConn, auth: Option>, search: Searcher) -> Json { + let post = )>>::get(&(&*conn, &search, auth.map(|a| a.0.user_id)), id).ok(); Json(json!(post)) } #[get("/posts")] -fn list(conn: DbConn, uri: &Origin, auth: Option>) -> Json { +fn list(conn: DbConn, uri: &Origin, auth: Option>, search: Searcher) -> Json { let query: PostEndpoint = serde_qs::from_str(uri.query().unwrap_or("")).expect("api::list: invalid query error"); - let post = )>>::list(&(&*conn, auth.map(|a| a.0.user_id)), query); + let post = )>>::list(&(&*conn, &search, auth.map(|a| a.0.user_id)), query); Json(json!(post)) } diff --git a/src/inbox.rs b/src/inbox.rs index 3773b513..a045d7fc 100644 --- a/src/inbox.rs +++ b/src/inbox.rs @@ -19,11 +19,11 @@ use plume_common::activity_pub::{ }; use plume_models::{ comments::Comment, follows::Follow, instance::Instance, likes, posts::Post, reshares::Reshare, - users::User, Connection, + users::User, search::Searcher, Connection, }; pub trait Inbox { - fn received(&self, conn: &Connection, act: serde_json::Value) -> Result<(), Error> { + fn received(&self, conn: &Connection, searcher: &Searcher, act: serde_json::Value) -> Result<(), Error> { let actor_id = Id::new(act["actor"].as_str().unwrap_or_else(|| { act["actor"]["id"] .as_str() @@ -37,7 +37,7 @@ pub trait Inbox { } "Create" => { let act: Create = serde_json::from_value(act.clone())?; - if Post::try_from_activity(conn, act.clone()) + if Post::try_from_activity(&(conn, searcher), act.clone()) || Comment::try_from_activity(conn, act) { Ok(()) @@ -53,7 +53,7 @@ pub trait Inbox { .object_props .id_string()?, actor_id.as_ref(), - conn, + &(conn, searcher), ); Ok(()) } @@ -113,7 +113,7 @@ pub trait Inbox { } "Update" => { let act: Update = serde_json::from_value(act.clone())?; - Post::handle_update(conn, &act.update_props.object_object()?); + Post::handle_update(conn, &act.update_props.object_object()?, searcher); Ok(()) } _ => Err(InboxError::InvalidType)?, diff --git a/src/main.rs b/src/main.rs index e7cfffdd..63b1e83f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ extern crate atom_syndication; extern crate canapi; extern crate chrono; extern crate colored; +extern crate ctrlc; extern crate diesel; extern crate dotenv; extern crate failure; @@ -13,6 +14,7 @@ extern crate gettextrs; extern crate guid_create; extern crate heck; extern crate multipart; +extern crate num_cpus; extern crate plume_api; extern crate plume_common; extern crate plume_models; @@ -22,6 +24,7 @@ extern crate rocket_contrib; extern crate rocket_csrf; extern crate rocket_i18n; extern crate rpassword; +extern crate scheduled_thread_pool; extern crate serde; #[macro_use] extern crate serde_derive; @@ -32,20 +35,24 @@ extern crate validator; #[macro_use] extern crate validator_derive; extern crate webfinger; -extern crate workerpool; use diesel::r2d2::ConnectionManager; use rocket::State; use rocket_contrib::Template; use rocket_csrf::CsrfFairingBuilder; -use plume_models::{DATABASE_URL, Connection, db_conn::DbPool}; -use workerpool::{Pool, thunk::ThunkWorker}; +use plume_models::{DATABASE_URL, Connection, + db_conn::DbPool, search::Searcher as UnmanagedSearcher}; +use scheduled_thread_pool::ScheduledThreadPool; +use std::process::exit; +use std::sync::Arc; +use std::time::Duration; mod api; mod inbox; mod routes; -type Worker<'a> = State<'a, Pool>>; +type Worker<'a> = State<'a, ScheduledThreadPool>; +type Searcher<'a> = State<'a, Arc>; /// Initializes a database pool. fn init_pool() -> Option { @@ -56,7 +63,21 @@ fn init_pool() -> Option { } fn main() { - let pool = init_pool().expect("main: database pool initialization error"); + + let dbpool = init_pool().expect("main: database pool initialization error"); + let workpool = ScheduledThreadPool::with_name("worker {}", num_cpus::get()); + let searcher = Arc::new(UnmanagedSearcher::open(&"search_index").unwrap()); + + let commiter = searcher.clone(); + workpool.execute_with_fixed_delay(Duration::from_secs(5), Duration::from_secs(60*30), move || commiter.commit()); + + let search_unlocker = searcher.clone(); + ctrlc::set_handler(move || { + search_unlocker.drop_writer(); + exit(0); + }).expect("Error setting Ctrl-c handler"); + + rocket::ignite() .mount("/", routes![ routes::blogs::paginated_details, @@ -119,6 +140,9 @@ fn main() { routes::reshares::create, routes::reshares::create_auth, + routes::search::index, + routes::search::query, + routes::session::new, routes::session::new_message, routes::session::create, @@ -167,8 +191,9 @@ fn main() { routes::errors::not_found, routes::errors::server_error ]) - .manage(pool) - .manage(Pool::>::new(4)) + .manage(dbpool) + .manage(workpool) + .manage(searcher) .attach(Template::custom(|engines| { rocket_i18n::tera(&mut engines.tera); })) diff --git a/src/routes/blogs.rs b/src/routes/blogs.rs index 96175d03..1846abd5 100644 --- a/src/routes/blogs.rs +++ b/src/routes/blogs.rs @@ -21,6 +21,7 @@ use plume_models::{ users::User }; use routes::Page; +use Searcher; #[get("/~/?", rank = 2)] fn paginated_details(name: String, conn: DbConn, user: Option, page: Page) -> Template { @@ -130,10 +131,10 @@ fn create(conn: DbConn, data: LenientForm, user: User) -> Result/delete")] -fn delete(conn: DbConn, name: String, user: Option) -> Result>{ +fn delete(conn: DbConn, name: String, user: Option, searcher: Searcher) -> Result>{ let blog = Blog::find_local(&*conn, &name).ok_or(None)?; if user.map(|u| u.is_author_in(&*conn, &blog)).unwrap_or(false) { - blog.delete(&conn); + blog.delete(&conn, &searcher); Ok(Redirect::to(uri!(super::instance::index))) } else { Err(Some(Template::render("errors/403", json!({// TODO actually return 403 error code diff --git a/src/routes/comments.rs b/src/routes/comments.rs index d7fd08ed..21b2491c 100644 --- a/src/routes/comments.rs +++ b/src/routes/comments.rs @@ -1,13 +1,11 @@ use activitypub::object::Note; use rocket::{ - State, request::LenientForm, response::Redirect }; use rocket_contrib::Template; use serde_json; use validator::Validate; -use workerpool::{Pool, thunk::*}; use plume_common::{utils, activity_pub::{broadcast, ApRequest, ActivityStream}}; use plume_models::{ @@ -19,6 +17,7 @@ use plume_models::{ safe_string::SafeString, users::User }; +use Worker; #[derive(FromForm, Debug, Validate, Serialize)] struct NewCommentForm { @@ -29,7 +28,7 @@ struct NewCommentForm { } #[post("/~///comment", data = "")] -fn create(blog_name: String, slug: String, data: LenientForm, user: User, conn: DbConn, worker: State>>) +fn create(blog_name: String, slug: String, data: LenientForm, user: User, conn: DbConn, worker: Worker) -> Result> { let blog = Blog::find_by_fqn(&*conn, &blog_name).ok_or(None)?; let post = Post::find_by_slug(&*conn, &slug, blog.id).ok_or(None)?; @@ -56,7 +55,7 @@ fn create(blog_name: String, slug: String, data: LenientForm, us // federate let dest = User::one_by_instance(&*conn); let user_clone = user.clone(); - worker.execute(Thunk::of(move || broadcast(&user_clone, new_comment, dest))); + worker.execute(move || broadcast(&user_clone, new_comment, dest)); Redirect::to(uri!(super::posts::details: blog = blog_name, slug = slug)) }) diff --git a/src/routes/instance.rs b/src/routes/instance.rs index c3dec53f..24b27f16 100644 --- a/src/routes/instance.rs +++ b/src/routes/instance.rs @@ -18,6 +18,7 @@ use plume_models::{ }; use inbox::Inbox; use routes::Page; +use Searcher; #[get("/")] fn index(conn: DbConn, user: Option) -> Template { @@ -190,15 +191,15 @@ fn admin_users_paginated(admin: Admin, conn: DbConn, page: Page) -> Template { } #[post("/admin/users//ban")] -fn ban(_admin: Admin, conn: DbConn, id: i32) -> Redirect { +fn ban(_admin: Admin, conn: DbConn, id: i32, searcher: Searcher) -> Redirect { if let Some(u) = User::get(&*conn, id) { - u.delete(&*conn); + u.delete(&*conn, &searcher); } Redirect::to(uri!(admin_users)) } #[post("/inbox", data = "")] -fn shared_inbox(conn: DbConn, data: String, headers: Headers) -> Result> { +fn shared_inbox(conn: DbConn, data: String, headers: Headers, searcher: Searcher) -> Result> { let act: serde_json::Value = serde_json::from_str(&data[..]).expect("instance::shared_inbox: deserialization error"); let activity = act.clone(); @@ -216,7 +217,7 @@ fn shared_inbox(conn: DbConn, data: String, headers: Headers) -> Result String::new(), Err(e) => { println!("Shared inbox error: {}\n{}", e.as_fail(), e.backtrace()); diff --git a/src/routes/likes.rs b/src/routes/likes.rs index 9726a65a..c94e1ee2 100644 --- a/src/routes/likes.rs +++ b/src/routes/likes.rs @@ -1,5 +1,4 @@ -use rocket::{State, response::{Redirect, Flash}}; -use workerpool::{Pool, thunk::*}; +use rocket::{response::{Redirect, Flash}}; use plume_common::activity_pub::{broadcast, inbox::{Notify, Deletable}}; use plume_common::utils; @@ -10,9 +9,10 @@ use plume_models::{ posts::Post, users::User }; +use Worker; #[post("/~///like")] -fn create(blog: String, slug: String, user: User, conn: DbConn, worker: State>>) -> Option { +fn create(blog: String, slug: String, user: User, conn: DbConn, worker: Worker) -> Option { let b = Blog::find_by_fqn(&*conn, &blog)?; let post = Post::find_by_slug(&*conn, &slug, b.id)?; @@ -27,12 +27,12 @@ fn create(blog: String, slug: String, user: User, conn: DbConn, worker: State for Page { } } +impl<'v> FromFormValue<'v> for Page { + type Error = &'v RawStr; + + fn from_form_value(form_value: &'v RawStr) -> Result { + match form_value.parse::() { + Ok(page) => Ok(Page{page}), + _ => Err(form_value), + } + } +} + impl Page { pub fn first() -> Page { Page { @@ -109,6 +122,7 @@ pub mod reshares; pub mod session; pub mod tags; pub mod user; +pub mod search; pub mod well_known; #[get("/static/", rank = 2)] diff --git a/src/routes/posts.rs b/src/routes/posts.rs index 07729a22..e0e8266b 100644 --- a/src/routes/posts.rs +++ b/src/routes/posts.rs @@ -1,13 +1,12 @@ use activitypub::object::Article; use chrono::Utc; use heck::{CamelCase, KebabCase}; -use rocket::{State, request::LenientForm}; +use rocket::{request::LenientForm}; use rocket::response::{Redirect, Flash}; use rocket_contrib::Template; use serde_json; use std::{collections::{HashMap, HashSet}, borrow::Cow}; use validator::{Validate, ValidationError, ValidationErrors}; -use workerpool::{Pool, thunk::*}; use plume_common::activity_pub::{broadcast, ActivityStream, ApRequest, inbox::Deletable}; use plume_common::utils; @@ -24,6 +23,8 @@ use plume_models::{ tags::*, users::User }; +use Worker; +use Searcher; #[derive(FromForm)] struct CommentQuery { @@ -163,7 +164,7 @@ fn edit(blog: String, slug: String, user: User, conn: DbConn) -> Option