Merge pull request 'Recreate search index if its format is outdated' (#802) from KitaitiMakoto/Plume:invalid-index into main

Reviewed-on: https://git.joinplu.me/Plume/Plume/pulls/802
Reviewed-by: Mina Galić <me+git@igalic.co>
This commit is contained in:
kiwii 2020-07-19 12:04:43 +00:00
commit 6de9a1f1c8
4 changed files with 42 additions and 4 deletions

Binary file not shown.

Binary file not shown.

View File

@ -5,10 +5,10 @@ use crate::{
use chrono::Datelike; use chrono::Datelike;
use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl}; use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl};
use itertools::Itertools; use itertools::Itertools;
use std::{cmp, fs::create_dir_all, path::Path, sync::Mutex}; use std::{cmp, fs::create_dir_all, io, path::Path, sync::Mutex};
use tantivy::{ use tantivy::{
collector::TopDocs, directory::MmapDirectory, schema::*, Index, IndexReader, IndexWriter, collector::TopDocs, directory::MmapDirectory, schema::*, Index, IndexReader, IndexWriter,
ReloadPolicy, Term, ReloadPolicy, TantivyError, Term,
}; };
use whatlang::{detect as detect_lang, Lang}; use whatlang::{detect as detect_lang, Lang};
@ -18,6 +18,7 @@ pub enum SearcherError {
WriteLockAcquisitionError, WriteLockAcquisitionError,
IndexOpeningError, IndexOpeningError,
IndexEditionError, IndexEditionError,
InvalidIndexDataError,
} }
pub struct Searcher { pub struct Searcher {
@ -135,7 +136,19 @@ impl Searcher {
.reader_builder() .reader_builder()
.reload_policy(ReloadPolicy::Manual) .reload_policy(ReloadPolicy::Manual)
.try_into() .try_into()
.map_err(|_| SearcherError::IndexCreationError)?, .map_err(|e| {
if let TantivyError::IOError(err) = e {
let err: io::Error = err.into();
if err.kind() == io::ErrorKind::InvalidData {
// The search index was created in an older Tantivy format.
SearcherError::InvalidIndexDataError
} else {
SearcherError::IndexCreationError
}
} else {
SearcherError::IndexCreationError
}
})?,
index, index,
}) })
} }

27
src/main.rs Normal file → Executable file
View File

@ -10,6 +10,7 @@ extern crate serde_json;
#[macro_use] #[macro_use]
extern crate validator_derive; extern crate validator_derive;
use chrono::Utc;
use clap::App; use clap::App;
use diesel::r2d2::ConnectionManager; use diesel::r2d2::ConnectionManager;
use plume_models::{ use plume_models::{
@ -21,6 +22,8 @@ use plume_models::{
}; };
use rocket_csrf::CsrfFairingBuilder; use rocket_csrf::CsrfFairingBuilder;
use scheduled_thread_pool::ScheduledThreadPool; use scheduled_thread_pool::ScheduledThreadPool;
use std::fs;
use std::path::Path;
use std::process::exit; use std::process::exit;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::Duration; use std::time::Duration;
@ -98,8 +101,30 @@ Then try to restart Plume.
} }
let workpool = ScheduledThreadPool::with_name("worker {}", num_cpus::get()); let workpool = ScheduledThreadPool::with_name("worker {}", num_cpus::get());
// we want a fast exit here, so // we want a fast exit here, so
let mut open_searcher =
UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers);
if let Err(Error::Search(SearcherError::InvalidIndexDataError)) = open_searcher {
if UnmanagedSearcher::create(&CONFIG.search_index, &CONFIG.search_tokenizers).is_err() {
let current_path = Path::new(&CONFIG.search_index);
let backup_path = format!("{}.{}", &current_path.display(), Utc::now().timestamp());
let backup_path = Path::new(&backup_path);
fs::rename(current_path, backup_path)
.expect("main: error on backing up search index directory for recreating");
if UnmanagedSearcher::create(&CONFIG.search_index, &CONFIG.search_tokenizers).is_ok() {
if fs::remove_dir_all(backup_path).is_err() {
eprintln!(
"error on removing backup directory: {}. it remains",
backup_path.display()
);
}
} else {
panic!("main: error on recreating search index in new index format. remove search index and run `plm search init` manually");
}
}
open_searcher = UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers);
}
#[allow(clippy::match_wild_err_arm)] #[allow(clippy::match_wild_err_arm)]
let searcher = match UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers) { let searcher = match open_searcher {
Err(Error::Search(e)) => match e { Err(Error::Search(e)) => match e {
SearcherError::WriteLockAcquisitionError => panic!( SearcherError::WriteLockAcquisitionError => panic!(
r#" r#"