Plume/plume-models/src/medias.rs

634 lines
20 KiB
Rust
Raw Normal View History

2020-01-21 07:02:03 +01:00
use crate::{
2022-12-16 22:51:14 +01:00
ap_url, instance::Instance, safe_string::SafeString, schema::medias, users::User, Connection,
Error, Result, CONFIG,
2020-01-21 07:02:03 +01:00
};
2022-05-02 17:54:37 +02:00
use activitystreams::{object::Image, prelude::*};
use diesel::{self, ExpressionMethods, QueryDsl, RunQueryDsl};
2018-10-31 10:40:20 +01:00
use guid_create::GUID;
Big refactoring of the Inbox (#443) * Big refactoring of the Inbox We now have a type that routes an activity through the registered handlers until one of them matches. Each Actor/Activity/Object combination is represented by an implementation of AsObject These combinations are then registered on the Inbox type, which will try to deserialize the incoming activity in the requested types. Advantages: - nicer syntax: the final API is clearer and more idiomatic - more generic: only two traits (`AsActor` and `AsObject`) instead of one for each kind of activity - it is easier to see which activities we handle and which one we don't * Small fixes - Avoid panics - Don't search for AP ID infinitely - Code style issues * Fix tests * Introduce a new trait: FromId It should be implemented for any AP object. It allows to look for an object in database using its AP ID, or to dereference it if it was not present in database Also moves the inbox code to plume-models to test it (and write a basic test for each activity type we handle) * Use if let instead of match * Don't require PlumeRocket::intl for tests * Return early and remove a forgotten dbg! * Add more tests to try to understand where the issues come from * Also add a test for comment federation * Don't check creation_date is the same for blogs * Make user and blog federation more tolerant to errors/missing fields * Make clippy happy * Use the correct Accept header when dereferencing * Fix follow approval with Mastodon * Add spaces to characters that should not be in usernames And validate blog names too * Smarter dereferencing: only do it once for each actor/object * Forgot some files * Cargo fmt * Delete plume_test * Delete plume_tests * Update get_id docs + Remove useless : Sized * Appease cargo fmt * Remove dbg! 
+ Use as_ref instead of clone when possible + Use and_then instead of map when possible * Remove .po~ * send unfollow to local instance * read cover from update activity * Make sure "cc" and "to" are never empty and fix a typo in a constant name * Cargo fmt
2019-04-17 19:31:47 +02:00
use plume_common::{
2022-05-02 17:54:37 +02:00
activity_pub::{inbox::FromId, request, ToAsString, ToAsUri},
utils::{escape, MediaProcessor},
Big refactoring of the Inbox (#443) * Big refactoring of the Inbox We now have a type that routes an activity through the registered handlers until one of them matches. Each Actor/Activity/Object combination is represented by an implementation of AsObject These combinations are then registered on the Inbox type, which will try to deserialize the incoming activity in the requested types. Advantages: - nicer syntax: the final API is clearer and more idiomatic - more generic: only two traits (`AsActor` and `AsObject`) instead of one for each kind of activity - it is easier to see which activities we handle and which one we don't * Small fixes - Avoid panics - Don't search for AP ID infinitely - Code style issues * Fix tests * Introduce a new trait: FromId It should be implemented for any AP object. It allows to look for an object in database using its AP ID, or to dereference it if it was not present in database Also moves the inbox code to plume-models to test it (and write a basic test for each activity type we handle) * Use if let instead of match * Don't require PlumeRocket::intl for tests * Return early and remove a forgotten dbg! * Add more tests to try to understand where the issues come from * Also add a test for comment federation * Don't check creation_date is the same for blogs * Make user and blog federation more tolerant to errors/missing fields * Make clippy happy * Use the correct Accept header when dereferencing * Fix follow approval with Mastodon * Add spaces to characters that should not be in usernames And validate blog names too * Smarter dereferencing: only do it once for each actor/object * Forgot some files * Cargo fmt * Delete plume_test * Delete plume_tests * Update get_id docs + Remove useless : Sized * Appease cargo fmt * Remove dbg! 
+ Use as_ref instead of clone when possible + Use and_then instead of map when possible * Remove .po~ * send unfollow to local instance * read cover from update activity * Make sure "cc" and "to" are never empty and fix a typo in a constant name * Cargo fmt
2019-04-17 19:31:47 +02:00
};
use std::{
fs::{self, DirBuilder},
path::{self, Path, PathBuf},
};
use tracing::warn;
use url::Url;
#[cfg(feature = "s3")]
use crate::config::S3Config;
const REMOTE_MEDIA_DIRECTORY: &str = "remote";
2018-09-02 13:34:48 +02:00
2021-02-23 17:05:19 +01:00
#[derive(Clone, Identifiable, Queryable, AsChangeset)]
2018-09-02 13:34:48 +02:00
pub struct Media {
pub id: i32,
pub file_path: String,
pub alt_text: String,
pub is_remote: bool,
pub remote_url: Option<String>,
pub sensitive: bool,
2018-09-02 22:55:42 +02:00
pub content_warning: Option<String>,
pub owner_id: i32,
2018-09-02 13:34:48 +02:00
}
/// Values required to insert a new row into the `medias` table.
///
/// Mirrors [`Media`] without the `id` field.
#[derive(Debug, Insertable)]
#[table_name = "medias"]
pub struct NewMedia {
    /// Location of the stored file; left empty by `save_remote`.
    pub file_path: String,
    /// Alternative text describing the media.
    pub alt_text: String,
    /// Whether the media is only referenced through `remote_url`.
    pub is_remote: bool,
    /// URL of the media when it is not stored by this instance.
    pub remote_url: Option<String>,
    /// Whether the media is flagged as sensitive.
    pub sensitive: bool,
    /// Optional warning text attached to the media.
    pub content_warning: Option<String>,
    /// Id of the user owning this media.
    pub owner_id: i32,
}
2023-01-02 18:45:13 +01:00
/// Broad kind of a media file, as determined by `Media::category`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MediaCategory {
    Image,
    Audio,
    Video,
    Unknown,
}

impl MediaCategory {
    /// Returns the lowercase name of the category (`"image"`, `"audio"`,
    /// `"video"` or `"unknown"`).
    ///
    /// The returned string is a static literal, so it is not tied to the
    /// lifetime of `self` and may outlive the value it was obtained from.
    pub fn to_string(&self) -> &'static str {
        match self {
            MediaCategory::Image => "image",
            MediaCategory::Audio => "audio",
            MediaCategory::Video => "video",
            MediaCategory::Unknown => "unknown",
        }
    }
}
2018-09-02 13:34:48 +02:00
impl Media {
insert!(medias, NewMedia);
get!(medias);
2021-02-23 17:04:31 +01:00
find_by!(medias, find_by_file_path, file_path as &str);
/// Loads every media whose owner is the user with id `owner`, ordered by
/// descending media id.
pub fn for_user(conn: &Connection, owner: i32) -> Result<Vec<Media>> {
    let owned_by_user = medias::table
        .filter(medias::owner_id.eq(owner))
        .order(medias::id.desc());
    let rows = owned_by_user.load::<Self>(conn)?;
    Ok(rows)
}
2018-09-02 22:55:42 +02:00
pub fn list_all_medias(conn: &Connection) -> Result<Vec<Media>> {
2019-03-20 17:56:17 +01:00
medias::table.load::<Media>(conn).map_err(Error::from)
}
2019-03-20 17:56:17 +01:00
/// Loads one page of the medias owned by `user`, ordered by descending id.
///
/// `(min, max)` is the half-open range of row positions to return: the query
/// skips `min` rows and returns at most `max - min` rows.
///
/// Negative bounds are clamped to zero and a reversed range (`max < min`)
/// yields an empty page, so the generated `OFFSET`/`LIMIT` are never negative.
pub fn page_for_user(
    conn: &Connection,
    user: &User,
    (min, max): (i32, i32),
) -> Result<Vec<Media>> {
    // Widen before subtracting: `max - min` can overflow in `i32`.
    let start = i64::from(min).max(0);
    let end = i64::from(max).max(start);
    medias::table
        .filter(medias::owner_id.eq(user.id))
        .order(medias::id.desc())
        .offset(start)
        .limit(end - start)
        .load::<Media>(conn)
        .map_err(Error::from)
}
/// Counts the medias owned by `user`.
pub fn count_for_user(conn: &Connection, user: &User) -> Result<i64> {
    let owned_by_user = medias::table.filter(medias::owner_id.eq(user.id));
    let total = owned_by_user.count().get_result(conn)?;
    Ok(total)
}
pub fn category(&self) -> MediaCategory {
match &*self
.file_path
2021-11-27 23:53:13 +01:00
.rsplit_once('.')
.map(|x| x.1)
.unwrap_or("")
.to_lowercase()
{
"png" | "jpg" | "jpeg" | "gif" | "svg" => MediaCategory::Image,
"mp3" | "wav" | "flac" => MediaCategory::Audio,
"mp4" | "avi" | "webm" | "mov" => MediaCategory::Video,
_ => MediaCategory::Unknown,
}
}
/// Builds the HTML snippet used to display this media.
///
/// The element depends on `category()`: `<img>`, `<audio>`, `<video>`, or a
/// plain link for unknown files. The alternative text is passed through
/// `escape`; the URL is inserted as returned by `url()`.
pub fn html(&self) -> Result<SafeString> {
    let src = self.url()?;
    let markup = match self.category() {
        MediaCategory::Image => {
            let alt = escape(&self.alt_text);
            format!(r#"<img src="{}" alt="{}" title="{}">"#, src, alt, alt)
        }
        MediaCategory::Audio => format!(
            r#"<div class="media-preview audio"></div><audio src="{}" title="{}" controls></audio>"#,
            src,
            escape(&self.alt_text)
        ),
        MediaCategory::Video => format!(
            r#"<video src="{}" title="{}" controls></video>"#,
            src,
            escape(&self.alt_text)
        ),
        MediaCategory::Unknown => {
            format!(r#"<a href="{}" class="media-preview unknown"></a>"#, src)
        }
    };
    Ok(SafeString::trusted(&markup))
}
pub fn markdown(&self) -> Result<SafeString> {
Ok(match self.category() {
2019-03-20 17:56:17 +01:00
MediaCategory::Image => {
SafeString::new(&format!("![{}]({})", escape(&self.alt_text), self.id))
2019-03-20 17:56:17 +01:00
}
MediaCategory::Audio | MediaCategory::Video => self.html()?,
MediaCategory::Unknown => SafeString::new(""),
})
2018-09-02 22:55:42 +02:00
}
/// Computes the on-disk location of a media kept in the local media directory.
///
/// Returns `None` when `file_path` is empty. Otherwise the configured media
/// directory, a leading path separator and a leading `static/media/` are
/// trimmed from `file_path`, and the remainder is joined onto the media
/// directory.
///
/// # Panics
/// Panics if S3 storage is configured: local paths are meaningless then.
pub fn local_path(&self) -> Option<PathBuf> {
    if self.file_path.is_empty() {
        return None;
    }
    if CONFIG.s3.is_some() {
        #[cfg(feature = "s3")]
        unreachable!("Called Media::local_path() but media are stored on S3");
        #[cfg(not(feature = "s3"))]
        unreachable!();
    }

    let media_dir = &CONFIG.media_directory;
    let below_media_dir = self.file_path.trim_start_matches(media_dir);
    let without_separator = below_media_dir.trim_start_matches(path::MAIN_SEPARATOR);
    let relative = without_separator.trim_start_matches("static/media/");
    Some(Path::new(media_dir).join(relative))
}
/// Computes the relative URL of this file, which doubles as its key in the S3
/// bucket when S3 storage is used.
///
/// The result never starts with a `/` and always has the form
/// `static/media/<...>`. Returns `None` when `file_path` is empty.
pub fn relative_url(&self) -> Option<String> {
    if self.file_path.is_empty() {
        return None;
    }

    let below_media_dir = self.file_path.trim_start_matches(&CONFIG.media_directory);
    // URLs always use forward slashes, whatever the platform separator is.
    let with_forward_slashes = below_media_dir.replace(path::MAIN_SEPARATOR, "/");
    let suffix = with_forward_slashes
        .trim_start_matches('/')
        .trim_start_matches("static/media/");
    Some(format!("static/media/{}", suffix))
}
/// Builds the public URL through which this media can be fetched.
///
/// - Remote medias: `remote_url` (or an empty string when it is missing).
/// - S3 storage with direct download enabled: a URL pointing at the bucket
///   (alias, path-style or virtual-hosted-style, depending on the S3 config).
/// - Otherwise: a URL on the local instance's public domain.
pub fn url(&self) -> Result<String> {
    if self.is_remote {
        return Ok(self.remote_url.clone().unwrap_or_default());
    }

    let relative_url = self.relative_url().unwrap_or_default();

    #[cfg(feature = "s3")]
    if let Some(s3) = CONFIG.s3.as_ref().filter(|s3| s3.direct_download) {
        return Ok(match s3 {
            S3Config {
                alias: Some(alias), ..
            } => format!("https://{}/{}", alias, relative_url),
            S3Config {
                path_style: true,
                hostname,
                bucket,
                ..
            } => format!("https://{}/{}/{}", hostname, bucket, relative_url),
            S3Config {
                path_style: false,
                hostname,
                bucket,
                ..
            } => format!("https://{}.{}/{}", bucket, hostname, relative_url),
        });
    }

    let instance = Instance::get_local()?;
    let location = format!("{}/{}", instance.public_domain, relative_url);
    Ok(ap_url(&location))
}
2018-09-02 23:10:15 +02:00
pub fn delete(&self, conn: &Connection) -> Result<()> {
if !self.is_remote {
if CONFIG.s3.is_some() {
#[cfg(not(feature="s3"))]
unreachable!();
#[cfg(feature = "s3")]
CONFIG.s3.as_ref().unwrap().get_bucket()
.delete_object_blocking(&self.relative_url().ok_or(Error::NotFound)?)?;
2022-11-13 11:18:13 +01:00
} else {
fs::remove_file(self.local_path().ok_or(Error::NotFound)?)?;
2022-11-13 11:18:13 +01:00
}
}
diesel::delete(self)
.execute(conn)
.map(|_| ())
.map_err(Error::from)
2018-09-02 23:10:15 +02:00
}
pub fn save_remote(conn: &Connection, url: String, user: &User) -> Result<Media> {
2018-12-02 19:07:36 +01:00
if url.contains(&['<', '>', '"'][..]) {
Err(Error::Url)
2018-12-02 19:07:36 +01:00
} else {
Media::insert(
2018-12-02 19:07:36 +01:00
conn,
NewMedia {
file_path: String::new(),
alt_text: String::new(),
is_remote: true,
remote_url: Some(url),
sensitive: false,
content_warning: None,
owner_id: user.id,
},
)
2018-12-02 19:07:36 +01:00
}
}
/// Makes `user` the owner of this media in the database.
///
/// Only the stored row is changed; `self.owner_id` keeps its previous value.
pub fn set_owner(&self, conn: &Connection, user: &User) -> Result<()> {
    let new_owner = medias::owner_id.eq(user.id);
    diesel::update(self).set(new_owner).execute(conn)?;
    Ok(())
}
2018-10-31 10:40:20 +01:00
// TODO: merge with save_remote?
2022-12-16 22:51:14 +01:00
pub fn from_activity(conn: &Connection, image: &Image) -> Result<Media> {
2022-02-26 02:58:49 +01:00
let remote_url = image
.url()
.and_then(|url| url.to_as_uri())
.ok_or(Error::MissingApProperty)?;
let file_path = if CONFIG.s3.is_some() {
#[cfg(not(feature="s3"))]
unreachable!();
#[cfg(feature = "s3")]
{
let dest = determine_mirror_s3_path(&remote_url);
let media = request::get(
remote_url.as_str(),
User::get_sender(),
CONFIG.proxy().cloned(),
)?;
let content_type = media.headers().get(reqwest::header::CONTENT_TYPE).cloned();
let bytes = media.bytes()?;
let bucket = CONFIG.s3.as_ref().unwrap().get_bucket();
match content_type.as_ref().and_then(|x| x.to_str().ok()) {
Some(ct) => {
bucket.put_object_with_content_type_blocking(&dest, &bytes, ct)?;
}
None => {
bucket.put_object_blocking(&dest, &bytes)?;
}
}
dest
}
} else {
let path = determine_mirror_file_path(&remote_url);
let parent = path.parent().ok_or(Error::InvalidValue)?;
if !parent.is_dir() {
DirBuilder::new().recursive(true).create(parent)?;
}
2022-02-26 02:58:49 +01:00
let mut dest = fs::File::create(path.clone())?;
// TODO: conditional GET
request::get(
remote_url.as_str(),
User::get_sender(),
CONFIG.proxy().cloned(),
)?
.copy_to(&mut dest)?;
path.to_str().ok_or(Error::InvalidValue)?.to_string()
};
Media::find_by_file_path(conn, &file_path)
2022-02-26 02:58:49 +01:00
.and_then(|mut media| {
let mut updated = false;
let alt_text = image
.content()
.and_then(|content| content.to_as_string())
.ok_or(Error::NotFound)?;
let summary = image.summary().and_then(|summary| summary.to_as_string());
let sensitive = summary.is_some();
let content_warning = summary;
if media.alt_text != alt_text {
media.alt_text = alt_text;
updated = true;
}
if media.is_remote {
media.is_remote = false;
updated = true;
}
2022-05-03 15:25:43 +02:00
if media.remote_url.is_some() {
media.remote_url = None;
updated = true;
}
2022-02-26 02:58:49 +01:00
if media.sensitive != sensitive {
media.sensitive = sensitive;
updated = true;
}
if media.content_warning != content_warning {
media.content_warning = content_warning;
updated = true;
}
if updated {
2022-12-16 22:51:14 +01:00
diesel::update(&media).set(&media).execute(conn)?;
2022-02-26 02:58:49 +01:00
}
Ok(media)
})
.or_else(|_| {
let summary = image.summary().and_then(|summary| summary.to_as_string());
Media::insert(
conn,
NewMedia {
file_path,
2022-02-26 02:58:49 +01:00
alt_text: image
.content()
.and_then(|content| content.to_as_string())
.ok_or(Error::NotFound)?,
is_remote: false,
remote_url: None,
sensitive: summary.is_some(),
content_warning: summary,
2022-05-02 12:24:36 +02:00
owner_id: User::from_id(
2022-02-26 02:58:49 +01:00
conn,
&image
.attributed_to()
.and_then(|attributed_to| attributed_to.to_as_uri())
.ok_or(Error::MissingApProperty)?,
None,
CONFIG.proxy(),
)
.map_err(|(_, e)| e)?
.id,
},
)
})
}
/// Builds a `MediaProcessor` resolving a media id to its URL and content
/// warning.
///
/// The processor yields `None` when the media cannot be loaded, when its URL
/// cannot be computed, or when it is not owned by one of the users in `user`.
/// Passing an empty `user` list disables the ownership check.
pub fn get_media_processor<'a>(conn: &'a Connection, user: Vec<&User>) -> MediaProcessor<'a> {
    let allowed_owners: Vec<_> = user.iter().map(|u| u.id).collect();
    Box::new(move |id| {
        let media = Media::get(conn, id).ok()?;
        // an empty list means the ownership check is disabled
        let permitted = allowed_owners.is_empty() || allowed_owners.contains(&media.owner_id);
        if !permitted {
            return None;
        }
        let url = media.url().ok()?;
        Some((url, media.content_warning))
    })
}
}
fn determine_mirror_file_path(url: &str) -> PathBuf {
let mut file_path = Path::new(&CONFIG.media_directory).join(REMOTE_MEDIA_DIRECTORY);
match Url::parse(url) {
Ok(url) if url.has_host() => {
file_path.push(url.host_str().unwrap());
for segment in url.path_segments().expect("FIXME") {
file_path.push(segment);
}
// TODO: handle query
// HINT: Use characters which must be percent-encoded in path as separator between path and query
// HINT: handle extension
}
other => {
if let Err(err) = other {
warn!("Failed to parse url: {} {}", &url, err);
} else {
warn!("Error without a host: {}", &url);
}
let ext = url
.rsplit('.')
.next()
.map(ToOwned::to_owned)
.unwrap_or_else(|| String::from("png"));
2021-11-27 23:53:13 +01:00
file_path.push(format!("{}.{}", GUID::rand(), ext));
}
}
file_path
}
/// Chooses the S3 key under which the file at `url` is mirrored.
///
/// For a URL with a host the key is `static/media/remote/<host>/<path>`. When
/// `url` cannot be parsed or has no host, a warning is logged and a random
/// name is used instead. That name keeps the URL's extension only if the text
/// after the last `.` is non-empty and purely ASCII alphanumeric, and falls
/// back to `png` otherwise.
#[cfg(feature = "s3")]
fn determine_mirror_s3_path(url: &str) -> String {
    let parsed = Url::parse(url);
    let host_and_path = parsed
        .as_ref()
        .ok()
        .and_then(|parsed| Some((parsed.host_str()?, parsed.path())));

    match host_and_path {
        Some((host, path)) => format!(
            "static/media/{}/{}/{}",
            REMOTE_MEDIA_DIRECTORY,
            host,
            path.trim_start_matches('/'),
        ),
        None => {
            match &parsed {
                Err(err) => warn!("Failed to parse url: {} {}", url, err),
                Ok(_) => warn!("Error without a host: {}", url),
            }
            let ext = url
                .rsplit_once('.')
                .map(|(_, ext)| ext)
                .filter(|ext| !ext.is_empty() && ext.chars().all(|c| c.is_ascii_alphanumeric()))
                .unwrap_or("png");
            format!(
                "static/media/{}/{}.{}",
                REMOTE_MEDIA_DIRECTORY,
                GUID::rand(),
                ext,
            )
        }
    }
}
#[cfg(test)]
pub(crate) mod tests {
use super::*;
2020-01-21 07:02:03 +01:00
use crate::{tests::db, users::tests as usersTests, Connection as Conn};
use diesel::Connection;
use std::env::{current_dir, set_current_dir};
use std::fs;
use std::path::Path;
pub(crate) fn fill_database(conn: &Conn) -> (Vec<User>, Vec<Media>) {
2023-01-02 18:45:13 +01:00
let mut wd = current_dir().unwrap();
while wd.pop() {
if wd.join(".git").exists() {
set_current_dir(wd).unwrap();
break;
}
}
let users = usersTests::fill_database(conn);
let user_one = users[0].id;
let user_two = users[1].id;
let f1 = "static/media/1.png".to_owned();
let f2 = "static/media/2.mp3".to_owned();
fs::write(f1.clone(), []).unwrap();
fs::write(f2.clone(), []).unwrap();
2019-03-20 17:56:17 +01:00
(
users,
vec![
NewMedia {
file_path: f1,
alt_text: "some alt".to_owned(),
is_remote: false,
remote_url: None,
sensitive: false,
content_warning: None,
owner_id: user_one,
},
NewMedia {
file_path: f2,
alt_text: "alt message".to_owned(),
is_remote: false,
remote_url: None,
sensitive: true,
content_warning: Some("Content warning".to_owned()),
owner_id: user_one,
},
NewMedia {
file_path: "".to_owned(),
alt_text: "another alt".to_owned(),
is_remote: true,
remote_url: Some("https://example.com/".to_owned()),
sensitive: false,
content_warning: None,
owner_id: user_two,
},
]
.into_iter()
.map(|nm| Media::insert(conn, nm).unwrap())
2019-03-20 17:56:17 +01:00
.collect(),
)
}
pub(crate) fn clean(conn: &Conn) {
//used to remove files generated by tests
for media in Media::list_all_medias(conn).unwrap() {
if let Some(err) = media.delete(conn).err() {
match &err {
Error::Io(e) => match e.kind() {
std::io::ErrorKind::NotFound => (),
_ => panic!("{:?}", err),
},
_ => panic!("{:?}", err),
}
}
}
}
/// Deleting a local media must also remove its file from disk.
#[test]
fn delete() {
    let conn = &db();
    conn.test_transaction::<_, (), _>(|| {
        let owner_id = fill_database(conn).0[0].id;

        let path = "static/media/test_deletion".to_owned();
        fs::write(&path, []).unwrap();

        let new_media = NewMedia {
            file_path: path.clone(),
            alt_text: "alt message".to_owned(),
            is_remote: false,
            remote_url: None,
            sensitive: false,
            content_warning: None,
            owner_id,
        };
        let media = Media::insert(conn, new_media).unwrap();

        let file = Path::new(&path);
        assert!(file.exists());
        media.delete(conn).unwrap();
        assert!(!file.exists());

        clean(conn);
        Ok(())
    });
}
/// Changing the owner must move the media from one user's list to the other's.
#[test]
fn set_owner() {
    let conn = &db();
    conn.test_transaction::<_, (), _>(|| {
        let (users, _) = fill_database(conn);
        let (u1, u2) = (&users[0], &users[1]);

        let path = "static/media/test_set_owner".to_owned();
        fs::write(&path, []).unwrap();

        let new_media = NewMedia {
            file_path: path,
            alt_text: "alt message".to_owned(),
            is_remote: false,
            remote_url: None,
            sensitive: false,
            content_warning: None,
            owner_id: u1.id,
        };
        let media = Media::insert(conn, new_media).unwrap();

        // Whether `media` shows up among the medias of the given user.
        let listed_for = |user: &User| {
            Media::for_user(conn, user.id)
                .unwrap()
                .iter()
                .any(|m| m.id == media.id)
        };

        assert!(listed_for(u1));
        assert!(!listed_for(u2));
        media.set_owner(conn, u2).unwrap();
        assert!(!listed_for(u1));
        assert!(listed_for(u2));

        clean(conn);
        Ok(())
    });
}
2018-09-02 13:34:48 +02:00
}