store post url content type in db

This commit is contained in:
Felix Ableitner 2023-10-27 16:42:43 +02:00
parent 5507d2d680
commit 97697aa413
12 changed files with 41 additions and 38 deletions

View file

@ -34,7 +34,7 @@ full = [
"futures",
"once_cell",
"jsonwebtoken",
"mime"
"mime",
]
[dependencies]

View file

@ -29,8 +29,6 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
}
/// Fetches metadata for the given link and optionally generates thumbnail.
///
/// TODO: consider caching the results as it will be called by api then apub send right after each other
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata(
url: &Url,
@ -68,6 +66,20 @@ pub async fn fetch_link_metadata(
Ok(metadata)
}
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata_opt(
url: Option<&Url>,
generate_thumbnail: bool,
context: &LemmyContext,
) -> Result<LinkMetadata, LemmyError> {
let metadata = match &url {
Some(url) => fetch_link_metadata(url, generate_thumbnail, context)
.await
.unwrap_or_default(),
_ => Default::default(),
};
Ok(metadata)
}
/// Extract site metadata from HTML Opengraph attributes.
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<LinkMetadata, LemmyError> {

View file

@ -4,7 +4,7 @@ use lemmy_api_common::{
build_response::build_post_response,
context::LemmyContext,
post::{CreatePost, PostResponse},
request::fetch_link_metadata,
request::fetch_link_metadata_opt,
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_user_action,
@ -54,7 +54,7 @@ pub async fn create_post(
honeypot_check(&data.honeypot)?;
let data_url = data.url.as_ref();
let url = data_url.map(clean_url_params).map(Into::into); // TODO no good way to handle a "clear"
let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear"
is_valid_post_title(&data.name)?;
is_valid_body_field(&body, true)?;
@ -83,12 +83,7 @@ pub async fn create_post(
}
// Fetch post links and pictrs cached image
let metadata = match data_url {
Some(url) => fetch_link_metadata(url, true, &context)
.await
.unwrap_or_default(),
_ => Default::default(),
};
let metadata = fetch_link_metadata_opt(url.as_ref(), true, &context).await?;
// Only need to check if language is allowed in case user set it explicitly. When using default
// language, it already only returns allowed languages.
@ -114,7 +109,7 @@ pub async fn create_post(
let post_form = PostInsertForm::builder()
.name(data.name.trim().to_string())
.url(url)
.url(url.map(Into::into))
.body(body)
.community_id(data.community_id)
.creator_id(local_user_view.person.id)

View file

@ -4,7 +4,7 @@ use lemmy_api_common::{
build_response::build_post_response,
context::LemmyContext,
post::{EditPost, PostResponse},
request::fetch_link_metadata,
request::fetch_link_metadata_opt,
send_activity::{ActivityChannel, SendActivityData},
utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt},
};
@ -35,11 +35,9 @@ pub async fn update_post(
) -> Result<Json<PostResponse>, LemmyError> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let data_url = data.url.as_ref();
// TODO No good way to handle a clear.
// Issue link: https://github.com/LemmyNet/lemmy/issues/2287
let url = Some(data_url.map(clean_url_params).map(Into::into));
let url = data.url.as_ref().map(clean_url_params);
let slur_regex = local_site_to_slur_regex(&local_site);
check_slurs_opt(&data.name, &slur_regex)?;
@ -68,12 +66,7 @@ pub async fn update_post(
}
// Fetch post links and Pictrs cached image
let metadata = match data_url {
Some(url) => fetch_link_metadata(url, true, &context)
.await
.unwrap_or_default(),
_ => Default::default(),
};
let metadata = fetch_link_metadata_opt(url.as_ref(), true, &context).await?;
let language_id = data.language_id;
CommunityLanguage::is_allowed_community_language(
@ -85,7 +78,7 @@ pub async fn update_post(
let post_form = PostUpdateForm {
name: data.name.clone(),
url,
url: Some(url.map(Into::into)),
body: diesel_option_overwrite(body),
nsfw: data.nsfw,
embed_title: Some(metadata.title),

View file

@ -24,7 +24,7 @@ use chrono::{DateTime, Utc};
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
use lemmy_api_common::{
context::LemmyContext,
request::fetch_link_metadata,
request::fetch_link_metadata_opt,
utils::{
is_mod_or_admin,
local_site_opt_to_sensitive,
@ -112,16 +112,10 @@ impl Object for ApubPost {
let community = Community::read(&mut context.pool(), community_id).await?;
let language = LanguageTag::new_single(self.language_id, &mut context.pool()).await?;
let metadata = match &self.url {
Some(url) => fetch_link_metadata(url, false, &context)
.await
.unwrap_or_default(),
_ => Default::default(),
};
let attachment = self
.url
.clone()
.map(|url| Attachment::new(url, metadata.content_type))
.map(|url| Attachment::new(url, self.url_content_type.clone()))
.into_iter()
.collect();
@ -224,17 +218,12 @@ impl Object for ApubPost {
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let page_is_sensitive = page.sensitive.unwrap_or(false);
let include_image = allow_sensitive || !page_is_sensitive;
let generate_thumbnail = allow_sensitive || !page_is_sensitive;
// Only fetch metadata if the post has a url and was not seen previously. We don't want to
// waste resources by fetching metadata for the same post multiple times.
// Additionally, only fetch image if content is not sensitive or is allowed on local site.
let metadata = match &url {
Some(url) => fetch_link_metadata(url, include_image, context)
.await
.unwrap_or_default(),
_ => Default::default(),
};
let metadata = fetch_link_metadata_opt(url.as_ref(), generate_thumbnail, context).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
@ -263,6 +252,7 @@ impl Object for ApubPost {
language_id,
featured_community: None,
featured_local: None,
url_content_type: metadata.content_type,
}
} else {
// if is mod action, only update locked/stickied fields, nothing else

View file

@ -432,6 +432,7 @@ mod tests {
language_id: Default::default(),
featured_community: false,
featured_local: false,
url_content_type: None,
};
// Post Like

View file

@ -712,6 +712,7 @@ diesel::table! {
language_id -> Int4,
featured_community -> Bool,
featured_local -> Bool,
url_content_type -> Nullable<Text>,
}
}

View file

@ -54,6 +54,7 @@ pub struct Post {
pub featured_community: bool,
/// Whether the post is featured to its site.
pub featured_local: bool,
pub url_content_type: Option<String>,
}
#[derive(Debug, Clone, TypedBuilder)]
@ -84,6 +85,7 @@ pub struct PostInsertForm {
pub language_id: Option<LanguageId>,
pub featured_community: Option<bool>,
pub featured_local: Option<bool>,
pub url_content_type: Option<String>,
}
#[derive(Debug, Clone, Default)]
@ -108,6 +110,7 @@ pub struct PostUpdateForm {
pub language_id: Option<LanguageId>,
pub featured_community: Option<bool>,
pub featured_local: Option<bool>,
pub url_content_type: Option<String>,
}
#[derive(PartialEq, Eq, Debug)]

View file

@ -919,6 +919,7 @@ mod tests {
language_id: Default::default(),
featured_community: false,
featured_local: false,
url_content_type: None,
},
community: Community {
id: data.inserted_community.id,

View file

@ -1407,6 +1407,7 @@ mod tests {
language_id: LanguageId(47),
featured_community: false,
featured_local: false,
url_content_type: None,
},
my_vote: None,
unread_comments: 0,

View file

@ -0,0 +1,3 @@
-- Removes the `url_content_type` column from the `post` table.
-- NOTE(review): presumably the rollback counterpart of the migration that adds this column; confirm.
ALTER TABLE post
DROP COLUMN url_content_type;

View file

@ -0,0 +1,3 @@
-- Adds the `url_content_type` text column to the `post` table.
-- No NOT NULL constraint or default is given, so existing rows get NULL.
ALTER TABLE post
ADD COLUMN url_content_type text;