From a4b79ca61014bb5f47296c51bfe54b3aac708763 Mon Sep 17 00:00:00 2001 From: Nutomic Date: Wed, 27 Mar 2024 15:54:42 +0100 Subject: [PATCH] Generate post thumbnail/metadata in background (ref #4529) (#4564) * Generate post thumbnail/metadata in background (ref #4529) * fix api test * Apply suggestions from code review Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com> * fix test --------- Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com> --- api_tests/src/image.spec.ts | 22 +++++++--- api_tests/src/post.spec.ts | 27 +++++++++---- crates/api_common/src/request.rs | 55 ++++++++++++++++++++++++- crates/api_crud/src/post/create.rs | 33 +++++---------- crates/api_crud/src/post/update.rs | 46 +++++---------------- crates/apub/src/objects/post.rs | 64 ++++++++++++------------------ 6 files changed, 135 insertions(+), 112 deletions(-) diff --git a/api_tests/src/image.spec.ts b/api_tests/src/image.spec.ts index 7fd1bd47c..3a82b572d 100644 --- a/api_tests/src/image.spec.ts +++ b/api_tests/src/image.spec.ts @@ -17,13 +17,16 @@ import { deleteAllImages, delta, epsilon, + followCommunity, gamma, getSite, imageFetchLimit, registerUser, resolveBetaCommunity, + resolveCommunity, resolvePost, setupLogins, + waitForPost, unfollows, } from "./shared"; const downloadFileSync = require("download-file-sync"); @@ -209,6 +212,11 @@ test("Images in remote post are proxied if setting enabled", async () => { test("No image proxying if setting is disabled", async () => { let user = await registerUser(beta, betaUrl); let community = await createCommunity(alpha); + let betaCommunity = await resolveCommunity( + beta, + community.community_view.community.actor_id, + ); + await followCommunity(beta, true, betaCommunity.community!.community.id); const upload_form: UploadImage = { image: Buffer.from("test"), @@ -228,15 +236,19 @@ test("No image proxying if setting is disabled", async () => { ).toBeTruthy(); expect(post.post_view.post.body).toBe("![](http://example.com/image2.png)"); - let gammaPost = await resolvePost(delta, post.post_view.post); - expect(gammaPost.post).toBeDefined(); + let betaPost = await waitForPost( + beta, + post.post_view.post, + res => res?.post.alt_text != null, + ); + expect(betaPost.post).toBeDefined(); // remote image doesnt get proxied after federation expect( - gammaPost.post!.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"), + betaPost.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"), ).toBeTruthy(); - expect(gammaPost.post!.post.body).toBe("![](http://example.com/image2.png)"); + expect(betaPost.post.body).toBe("![](http://example.com/image2.png)"); // Make sure the alt text got federated - expect(post.post_view.post.alt_text).toBe(gammaPost.post!.post.alt_text); + expect(post.post_view.post.alt_text).toBe(betaPost.post.alt_text); }); diff --git a/api_tests/src/post.spec.ts b/api_tests/src/post.spec.ts index bd8b0051f..c1640e93e 100644 --- a/api_tests/src/post.spec.ts +++ b/api_tests/src/post.spec.ts @@ -55,7 +55,18 @@ afterAll(() => { unfollows(); }); -function assertPostFederation(postOne?: PostView, postTwo?: PostView) { +async function assertPostFederation(postOne: PostView, postTwo: PostView) { + // Link metadata is generated in background task and may not be ready yet at this time, + // so wait for it explicitly. For removed posts we cant refetch anything. + postOne = await waitForPost(beta, postOne.post, res => { + return res === null || res?.post.embed_title !== null; + }); + postTwo = await waitForPost( + beta, + postTwo.post, + res => res === null || res?.post.embed_title !== null, + ); + expect(postOne?.post.ap_id).toBe(postTwo?.post.ap_id); expect(postOne?.post.name).toBe(postTwo?.post.name); expect(postOne?.post.body).toBe(postTwo?.post.body); @@ -109,7 +120,7 @@ test("Create a post", async () => { expect(betaPost?.community.local).toBe(true); expect(betaPost?.creator.local).toBe(false); expect(betaPost?.counts.score).toBe(1); - assertPostFederation(betaPost, postRes.post_view); + await assertPostFederation(betaPost, postRes.post_view); // Delta only follows beta, so it should not see an alpha ap_id await expect( @@ -157,7 +168,7 @@ test("Unlike a post", async () => { expect(betaPost?.community.local).toBe(true); expect(betaPost?.creator.local).toBe(false); expect(betaPost?.counts.score).toBe(0); - assertPostFederation(betaPost, postRes.post_view); + await assertPostFederation(betaPost, postRes.post_view); }); test("Update a post", async () => { @@ -178,7 +189,7 @@ test("Update a post", async () => { expect(betaPost.community.local).toBe(true); expect(betaPost.creator.local).toBe(false); expect(betaPost.post.name).toBe(updatedName); - assertPostFederation(betaPost, updatedPost.post_view); + await assertPostFederation(betaPost, updatedPost.post_view); // Make sure lemmy beta cannot update the post await expect(editPost(beta, betaPost.post)).rejects.toStrictEqual( @@ -329,7 +340,7 @@ test("Delete a post", async () => { throw "Missing beta post 2"; } expect(betaPost2.post.deleted).toBe(false); - assertPostFederation(betaPost2, undeletedPost.post_view); + await assertPostFederation(betaPost2, undeletedPost.post_view); // Make sure lemmy beta cannot delete the post await expect(deletePost(beta, true, betaPost2.post)).rejects.toStrictEqual( @@ -372,7 +383,7 @@ test("Remove a post from admin and community on different instance", async () => // Make sure lemmy beta sees post is undeleted let betaPost2 = (await resolvePost(beta, postRes.post_view.post)).post; expect(betaPost2?.post.removed).toBe(false); - assertPostFederation(betaPost2, undeletedPost.post_view); + await assertPostFederation(betaPost2!, undeletedPost.post_view); }); test("Remove a post from admin and community on same instance", async () => { @@ -403,7 +414,7 @@ test("Remove a post from admin and community on same instance", async () => { p => p?.post_view.post.removed ?? false, ); expect(alphaPost?.post_view.post.removed).toBe(true); - assertPostFederation(alphaPost.post_view, removePostRes.post_view); + await assertPostFederation(alphaPost.post_view, removePostRes.post_view); // Undelete let undeletedPost = await removePost(beta, false, betaPost.post); @@ -416,7 +427,7 @@ test("Remove a post from admin and community on same instance", async () => { p => !!p && !p.post.removed, ); expect(alphaPost2.post.removed).toBe(false); - assertPostFederation(alphaPost2, undeletedPost.post_view); + await assertPostFederation(alphaPost2, undeletedPost.post_view); await unfollowRemotes(alpha); }); diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 7209c5871..7c8768e41 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -1,17 +1,24 @@ use crate::{ context::LemmyContext, + lemmy_db_schema::traits::Crud, post::{LinkMetadata, OpenGraphData}, - utils::proxy_image_link, + send_activity::{ActivityChannel, SendActivityData}, + utils::{local_site_opt_to_sensitive, proxy_image_link, proxy_image_link_opt_apub}, }; use activitypub_federation::config::Data; use encoding::{all::encodings, DecoderTrap}; use lemmy_db_schema::{ newtypes::DbUrl, - source::images::{LocalImage, LocalImageForm}, + source::{ + images::{LocalImage, LocalImageForm}, + local_site::LocalSite, + post::{Post, PostUpdateForm}, + }, }; use lemmy_utils::{ error::{LemmyError, LemmyErrorType}, settings::structs::{PictrsImageMode, Settings}, + spawn_try_task, version::VERSION, REQWEST_TIMEOUT, }; @@ -83,6 +90,50 @@ pub async fn fetch_link_metadata_opt( _ => Default::default(), } } +/// Generate post thumbnail in background task, because some sites can be very slow to respond. +/// +/// Takes a callback to generate a send activity task, so that post can be federated with metadata. +pub fn generate_post_link_metadata( + post: Post, + custom_thumbnail: Option, + send_activity: impl FnOnce(Post) -> Option + Send + 'static, + local_site: Option, + context: Data, +) { + spawn_try_task(async move { + let allow_sensitive = local_site_opt_to_sensitive(&local_site); + let page_is_sensitive = post.nsfw; + let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive; + let mut thumbnail_url = custom_thumbnail.or_else(|| post.thumbnail_url.map(Into::into)); + let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail; + + // Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it. + let metadata = fetch_link_metadata_opt( + post.url.map(Into::into).as_ref(), + do_generate_thumbnail, + &context, + ) + .await; + if let Some(thumbnail_url_) = metadata.thumbnail { + thumbnail_url = Some(thumbnail_url_.into()); + } + let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, &context).await?; + + let form = PostUpdateForm { + embed_title: Some(metadata.opengraph_data.title), + embed_description: Some(metadata.opengraph_data.description), + embed_video_url: Some(metadata.opengraph_data.embed_video_url), + thumbnail_url: Some(thumbnail_url), + url_content_type: Some(metadata.content_type), + ..Default::default() + }; + let updated_post = Post::update(&mut context.pool(), post.id, &form).await?; + if let Some(send_activity) = send_activity(updated_post) { + ActivityChannel::submit_activity(send_activity, &context).await?; + } + Ok(()) + }); +} /// Extract site metadata from HTML Opengraph attributes. fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result { diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index fabab6b09..6a61c032b 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -4,8 +4,8 @@ use lemmy_api_common::{ build_response::build_post_response, context::LemmyContext, post::{CreatePost, PostResponse}, - request::fetch_link_metadata_opt, - send_activity::{ActivityChannel, SendActivityData}, + request::generate_post_link_metadata, + send_activity::SendActivityData, utils::{ check_community_user_action, generate_local_apub_endpoint, @@ -75,6 +75,7 @@ pub async fn create_post( is_url_blocked(&url, &url_blocklist)?; check_url_scheme(&url)?; check_url_scheme(&custom_thumbnail)?; + let url = proxy_image_link_opt_apub(url, &context).await?; check_community_user_action( &local_user_view.person, @@ -98,18 +99,6 @@ pub async fn create_post( } } - // Only generate the thumbnail if there's no custom thumbnail provided, - // otherwise it will save it in pictrs - let generate_thumbnail = custom_thumbnail.is_none(); - - // Fetch post links and pictrs cached image - let metadata = fetch_link_metadata_opt(url.as_ref(), generate_thumbnail, &context).await; - let url = proxy_image_link_opt_apub(url, &context).await?; - let thumbnail_url = proxy_image_link_opt_apub(custom_thumbnail, &context) - .await? - .map(Into::into) - .or(metadata.thumbnail); - // Only need to check if language is allowed in case user set it explicitly. When using default // language, it already only returns allowed languages. CommunityLanguage::is_allowed_community_language( @@ -134,18 +123,13 @@ pub async fn create_post( let post_form = PostInsertForm::builder() .name(data.name.trim().to_string()) - .url_content_type(metadata.content_type) .url(url) .body(body) .alt_text(data.alt_text.clone()) .community_id(data.community_id) .creator_id(local_user_view.person.id) .nsfw(data.nsfw) - .embed_title(metadata.opengraph_data.title) - .embed_description(metadata.opengraph_data.description) - .embed_video_url(metadata.opengraph_data.embed_video_url) .language_id(language_id) - .thumbnail_url(thumbnail_url) .build(); let inserted_post = Post::create(&mut context.pool(), &post_form) @@ -170,6 +154,14 @@ pub async fn create_post( .await .with_lemmy_type(LemmyErrorType::CouldntCreatePost)?; + generate_post_link_metadata( + updated_post.clone(), + custom_thumbnail, + |post| Some(SendActivityData::CreatePost(post)), + Some(local_site), + context.reset_request_count(), + ); + // They like their own post by default let person_id = local_user_view.person.id; let post_id = inserted_post.id; @@ -183,9 +175,6 @@ pub async fn create_post( .await .with_lemmy_type(LemmyErrorType::CouldntLikePost)?; - ActivityChannel::submit_activity(SendActivityData::CreatePost(updated_post.clone()), &context) - .await?; - // Mark the post as read mark_post_as_read(person_id, post_id, &mut context.pool()).await?; diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index 08c5425b9..c08f35307 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -4,8 +4,8 @@ use lemmy_api_common::{ build_response::build_post_response, context::LemmyContext, post::{EditPost, PostResponse}, - request::fetch_link_metadata, - send_activity::{ActivityChannel, SendActivityData}, + request::generate_post_link_metadata, + send_activity::SendActivityData, utils::{ check_community_user_action, get_url_blocklist, @@ -84,40 +84,11 @@ pub async fn update_post( Err(LemmyErrorType::NoPostEditAllowed)? } - // Fetch post links and thumbnail if url was updated - let (embed_title, embed_description, embed_video_url, metadata_thumbnail, metadata_content_type) = - match &url { - Some(url) => { - // Only generate the thumbnail if there's no custom thumbnail provided, - // otherwise it will save it in pictrs - let generate_thumbnail = custom_thumbnail.is_none() || orig_post.thumbnail_url.is_none(); - - let metadata = fetch_link_metadata(url, generate_thumbnail, &context).await?; - ( - Some(metadata.opengraph_data.title), - Some(metadata.opengraph_data.description), - Some(metadata.opengraph_data.embed_video_url), - Some(metadata.thumbnail), - Some(metadata.content_type), - ) - } - _ => Default::default(), - }; - let url = match url { Some(url) => Some(proxy_image_link_opt_apub(Some(url), &context).await?), _ => Default::default(), }; - let custom_thumbnail = match custom_thumbnail { - Some(custom_thumbnail) => { - Some(proxy_image_link_opt_apub(Some(custom_thumbnail), &context).await?) - } - _ => Default::default(), - }; - - let thumbnail_url = custom_thumbnail.or(metadata_thumbnail); - let language_id = data.language_id; CommunityLanguage::is_allowed_community_language( &mut context.pool(), @@ -129,15 +100,10 @@ pub async fn update_post( let post_form = PostUpdateForm { name: data.name.clone(), url, - url_content_type: metadata_content_type, body: diesel_option_overwrite(body), alt_text: diesel_option_overwrite(data.alt_text.clone()), nsfw: data.nsfw, - embed_title, - embed_description, - embed_video_url, language_id: data.language_id, - thumbnail_url, updated: Some(Some(naive_now())), ..Default::default() }; @@ -147,7 +113,13 @@ pub async fn update_post( .await .with_lemmy_type(LemmyErrorType::CouldntUpdatePost)?; - ActivityChannel::submit_activity(SendActivityData::UpdatePost(updated_post), &context).await?; + generate_post_link_metadata( + updated_post.clone(), + custom_thumbnail, + |post| Some(SendActivityData::UpdatePost(post)), + Some(local_site), + context.reset_request_count(), + ); build_post_response( context.deref(), diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 15184b622..0ddc6d17b 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -24,10 +24,9 @@ use chrono::{DateTime, Utc}; use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator}; use lemmy_api_common::{ context::LemmyContext, - request::fetch_link_metadata_opt, + request::generate_post_link_metadata, utils::{ get_url_blocklist, - local_site_opt_to_sensitive, local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -218,6 +217,7 @@ impl Object for ApubPost { let old_post = page.id.dereference_local(context).await; let first_attachment = page.attachment.first(); + let local_site = LocalSite::read(&mut context.pool()).await.ok(); let form = if !page.is_mod_action(context).await? { let url = if let Some(attachment) = first_attachment.cloned() { @@ -231,20 +231,8 @@ impl Object for ApubPost { check_url_scheme(&url)?; let alt_text = first_attachment.cloned().and_then(Attachment::alt_text); - let local_site = LocalSite::read(&mut context.pool()).await.ok(); - let allow_sensitive = local_site_opt_to_sensitive(&local_site); - let page_is_sensitive = page.sensitive.unwrap_or(false); - let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive; - let mut thumbnail_url = page.image.map(|i| i.url); - let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail; - // Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it. - let metadata = fetch_link_metadata_opt(url.as_ref(), do_generate_thumbnail, context).await; - if let Some(thumbnail_url_) = metadata.thumbnail { - thumbnail_url = Some(thumbnail_url_.into()); - } let url = proxy_image_link_opt_apub(url, context).await?; - let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?; let slur_regex = &local_site_opt_to_slur_regex(&local_site); let url_blocklist = get_url_blocklist(context).await?; @@ -254,30 +242,22 @@ impl Object for ApubPost { let language_id = LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; - PostInsertForm { - name, - url: url.map(Into::into), - body, - alt_text, - creator_id: creator.id, - community_id: community.id, - removed: None, - locked: page.comments_enabled.map(|e| !e), - published: page.published.map(Into::into), - updated: page.updated.map(Into::into), - deleted: Some(false), - nsfw: page.sensitive, - embed_title: metadata.opengraph_data.title, - embed_description: metadata.opengraph_data.description, - embed_video_url: metadata.opengraph_data.embed_video_url, - thumbnail_url, - ap_id: Some(page.id.clone().into()), - local: Some(false), - language_id, - featured_community: None, - featured_local: None, - url_content_type: metadata.content_type, - } + PostInsertForm::builder() + .name(name) + .url(url.map(Into::into)) + .body(body) + .alt_text(alt_text) + .creator_id(creator.id) + .community_id(community.id) + .locked(page.comments_enabled.map(|e| !e)) + .published(page.published.map(Into::into)) + .updated(page.updated.map(Into::into)) + .deleted(Some(false)) + .nsfw(page.sensitive) + .ap_id(Some(page.id.clone().into())) + .local(Some(false)) + .language_id(language_id) + .build() } else { // if is mod action, only update locked/stickied fields, nothing else PostInsertForm::builder() @@ -292,6 +272,14 @@ impl Object for ApubPost { let post = Post::create(&mut context.pool(), &form).await?; + generate_post_link_metadata( + post.clone(), + page.image.map(|i| i.url), + |_| None, + local_site, + context.reset_request_count(), + ); + // write mod log entry for lock if Page::is_locked_changed(&old_post, &page.comments_enabled) { let form = ModLockPostForm {