2023-08-24 15:27:00 +00:00
use chrono ::{ DateTime , TimeZone , Utc } ;
2023-09-11 09:12:16 +00:00
use clokwerk ::{ AsyncScheduler , TimeUnits as CTimeUnits } ;
2023-04-25 23:28:06 +00:00
use diesel ::{
2023-08-24 15:27:00 +00:00
dsl ::IntervalDsl ,
2023-09-11 09:12:16 +00:00
sql_query ,
2023-08-24 15:27:00 +00:00
sql_types ::{ Integer , Timestamptz } ,
2023-04-25 23:28:06 +00:00
ExpressionMethods ,
2023-06-20 06:17:54 +00:00
NullableExpressionMethods ,
2023-04-25 23:28:06 +00:00
QueryDsl ,
2023-06-27 08:13:51 +00:00
QueryableByName ,
2023-04-25 23:28:06 +00:00
} ;
2023-09-11 09:12:16 +00:00
use diesel_async ::{ AsyncPgConnection , RunQueryDsl } ;
2023-06-21 08:28:20 +00:00
use lemmy_api_common ::context ::LemmyContext ;
2023-02-18 14:36:12 +00:00
use lemmy_db_schema ::{
2023-07-14 15:17:06 +00:00
schema ::{
captcha_answer ,
comment ,
community_person_ban ,
instance ,
person ,
post ,
received_activity ,
sent_activity ,
} ,
2024-02-15 12:50:53 +00:00
source ::{
instance ::{ Instance , InstanceForm } ,
local_user ::LocalUser ,
} ,
2023-09-11 09:12:16 +00:00
utils ::{ get_conn , naive_now , now , DbPool , DELETED_REPLACEMENT_TEXT } ,
2023-02-18 14:36:12 +00:00
} ;
2024-06-03 21:30:00 +00:00
use lemmy_routes ::nodeinfo ::{ NodeInfo , NodeInfoWellKnown } ;
2024-04-10 14:14:11 +00:00
use lemmy_utils ::error ::LemmyResult ;
2023-09-11 09:12:16 +00:00
use reqwest_middleware ::ClientWithMiddleware ;
use std ::time ::Duration ;
2023-07-13 14:12:01 +00:00
use tracing ::{ error , info , warn } ;
2021-01-29 16:38:27 +00:00
/// Schedules various cleanup tasks for lemmy in a background thread
2024-04-10 14:14:11 +00:00
pub async fn setup ( context : LemmyContext ) -> LemmyResult < ( ) > {
2022-11-09 10:05:00 +00:00
// Setup the connections
2023-09-11 09:12:16 +00:00
let mut scheduler = AsyncScheduler ::new ( ) ;
startup_jobs ( & mut context . pool ( ) ) . await ;
2021-08-26 11:49:16 +00:00
2023-09-11 09:12:16 +00:00
let context_1 = context . clone ( ) ;
2023-06-08 20:15:15 +00:00
// Update active counts every hour
2023-04-25 23:28:06 +00:00
scheduler . every ( CTimeUnits ::hour ( 1 ) ) . run ( move | | {
2023-09-11 09:12:16 +00:00
let context = context_1 . clone ( ) ;
async move {
active_counts ( & mut context . pool ( ) ) . await ;
update_banned_when_expired ( & mut context . pool ( ) ) . await ;
}
2023-06-08 20:15:15 +00:00
} ) ;
2023-09-11 09:12:16 +00:00
let context_1 = context . clone ( ) ;
2023-06-27 08:13:51 +00:00
// Update hot ranks every 15 minutes
2023-09-11 09:12:16 +00:00
scheduler . every ( CTimeUnits ::minutes ( 10 ) ) . run ( move | | {
let context = context_1 . clone ( ) ;
async move {
update_hot_ranks ( & mut context . pool ( ) ) . await ;
}
2021-01-29 16:38:27 +00:00
} ) ;
2023-09-11 09:12:16 +00:00
let context_1 = context . clone ( ) ;
2023-06-27 10:38:53 +00:00
// Delete any captcha answers older than ten minutes, every ten minutes
scheduler . every ( CTimeUnits ::minutes ( 10 ) ) . run ( move | | {
2023-09-11 09:12:16 +00:00
let context = context_1 . clone ( ) ;
async move {
delete_expired_captcha_answers ( & mut context . pool ( ) ) . await ;
}
2023-06-27 10:38:53 +00:00
} ) ;
2023-09-11 09:12:16 +00:00
let context_1 = context . clone ( ) ;
2023-06-08 20:15:15 +00:00
// Clear old activities every week
2023-04-25 23:28:06 +00:00
scheduler . every ( CTimeUnits ::weeks ( 1 ) ) . run ( move | | {
2023-09-11 09:12:16 +00:00
let context = context_1 . clone ( ) ;
async move {
clear_old_activities ( & mut context . pool ( ) ) . await ;
}
2021-01-29 16:38:27 +00:00
} ) ;
2023-09-11 09:12:16 +00:00
let context_1 = context . clone ( ) ;
2024-02-15 12:50:53 +00:00
// Daily tasks:
// - Overwrite deleted & removed posts and comments every day
// - Delete old denied users
// - Update instance software
2023-06-20 06:17:54 +00:00
scheduler . every ( CTimeUnits ::days ( 1 ) ) . run ( move | | {
2023-09-11 09:12:16 +00:00
let context = context_1 . clone ( ) ;
async move {
overwrite_deleted_posts_and_comments ( & mut context . pool ( ) ) . await ;
2024-02-15 12:50:53 +00:00
delete_old_denied_users ( & mut context . pool ( ) ) . await ;
2023-09-11 09:12:16 +00:00
update_instance_software ( & mut context . pool ( ) , context . client ( ) )
. await
2024-04-04 14:14:59 +00:00
. map_err ( | e | warn! ( " Failed to update instance software: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
}
2023-02-18 14:36:12 +00:00
} ) ;
2021-01-29 16:38:27 +00:00
// Manually run the scheduler in an event loop
loop {
2023-09-11 09:12:16 +00:00
scheduler . run_pending ( ) . await ;
tokio ::time ::sleep ( Duration ::from_millis ( 1000 ) ) . await ;
2021-01-29 16:38:27 +00:00
}
}
2023-06-20 09:33:03 +00:00
/// Run these on server startup
2023-09-11 09:12:16 +00:00
async fn startup_jobs ( pool : & mut DbPool < '_ > ) {
active_counts ( pool ) . await ;
update_hot_ranks ( pool ) . await ;
update_banned_when_expired ( pool ) . await ;
clear_old_activities ( pool ) . await ;
overwrite_deleted_posts_and_comments ( pool ) . await ;
2024-02-15 12:50:53 +00:00
delete_old_denied_users ( pool ) . await ;
2023-06-20 09:33:03 +00:00
}
2023-06-08 20:15:15 +00:00
/// Update the hot_rank columns for the aggregates tables
2023-06-27 08:13:51 +00:00
/// Runs in batches until all necessary rows are updated once
2023-09-11 09:12:16 +00:00
async fn update_hot_ranks ( pool : & mut DbPool < '_ > ) {
2023-07-17 09:05:55 +00:00
info! ( " Updating hot ranks for all history... " ) ;
2023-06-27 08:13:51 +00:00
2023-09-11 09:12:16 +00:00
let conn = get_conn ( pool ) . await ;
match conn {
Ok ( mut conn ) = > {
process_post_aggregates_ranks_in_batches ( & mut conn ) . await ;
process_ranks_in_batches (
& mut conn ,
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
" comment " ,
2023-09-11 09:12:16 +00:00
" a.hot_rank != 0 " ,
2024-04-18 00:58:44 +00:00
" SET hot_rank = r.hot_rank(a.score, a.published) " ,
2023-09-11 09:12:16 +00:00
)
. await ;
process_ranks_in_batches (
& mut conn ,
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
" community " ,
2023-09-11 09:12:16 +00:00
" a.hot_rank != 0 " ,
2024-04-18 00:58:44 +00:00
" SET hot_rank = r.hot_rank(a.subscribers, a.published) " ,
2023-09-11 09:12:16 +00:00
)
. await ;
info! ( " Finished hot ranks update! " ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
}
}
2023-06-27 08:13:51 +00:00
}
2023-06-08 20:15:15 +00:00
2023-06-27 08:13:51 +00:00
#[ derive(QueryableByName) ]
struct HotRanksUpdateResult {
2023-08-24 15:27:00 +00:00
#[ diesel(sql_type = Timestamptz) ]
published : DateTime < Utc > ,
2023-06-27 08:13:51 +00:00
}
2023-06-08 20:15:15 +00:00
2023-07-17 09:05:55 +00:00
/// Runs the hot rank update query in batches until all rows have been processed.
/// In `where_clause` and `set_clause`, "a" will refer to the current aggregates table.
2023-06-27 08:13:51 +00:00
/// Locked rows are skipped in order to prevent deadlocks (they will likely get updated on the next
/// run)
2023-09-11 09:12:16 +00:00
async fn process_ranks_in_batches (
conn : & mut AsyncPgConnection ,
2023-06-27 08:13:51 +00:00
table_name : & str ,
2023-07-17 09:05:55 +00:00
where_clause : & str ,
2023-06-27 08:13:51 +00:00
set_clause : & str ,
) {
2023-08-24 15:27:00 +00:00
let process_start_time : DateTime < Utc > = Utc
. timestamp_opt ( 0 , 0 )
. single ( )
. expect ( " 0 timestamp creation " ) ;
2023-07-17 09:05:55 +00:00
2023-06-27 08:13:51 +00:00
let update_batch_size = 1000 ; // Bigger batches than this tend to cause seq scans
2023-07-17 09:05:55 +00:00
let mut processed_rows_count = 0 ;
2023-06-27 08:13:51 +00:00
let mut previous_batch_result = Some ( process_start_time ) ;
while let Some ( previous_batch_last_published ) = previous_batch_result {
// Raw `sql_query` is used as a performance optimization - Diesel does not support doing this
// in a single query (neither as a CTE, nor using a subquery)
let result = sql_query ( format! (
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
r #" WITH batch AS (SELECT a.{id_column}
2023-06-27 08:13:51 +00:00
FROM { aggregates_table } a
2023-07-17 09:05:55 +00:00
WHERE a . published > $ 1 AND ( { where_clause } )
2023-06-27 08:13:51 +00:00
ORDER BY a . published
LIMIT $ 2
FOR UPDATE SKIP LOCKED )
UPDATE { aggregates_table } a { set_clause }
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
FROM batch WHERE a . { id_column } = batch . { id_column } RETURNING a . published ;
2023-06-27 08:13:51 +00:00
" #,
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
id_column = format! ( " {table_name} _id " ) ,
aggregates_table = format! ( " {table_name} _aggregates " ) ,
2023-07-17 09:05:55 +00:00
set_clause = set_clause ,
where_clause = where_clause
2023-06-27 08:13:51 +00:00
) )
2023-08-24 15:27:00 +00:00
. bind ::< Timestamptz , _ > ( previous_batch_last_published )
2023-06-27 08:13:51 +00:00
. bind ::< Integer , _ > ( update_batch_size )
2023-09-11 09:12:16 +00:00
. get_results ::< HotRanksUpdateResult > ( conn )
. await ;
2023-06-27 08:13:51 +00:00
match result {
2023-07-17 09:05:55 +00:00
Ok ( updated_rows ) = > {
processed_rows_count + = updated_rows . len ( ) ;
previous_batch_result = updated_rows . last ( ) . map ( | row | row . published ) ;
}
2023-06-27 08:13:51 +00:00
Err ( e ) = > {
error! ( " Failed to update {} hot_ranks: {} " , table_name , e ) ;
break ;
}
2023-06-15 09:29:12 +00:00
}
}
2023-06-27 08:13:51 +00:00
info! (
2023-07-17 09:05:55 +00:00
" Finished process_hot_ranks_in_batches execution for {} (processed {} rows) " ,
table_name , processed_rows_count
2023-06-27 08:13:51 +00:00
) ;
2021-01-29 16:38:27 +00:00
}
2023-09-06 17:43:27 +00:00
/// Post aggregates is a special case, since it needs to join to the community_aggregates
/// table, to get the active monthly user counts.
2023-09-11 09:12:16 +00:00
async fn process_post_aggregates_ranks_in_batches ( conn : & mut AsyncPgConnection ) {
2023-09-06 17:43:27 +00:00
let process_start_time : DateTime < Utc > = Utc
. timestamp_opt ( 0 , 0 )
. single ( )
. expect ( " 0 timestamp creation " ) ;
let update_batch_size = 1000 ; // Bigger batches than this tend to cause seq scans
let mut processed_rows_count = 0 ;
let mut previous_batch_result = Some ( process_start_time ) ;
while let Some ( previous_batch_last_published ) = previous_batch_result {
let result = sql_query (
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
r #" WITH batch AS (SELECT pa.post_id
2023-09-06 17:43:27 +00:00
FROM post_aggregates pa
WHERE pa . published > $ 1
AND ( pa . hot_rank ! = 0 OR pa . hot_rank_active ! = 0 )
ORDER BY pa . published
LIMIT $ 2
FOR UPDATE SKIP LOCKED )
UPDATE post_aggregates pa
2024-04-18 00:58:44 +00:00
SET hot_rank = r . hot_rank ( pa . score , pa . published ) ,
hot_rank_active = r . hot_rank ( pa . score , pa . newest_comment_time_necro ) ,
scaled_rank = r . scaled_rank ( pa . score , pa . published , ca . users_active_month )
2023-09-06 17:43:27 +00:00
FROM batch , community_aggregates ca
Remove id column and use different primary key on some tables (#4093)
* post_saved
* fmt
* remove unique and not null
* put person_id first in primary key and remove index
* use post_saved.find
* change captcha_answer
* remove removal of not null
* comment_aggregates
* comment_like
* comment_saved
* aggregates
* remove "\"
* deduplicate site_aggregates
* person_post_aggregates
* community_moderator
* community_block
* community_person_ban
* custom_emoji_keyword
* federation allow/block list
* federation_queue_state
* instance_block
* local_site_rate_limit, local_user_language, login_token
* person_ban, person_block, person_follower, post_like, post_read, received_activity
* community_follower, community_language, site_language
* fmt
* image_upload
* remove unused newtypes
* remove more indexes
* use .find
* merge
* fix site_aggregates_site function
* fmt
* Primary keys dess (#17)
* Also order reports by oldest first (ref #4123) (#4129)
* Support signed fetch for federation (fixes #868) (#4125)
* Support signed fetch for federation (fixes #868)
* taplo
* add federation queue state to get_federated_instances api (#4104)
* add federation queue state to get_federated_instances api
* feature gate
* move retry sleep function
* move stuff around
* Add UI setting for collapsing bot comments. Fixes #3838 (#4098)
* Add UI setting for collapsing bot comments. Fixes #3838
* Fixing clippy check.
* Only keep sent and received activities for 7 days (fixes #4113, fixes #4110) (#4131)
* Only check auth secure on release mode. (#4127)
* Only check auth secure on release mode.
* Fixing wrong js-client.
* Adding is_debug_mode var.
* Fixing the desktop image on the README. (#4135)
* Delete dupes and add possibly missing unique constraint on person_aggregates.
* Fixing clippy lints.
---------
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
* fmt
* Update community_block.rs
* Update instance_block.rs
* Update person_block.rs
* Update person_block.rs
---------
Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
Co-authored-by: phiresky <phireskyde+git@gmail.com>
2023-11-13 13:14:07 +00:00
WHERE pa . post_id = batch . post_id and pa . community_id = ca . community_id RETURNING pa . published ;
2023-09-06 17:43:27 +00:00
" #,
)
. bind ::< Timestamptz , _ > ( previous_batch_last_published )
. bind ::< Integer , _ > ( update_batch_size )
2023-09-11 09:12:16 +00:00
. get_results ::< HotRanksUpdateResult > ( conn )
. await ;
2023-09-06 17:43:27 +00:00
match result {
Ok ( updated_rows ) = > {
processed_rows_count + = updated_rows . len ( ) ;
previous_batch_result = updated_rows . last ( ) . map ( | row | row . published ) ;
}
Err ( e ) = > {
error! ( " Failed to update {} hot_ranks: {} " , " post_aggregates " , e ) ;
break ;
}
}
}
info! (
" Finished process_hot_ranks_in_batches execution for {} (processed {} rows) " ,
" post_aggregates " , processed_rows_count
) ;
}
2023-09-11 09:12:16 +00:00
async fn delete_expired_captcha_answers ( pool : & mut DbPool < '_ > ) {
let conn = get_conn ( pool ) . await ;
match conn {
Ok ( mut conn ) = > {
diesel ::delete (
captcha_answer ::table
. filter ( captcha_answer ::published . lt ( now ( ) - IntervalDsl ::minutes ( 10 ) ) ) ,
)
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map ( | _ | {
2023-09-11 09:12:16 +00:00
info! ( " Done. " ) ;
} )
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to clear old captcha answers: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
}
}
2023-06-27 10:38:53 +00:00
}
2021-01-29 16:38:27 +00:00
/// Clear old activities (this table gets very large)
2023-09-11 09:12:16 +00:00
async fn clear_old_activities ( pool : & mut DbPool < '_ > ) {
2021-01-29 16:38:27 +00:00
info! ( " Clearing old activities... " ) ;
2023-09-11 09:12:16 +00:00
let conn = get_conn ( pool ) . await ;
match conn {
Ok ( mut conn ) = > {
2023-11-06 21:17:53 +00:00
diesel ::delete (
sent_activity ::table . filter ( sent_activity ::published . lt ( now ( ) - IntervalDsl ::days ( 7 ) ) ) ,
)
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to clear old sent activities: {e} " ) )
2023-11-06 21:17:53 +00:00
. ok ( ) ;
2023-09-11 09:12:16 +00:00
diesel ::delete (
2023-11-06 21:17:53 +00:00
received_activity ::table
. filter ( received_activity ::published . lt ( now ( ) - IntervalDsl ::days ( 7 ) ) ) ,
2023-09-11 09:12:16 +00:00
)
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map ( | _ | info! ( " Done. " ) )
. map_err ( | e | error! ( " Failed to clear old received activities: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
}
}
2021-01-29 16:38:27 +00:00
}
2024-02-15 12:50:53 +00:00
async fn delete_old_denied_users ( pool : & mut DbPool < '_ > ) {
LocalUser ::delete_old_denied_local_users ( pool )
. await
2024-04-04 14:14:59 +00:00
. map ( | _ | {
2024-02-15 12:50:53 +00:00
info! ( " Done. " ) ;
} )
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to deleted old denied users: {e} " ) )
2024-02-15 12:50:53 +00:00
. ok ( ) ;
}
2023-06-20 06:17:54 +00:00
/// overwrite posts and comments 30d after deletion
2023-09-11 09:12:16 +00:00
async fn overwrite_deleted_posts_and_comments ( pool : & mut DbPool < '_ > ) {
2023-06-20 06:17:54 +00:00
info! ( " Overwriting deleted posts... " ) ;
2023-09-11 09:12:16 +00:00
let conn = get_conn ( pool ) . await ;
match conn {
Ok ( mut conn ) = > {
diesel ::update (
post ::table
. filter ( post ::deleted . eq ( true ) )
. filter ( post ::updated . lt ( now ( ) . nullable ( ) - 1. months ( ) ) )
. filter ( post ::body . ne ( DELETED_REPLACEMENT_TEXT ) ) ,
)
. set ( (
post ::body . eq ( DELETED_REPLACEMENT_TEXT ) ,
post ::name . eq ( DELETED_REPLACEMENT_TEXT ) ,
) )
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map ( | _ | {
2023-09-11 09:12:16 +00:00
info! ( " Done. " ) ;
} )
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to overwrite deleted posts: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
info! ( " Overwriting deleted comments... " ) ;
diesel ::update (
comment ::table
. filter ( comment ::deleted . eq ( true ) )
. filter ( comment ::updated . lt ( now ( ) . nullable ( ) - 1. months ( ) ) )
. filter ( comment ::content . ne ( DELETED_REPLACEMENT_TEXT ) ) ,
)
. set ( comment ::content . eq ( DELETED_REPLACEMENT_TEXT ) )
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map ( | _ | {
2023-09-11 09:12:16 +00:00
info! ( " Done. " ) ;
} )
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to overwrite deleted comments: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
}
}
2023-06-20 06:17:54 +00:00
}
2021-01-29 16:38:27 +00:00
/// Re-calculate the site and community active counts every 12 hours
2023-09-11 09:12:16 +00:00
async fn active_counts ( pool : & mut DbPool < '_ > ) {
2021-01-29 16:38:27 +00:00
info! ( " Updating active site and community aggregates ... " ) ;
2023-09-11 09:12:16 +00:00
let conn = get_conn ( pool ) . await ;
2021-01-29 16:38:27 +00:00
2023-09-11 09:12:16 +00:00
match conn {
Ok ( mut conn ) = > {
let intervals = vec! [
( " 1 day " , " day " ) ,
( " 1 week " , " week " ) ,
( " 1 month " , " month " ) ,
( " 6 months " , " half_year " ) ,
] ;
for i in & intervals {
let update_site_stmt = format! (
2023-07-10 15:20:39 +00:00
" update site_aggregates set users_active_{} = (select * from site_aggregates_activity('{}')) where site_id = 1 " ,
2021-01-29 16:38:27 +00:00
i . 1 , i . 0
) ;
2023-09-11 09:12:16 +00:00
sql_query ( update_site_stmt )
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to update site stats: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
2021-01-29 16:38:27 +00:00
2023-09-11 09:12:16 +00:00
let update_community_stmt = format! ( " update community_aggregates ca set users_active_ {} = mv.count_ from community_aggregates_activity(' {} ') mv where ca.community_id = mv.community_id_ " , i . 1 , i . 0 ) ;
sql_query ( update_community_stmt )
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to update community stats: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
}
2021-01-29 16:38:27 +00:00
2023-09-11 09:12:16 +00:00
info! ( " Done. " ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
}
}
2021-01-29 16:38:27 +00:00
}
2022-03-30 13:56:23 +00:00
/// Set banned to false after ban expires
2023-09-11 09:12:16 +00:00
async fn update_banned_when_expired ( pool : & mut DbPool < '_ > ) {
2022-03-30 13:56:23 +00:00
info! ( " Updating banned column if it expires ... " ) ;
2023-09-11 09:12:16 +00:00
let conn = get_conn ( pool ) . await ;
match conn {
Ok ( mut conn ) = > {
diesel ::update (
person ::table
. filter ( person ::banned . eq ( true ) )
. filter ( person ::ban_expires . lt ( now ( ) . nullable ( ) ) ) ,
)
. set ( person ::banned . eq ( false ) )
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to update person.banned when expires: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
2023-04-25 23:28:06 +00:00
2023-09-11 09:12:16 +00:00
diesel ::delete (
community_person_ban ::table . filter ( community_person_ban ::expires . lt ( now ( ) . nullable ( ) ) ) ,
)
. execute ( & mut conn )
. await
2024-04-04 14:14:59 +00:00
. map_err ( | e | error! ( " Failed to remove community_ban expired rows: {e} " ) )
2023-09-11 09:12:16 +00:00
. ok ( ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
}
}
2022-03-30 13:56:23 +00:00
}
2022-09-07 12:12:51 +00:00
2024-06-03 21:30:00 +00:00
/// Updates the instance software and version.
///
/// Does so using the /.well-known/nodeinfo protocol described here:
/// https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md
2023-07-13 14:12:01 +00:00
///
/// TODO: if instance has been dead for a long time, it should be checked less frequently
2023-09-11 09:12:16 +00:00
async fn update_instance_software (
pool : & mut DbPool < '_ > ,
client : & ClientWithMiddleware ,
) -> LemmyResult < ( ) > {
2023-02-18 14:36:12 +00:00
info! ( " Updating instances software and versions... " ) ;
2023-09-11 09:12:16 +00:00
let conn = get_conn ( pool ) . await ;
match conn {
Ok ( mut conn ) = > {
let instances = instance ::table . get_results ::< Instance > ( & mut conn ) . await ? ;
for instance in instances {
2024-06-03 21:30:00 +00:00
if let Some ( form ) = build_update_instance_form ( & instance . domain , client ) . await {
2024-01-19 14:40:12 +00:00
Instance ::update ( pool , instance . id , form ) . await ? ;
2023-06-15 09:29:12 +00:00
}
}
2023-09-11 09:12:16 +00:00
info! ( " Finished updating instances software and versions... " ) ;
}
Err ( e ) = > {
error! ( " Failed to get connection from pool: {e} " ) ;
2023-02-18 14:36:12 +00:00
}
}
2023-07-13 14:12:01 +00:00
Ok ( ( ) )
2023-02-18 14:36:12 +00:00
}
2024-06-03 21:30:00 +00:00
/// This builds an instance update form, for a given domain.
/// If the instance sends a response, but doesn't have a well-known or nodeinfo,
/// Then return a default form with only the updated field.
///
/// TODO This function is a bit of a nightmare with its embedded matches, but the only other way
/// would be to extract the fetches into functions which return the default_form on errors.
async fn build_update_instance_form (
domain : & str ,
client : & ClientWithMiddleware ,
) -> Option < InstanceForm > {
// The `updated` column is used to check if instances are alive. If it is more than three
// days in the past, no outgoing activities will be sent to that instance. However
// not every Fediverse instance has a valid Nodeinfo endpoint (its not required for
// Activitypub). That's why we always need to mark instances as updated if they are
// alive.
let mut instance_form = InstanceForm ::builder ( )
. domain ( domain . to_string ( ) )
. updated ( Some ( naive_now ( ) ) )
. build ( ) ;
// First, fetch their /.well-known/nodeinfo, then extract the correct nodeinfo link from it
let well_known_url = format! ( " https:// {} /.well-known/nodeinfo " , domain ) ;
match client . get ( & well_known_url ) . send ( ) . await {
Ok ( res ) if res . status ( ) . is_client_error ( ) = > {
// Instance doesn't have well-known but sent a response, consider it alive
Some ( instance_form )
}
Ok ( res ) = > match res . json ::< NodeInfoWellKnown > ( ) . await {
Ok ( well_known ) = > {
// Find the first link where the rel contains the allowed rels above
match well_known . links . into_iter ( ) . find ( | links | {
links
. rel
. as_str ( )
. starts_with ( " http://nodeinfo.diaspora.software/ns/schema/2. " )
} ) {
Some ( well_known_link ) = > {
let node_info_url = well_known_link . href ;
// Fetch the node_info from the well known href
match client . get ( node_info_url ) . send ( ) . await {
Ok ( node_info_res ) = > match node_info_res . json ::< NodeInfo > ( ) . await {
Ok ( node_info ) = > {
// Instance sent valid nodeinfo, write it to db
// Set the instance form fields.
if let Some ( software ) = node_info . software . as_ref ( ) {
instance_form . software . clone_from ( & software . name ) ;
instance_form . version . clone_from ( & software . version ) ;
}
Some ( instance_form )
}
Err ( _ ) = > Some ( instance_form ) ,
} ,
Err ( _ ) = > Some ( instance_form ) ,
}
}
// If none is found, use the default form above
None = > Some ( instance_form ) ,
}
}
Err ( _ ) = > {
// No valid nodeinfo but valid HTTP response, consider instance alive
Some ( instance_form )
}
} ,
Err ( _ ) = > {
// dead instance, do nothing
None
}
}
}
2023-02-18 14:36:12 +00:00
#[cfg(test)]
#[allow(clippy::indexing_slicing)]
mod tests {

  use crate::scheduled_tasks::build_update_instance_form;
  use lemmy_api_common::request::client_builder;
  use lemmy_db_schema::source::instance::InstanceForm;
  use lemmy_utils::{error::LemmyResult, settings::structs::Settings, LemmyErrorType};
  use pretty_assertions::assert_eq;
  use reqwest_middleware::ClientBuilder;
  use serial_test::serial;

  /// Builds a client from the default settings and requests the update form for `domain`.
  ///
  /// Fails with `CouldntFindObject` when no form is returned. These tests talk to the real
  /// hosts, so they need network access.
  async fn fetch_form(domain: &str) -> LemmyResult<InstanceForm> {
    let client = ClientBuilder::new(client_builder(&Settings::default()).build()?).build();
    let form = build_update_instance_form(domain, &client)
      .await
      .ok_or(LemmyErrorType::CouldntFindObject)?;
    Ok(form)
  }

  // The domains and expected software names carry no surrounding whitespace: the domain is
  // spliced into a URL and the software name is compared for exact equality.

  #[tokio::test]
  #[serial]
  async fn test_nodeinfo_voyager_lemmy_ml() -> LemmyResult<()> {
    let form = fetch_form("voyager.lemmy.ml").await?;
    assert_eq!(
      form.software.ok_or(LemmyErrorType::CouldntFindObject)?,
      "lemmy"
    );
    Ok(())
  }

  #[tokio::test]
  #[serial]
  async fn test_nodeinfo_mastodon_social() -> LemmyResult<()> {
    let form = fetch_form("mastodon.social").await?;
    assert_eq!(
      form.software.ok_or(LemmyErrorType::CouldntFindObject)?,
      "mastodon"
    );
    Ok(())
  }
}