From 544d30f0d47043d1ced5adbbe628a82dd993f559 Mon Sep 17 00:00:00 2001 From: phiresky Date: Mon, 18 Sep 2023 15:44:48 +0200 Subject: [PATCH 1/5] Fix Posts List Performance + cursor-based pagination (#3872) * add token-based pagination + fast subscribed post view * add migrations * fix failing heuristic * revert * output pagination token as next_page, fix off-by-one, restructure * more cleanup * clean * format sql * fix comment * fix tests * e * empty * move last page thing * restructure a bit for readability * rename page_cursor * update for scaled sort * fix * sql format * fix * get rid of macros --------- Co-authored-by: Dessalines --- crates/api_common/src/post.rs | 6 +- crates/apub/src/api/list_posts.rs | 16 +- crates/db_views/src/post_view.rs | 354 ++++++++++++++---- crates/db_views/src/structs.rs | 7 + .../down.sql | 54 +++ .../up.sql | 59 +++ 6 files changed, 414 insertions(+), 82 deletions(-) create mode 100644 migrations/2023-09-07-215546_post-queries-efficient/down.sql create mode 100644 migrations/2023-09-07-215546_post-queries-efficient/up.sql diff --git a/crates/api_common/src/post.rs b/crates/api_common/src/post.rs index d7838866b..7c3aabb60 100644 --- a/crates/api_common/src/post.rs +++ b/crates/api_common/src/post.rs @@ -5,7 +5,7 @@ use lemmy_db_schema::{ PostFeatureType, SortType, }; -use lemmy_db_views::structs::{PostReportView, PostView}; +use lemmy_db_views::structs::{PaginationCursor, PostReportView, PostView}; use lemmy_db_views_actor::structs::{CommunityModeratorView, CommunityView}; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; @@ -70,6 +70,7 @@ pub struct GetPostResponse { pub struct GetPosts { pub type_: Option, pub sort: Option, + /// DEPRECATED, use page_cursor pub page: Option, pub limit: Option, pub community_id: Option, @@ -78,6 +79,7 @@ pub struct GetPosts { pub liked_only: Option, pub disliked_only: Option, pub auth: Option>, + pub page_cursor: Option, } #[derive(Serialize, Deserialize, Debug, Clone)] 
@@ -86,6 +88,8 @@ pub struct GetPosts { /// The post list response. pub struct GetPostsResponse { pub posts: Vec, + /// the pagination cursor to use to fetch the next page + pub next_page: Option, } #[derive(Debug, Serialize, Deserialize, Clone, Default)] diff --git a/crates/apub/src/api/list_posts.rs b/crates/apub/src/api/list_posts.rs index 1c98e699a..425e2adf2 100644 --- a/crates/apub/src/api/list_posts.rs +++ b/crates/apub/src/api/list_posts.rs @@ -11,7 +11,10 @@ use lemmy_api_common::{ utils::{check_private_instance, local_user_view_from_jwt_opt_new}, }; use lemmy_db_schema::source::{community::Community, local_site::LocalSite}; -use lemmy_db_views::{post_view::PostQuery, structs::LocalUserView}; +use lemmy_db_views::{ + post_view::PostQuery, + structs::{LocalUserView, PaginationCursor}, +}; use lemmy_utils::error::{LemmyError, LemmyErrorExt, LemmyErrorType}; #[tracing::instrument(skip(context))] @@ -48,6 +51,12 @@ pub async fn list_posts( &local_site, community_id, )?); + // parse pagination token + let page_after = if let Some(pa) = &data.page_cursor { + Some(pa.read(&mut context.pool()).await?) 
+ } else { + None + }; let posts = PostQuery { local_user: local_user_view.as_ref(), @@ -58,6 +67,7 @@ pub async fn list_posts( liked_only, disliked_only, page, + page_after, limit, ..Default::default() } @@ -65,5 +75,7 @@ pub async fn list_posts( .await .with_lemmy_type(LemmyErrorType::CouldntGetPosts)?; - Ok(Json(GetPostsResponse { posts })) + // if this page wasn't empty, then there is a next page after the last post on this page + let next_page = posts.last().map(PaginationCursor::after_post); + Ok(Json(GetPostsResponse { posts, next_page })) } diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index 4851c5507..607af986d 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -1,21 +1,26 @@ -use crate::structs::{LocalUserView, PostView}; +use crate::structs::{LocalUserView, PaginationCursor, PostView}; use diesel::{ debug_query, - dsl::{exists, not, IntervalDsl}, + dsl::{self, exists, not, IntervalDsl}, + expression::AsExpression, pg::Pg, result::Error, sql_function, - sql_types::{self, Timestamptz}, + sql_types::{self, SingleValue, SqlType, Timestamptz}, BoolExpressionMethods, BoxableExpression, + Expression, ExpressionMethods, IntoSql, + JoinOnDsl, NullableExpressionMethods, + OptionalExtension, PgTextExpressionMethods, QueryDsl, }; use diesel_async::RunQueryDsl; use lemmy_db_schema::{ + aggregates::structs::PostAggregates, newtypes::{CommunityId, LocalUserId, PersonId, PostId}, schema::{ community, @@ -28,12 +33,12 @@ use lemmy_db_schema::{ person_block, person_post_aggregates, post, - post_aggregates, + post_aggregates::{self, newest_comment_time}, post_like, post_read, post_saved, }, - utils::{fuzzy_search, limit_and_offset, DbConn, DbPool, ListFn, Queries, ReadFn}, + utils::{fuzzy_search, get_conn, limit_and_offset, DbConn, DbPool, ListFn, Queries, ReadFn}, ListingType, SortType, }; @@ -41,6 +46,55 @@ use tracing::debug; sql_function!(fn coalesce(x: sql_types::Nullable, y: sql_types::BigInt) -> 
sql_types::BigInt); +fn order_and_page_filter_desc( + query: Q, + column: C, + options: &PostQuery, + getter: impl Fn(&PostAggregates) -> T, +) -> Q +where + Q: diesel::query_dsl::methods::ThenOrderDsl, Output = Q> + + diesel::query_dsl::methods::ThenOrderDsl, Output = Q> + + diesel::query_dsl::methods::FilterDsl, Output = Q> + + diesel::query_dsl::methods::FilterDsl, Output = Q>, + C: Expression + Copy, + C::SqlType: SingleValue + SqlType, + T: AsExpression, +{ + let mut query = query.then_order_by(column.desc()); + if let Some(before) = &options.page_before_or_equal { + query = query.filter(column.ge(getter(&before.0))); + } + if let Some(after) = &options.page_after { + query = query.filter(column.le(getter(&after.0))); + } + query +} + +fn order_and_page_filter_asc( + query: Q, + column: C, + options: &PostQuery, + getter: impl Fn(&PostAggregates) -> T, +) -> Q +where + Q: diesel::query_dsl::methods::ThenOrderDsl, Output = Q> + + diesel::query_dsl::methods::FilterDsl, Output = Q> + + diesel::query_dsl::methods::FilterDsl, Output = Q>, + C: Expression + Copy, + C::SqlType: SingleValue + SqlType, + T: AsExpression, +{ + let mut query = query.then_order_by(column.asc()); + if let Some(before) = &options.page_before_or_equal { + query = query.filter(column.le(getter(&before.0))); + } + if let Some(after) = &options.page_after { + query = query.filter(column.ge(getter(&after.0))); + } + query +} + fn queries<'a>() -> Queries< impl ReadFn<'a, PostView, (PostId, Option, bool)>, impl ListFn<'a, PostView, PostQuery<'a>>, @@ -251,13 +305,18 @@ fn queries<'a>() -> Queries< .filter(community::removed.eq(false)) .filter(post::removed.eq(false)); } - - if options.community_id.is_none() { - query = query.then_order_by(post_aggregates::featured_local.desc()); - } else if let Some(community_id) = options.community_id { - query = query - .filter(post_aggregates::community_id.eq(community_id)) - .then_order_by(post_aggregates::featured_community.desc()); + if 
options.community_id.is_none() || options.community_id_just_for_prefetch { + query = order_and_page_filter_desc(query, post_aggregates::featured_local, &options, |e| { + e.featured_local + }); + } else { + query = + order_and_page_filter_desc(query, post_aggregates::featured_community, &options, |e| { + e.featured_community + }); + } + if let Some(community_id) = options.community_id { + query = query.filter(post_aggregates::community_id.eq(community_id)); } if let Some(creator_id) = options.creator_id { @@ -292,12 +351,12 @@ fn queries<'a>() -> Queries< } } - if let Some(url_search) = options.url_search { + if let Some(url_search) = &options.url_search { query = query.filter(post::url.eq(url_search)); } - if let Some(search_term) = options.search_term { - let searcher = fuzzy_search(&search_term); + if let Some(search_term) = &options.search_term { + let searcher = fuzzy_search(search_term); query = query.filter( post::name .ilike(searcher.clone()) @@ -373,70 +432,91 @@ fn queries<'a>() -> Queries< } let now = diesel::dsl::now.into_sql::(); - query = match options.sort.unwrap_or(SortType::Hot) { - SortType::Active => query - .then_order_by(post_aggregates::hot_rank_active.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::Hot => query - .then_order_by(post_aggregates::hot_rank.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::Scaled => query - .then_order_by(post_aggregates::scaled_rank.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::Controversial => query.then_order_by(post_aggregates::controversy_rank.desc()), - SortType::New => query.then_order_by(post_aggregates::published.desc()), - SortType::Old => query.then_order_by(post_aggregates::published.asc()), - SortType::NewComments => query.then_order_by(post_aggregates::newest_comment_time.desc()), - SortType::MostComments => query - .then_order_by(post_aggregates::comments.desc()) - .then_order_by(post_aggregates::published.desc()), - 
SortType::TopAll => query - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopYear => query - .filter(post_aggregates::published.gt(now - 1.years())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopMonth => query - .filter(post_aggregates::published.gt(now - 1.months())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopWeek => query - .filter(post_aggregates::published.gt(now - 1.weeks())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopDay => query - .filter(post_aggregates::published.gt(now - 1.days())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopHour => query - .filter(post_aggregates::published.gt(now - 1.hours())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopSixHour => query - .filter(post_aggregates::published.gt(now - 6.hours())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopTwelveHour => query - .filter(post_aggregates::published.gt(now - 12.hours())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopThreeMonths => query - .filter(post_aggregates::published.gt(now - 3.months())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopSixMonths => query - .filter(post_aggregates::published.gt(now - 6.months())) - .then_order_by(post_aggregates::score.desc()) - .then_order_by(post_aggregates::published.desc()), - SortType::TopNineMonths => query - .filter(post_aggregates::published.gt(now - 9.months())) - .then_order_by(post_aggregates::score.desc()) - 
.then_order_by(post_aggregates::published.desc()), + { + use post_aggregates::{ + comments, + controversy_rank, + hot_rank, + hot_rank_active, + published, + scaled_rank, + score, + }; + match options.sort.as_ref().unwrap_or(&SortType::Hot) { + SortType::Active => { + query = + order_and_page_filter_desc(query, hot_rank_active, &options, |e| e.hot_rank_active); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + SortType::Hot => { + query = order_and_page_filter_desc(query, hot_rank, &options, |e| e.hot_rank); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + SortType::Scaled => { + query = order_and_page_filter_desc(query, scaled_rank, &options, |e| e.scaled_rank); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + SortType::Controversial => { + query = + order_and_page_filter_desc(query, controversy_rank, &options, |e| e.controversy_rank); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + SortType::New => { + query = order_and_page_filter_desc(query, published, &options, |e| e.published) + } + SortType::Old => { + query = order_and_page_filter_asc(query, published, &options, |e| e.published) + } + SortType::NewComments => { + query = order_and_page_filter_desc(query, newest_comment_time, &options, |e| { + e.newest_comment_time + }) + } + SortType::MostComments => { + query = order_and_page_filter_desc(query, comments, &options, |e| e.comments); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + SortType::TopAll => { + query = order_and_page_filter_desc(query, score, &options, |e| e.score); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + o @ (SortType::TopYear + | SortType::TopMonth + | SortType::TopWeek + | SortType::TopDay + | SortType::TopHour + | SortType::TopSixHour + | SortType::TopTwelveHour + | SortType::TopThreeMonths + | 
SortType::TopSixMonths + | SortType::TopNineMonths) => { + let interval = match o { + SortType::TopYear => 1.years(), + SortType::TopMonth => 1.months(), + SortType::TopWeek => 1.weeks(), + SortType::TopDay => 1.days(), + SortType::TopHour => 1.hours(), + SortType::TopSixHour => 6.hours(), + SortType::TopTwelveHour => 12.hours(), + SortType::TopThreeMonths => 3.months(), + SortType::TopSixMonths => 6.months(), + SortType::TopNineMonths => 9.months(), + _ => return Err(Error::NotFound), + }; + query = query.filter(post_aggregates::published.gt(now - interval)); + query = order_and_page_filter_desc(query, score, &options, |e| e.score); + query = order_and_page_filter_desc(query, published, &options, |e| e.published); + } + } }; - let (limit, offset) = limit_and_offset(options.page, options.limit)?; - + let (limit, mut offset) = limit_and_offset(options.page, options.limit)?; + if options.page_after.is_some() { + // always skip exactly one post because that's the last post of the previous page + // fixing the where clause is more difficult because we'd have to change only the last order-by-where clause + // e.g. WHERE (featured_local<=, hot_rank<=, published<=) to WHERE (<=, <=, <) + offset = 1; + } query = query.limit(limit).offset(offset); debug!("Post View Query: {:?}", debug_query::(&query)); @@ -468,12 +548,42 @@ impl PostView { } } -#[derive(Default)] +impl PaginationCursor { + // get cursor for page that starts immediately after the given post + pub fn after_post(view: &PostView) -> PaginationCursor { + // hex encoding to prevent ossification + PaginationCursor(format!("P{:x}", view.counts.post_id.0)) + } + pub async fn read(&self, pool: &mut DbPool<'_>) -> Result { + Ok(PaginationCursorData( + PostAggregates::read( + pool, + PostId( + self + .0 + .get(1..) 
+ .and_then(|e| i32::from_str_radix(e, 16).ok()) + .ok_or_else(|| Error::QueryBuilderError("Could not parse pagination token".into()))?, + ), + ) + .await?, + )) + } +} + +// currently we use a postaggregates struct as the pagination token. +// we only use some of the properties of the post aggregates, depending on which sort type we page by +#[derive(Clone)] +pub struct PaginationCursorData(PostAggregates); + +#[derive(Default, Clone)] pub struct PostQuery<'a> { pub listing_type: Option, pub sort: Option, pub creator_id: Option, pub community_id: Option, + // if true, the query should be handled as if community_id was not given except adding the literal filter + pub community_id_just_for_prefetch: bool, pub local_user: Option<&'a LocalUserView>, pub search_term: Option, pub url_search: Option, @@ -484,11 +594,97 @@ pub struct PostQuery<'a> { pub is_profile_view: bool, pub page: Option, pub limit: Option, + pub page_after: Option, + pub page_before_or_equal: Option, } impl<'a> PostQuery<'a> { + async fn prefetch_upper_bound_for_page_before( + &self, + pool: &mut DbPool<'_>, + ) -> Result>, Error> { + // first get one page for the most popular community to get an upper bound for the page end for the real query + // the reason this is needed is that when fetching posts for a single community PostgreSQL can optimize + // the query to use an index on e.g. (=, >=, >=, >=) and fetch only LIMIT rows + // but for the followed-communities query it has to query the index on (IN, >=, >=, >=) + // which it currently can't do at all (as of PG 16). see the discussion here: + // https://github.com/LemmyNet/lemmy/issues/2877#issuecomment-1673597190 + // + // the results are correct no matter which community we fetch these for, since it basically covers the "worst case" of the whole page consisting of posts from one community + // but using the largest community decreases the pagination-frame to make the real query more efficient.
+ use lemmy_db_schema::schema::{ + community_aggregates::dsl::{community_aggregates, community_id, users_active_month}, + community_follower::dsl::{ + community_follower, + community_id as follower_community_id, + person_id, + }, + }; + let (limit, offset) = limit_and_offset(self.page, self.limit)?; + if offset != 0 { + return Err(Error::QueryBuilderError( + "legacy pagination cannot be combined with v2 pagination".into(), + )); + } + let self_person_id = self + .local_user + .expect("part of the above if") + .local_user + .person_id; + let largest_subscribed = { + let conn = &mut get_conn(pool).await?; + community_follower + .filter(person_id.eq(self_person_id)) + .inner_join(community_aggregates.on(community_id.eq(follower_community_id))) + .order_by(users_active_month.desc()) + .select(community_id) + .limit(1) + .get_result::(conn) + .await + .optional()? + }; + let Some(largest_subscribed) = largest_subscribed else { + // nothing subscribed to? no posts + return Ok(None); + }; + + let mut v = queries() + .list( + pool, + PostQuery { + community_id: Some(largest_subscribed), + community_id_just_for_prefetch: true, + ..self.clone() + }, + ) + .await?; + // take last element of array. if this query returned less than LIMIT elements, + // the heuristic is invalid since we can't guarantee the full query will return >= LIMIT results (return original query) + if (v.len() as i64) < limit { + Ok(Some(self.clone())) + } else { + let page_before_or_equal = Some(PaginationCursorData(v.pop().expect("else case").counts)); + Ok(Some(PostQuery { + page_before_or_equal, + ..self.clone() + })) + } + } + pub async fn list(self, pool: &mut DbPool<'_>) -> Result, Error> { - queries().list(pool, self).await + if self.listing_type == Some(ListingType::Subscribed) + && self.community_id.is_none() + && self.local_user.is_some() + && self.page_before_or_equal.is_none() + { + if let Some(query) = self.prefetch_upper_bound_for_page_before(pool).await? 
{ + queries().list(pool, query).await + } else { + Ok(vec![]) + } + } else { + queries().list(pool, self).await + } } } diff --git a/crates/db_views/src/structs.rs b/crates/db_views/src/structs.rs index 6ac69de03..dd0bba184 100644 --- a/crates/db_views/src/structs.rs +++ b/crates/db_views/src/structs.rs @@ -89,6 +89,13 @@ pub struct PostReportView { pub resolver: Option, } +/// currently this is just a wrapper around post id, but should be seen as opaque from the client's perspective +/// stringified since we might want to use arbitrary info later, with a P prepended to prevent ossification +/// (api users love to make assumptions (e.g. parse stuff that looks like numbers as numbers) about apis that aren't part of the spec +#[derive(Serialize, Deserialize, Debug, Clone)] +#[cfg_attr(feature = "full", derive(ts_rs::TS))] +pub struct PaginationCursor(pub(crate) String); + #[skip_serializing_none] #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] #[cfg_attr(feature = "full", derive(TS, Queryable))] diff --git a/migrations/2023-09-07-215546_post-queries-efficient/down.sql b/migrations/2023-09-07-215546_post-queries-efficient/down.sql new file mode 100644 index 000000000..82f20cd9b --- /dev/null +++ b/migrations/2023-09-07-215546_post-queries-efficient/down.sql @@ -0,0 +1,54 @@ +DROP INDEX idx_post_aggregates_featured_community_active; + +DROP INDEX idx_post_aggregates_featured_community_controversy; + +DROP INDEX idx_post_aggregates_featured_community_hot; + +DROP INDEX idx_post_aggregates_featured_community_scaled; + +DROP INDEX idx_post_aggregates_featured_community_most_comments; + +DROP INDEX idx_post_aggregates_featured_community_newest_comment_time; + +DROP INDEX idx_post_aggregates_featured_community_newest_comment_time_necro; + +DROP INDEX idx_post_aggregates_featured_community_published; + +DROP INDEX idx_post_aggregates_featured_community_score; + +CREATE INDEX idx_post_aggregates_featured_community_active ON post_aggregates (featured_community DESC, 
hot_rank_active DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_controversy ON post_aggregates (featured_community DESC, controversy_rank DESC); + +CREATE INDEX idx_post_aggregates_featured_community_hot ON post_aggregates (featured_community DESC, hot_rank DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_scaled ON post_aggregates (featured_community DESC, scaled_rank DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_most_comments ON post_aggregates (featured_community DESC, comments DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time ON post_aggregates (featured_community DESC, newest_comment_time DESC); + +CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time_necro ON post_aggregates (featured_community DESC, newest_comment_time_necro DESC); + +CREATE INDEX idx_post_aggregates_featured_community_published ON post_aggregates (featured_community DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_score ON post_aggregates (featured_community DESC, score DESC, published DESC); + +DROP INDEX idx_post_aggregates_community_active; + +DROP INDEX idx_post_aggregates_community_controversy; + +DROP INDEX idx_post_aggregates_community_hot; + +DROP INDEX idx_post_aggregates_community_scaled; + +DROP INDEX idx_post_aggregates_community_most_comments; + +DROP INDEX idx_post_aggregates_community_newest_comment_time; + +DROP INDEX idx_post_aggregates_community_newest_comment_time_necro; + +DROP INDEX idx_post_aggregates_community_published; + +DROP INDEX idx_post_aggregates_community_score; + diff --git a/migrations/2023-09-07-215546_post-queries-efficient/up.sql b/migrations/2023-09-07-215546_post-queries-efficient/up.sql new file mode 100644 index 000000000..81d591de4 --- /dev/null +++ b/migrations/2023-09-07-215546_post-queries-efficient/up.sql @@ -0,0 +1,59 @@ +-- these indices are used for 
single-community filtering and for the followed-communities (front-page) query +-- basically one index per Sort +-- index name is truncated to 63 chars so abbreviate a bit +CREATE INDEX idx_post_aggregates_community_active ON post_aggregates (community_id, featured_local DESC, hot_rank_active DESC, published DESC); + +CREATE INDEX idx_post_aggregates_community_controversy ON post_aggregates (community_id, featured_local DESC, controversy_rank DESC); + +CREATE INDEX idx_post_aggregates_community_hot ON post_aggregates (community_id, featured_local DESC, hot_rank DESC, published DESC); + +CREATE INDEX idx_post_aggregates_community_scaled ON post_aggregates (community_id, featured_local DESC, scaled_rank DESC, published DESC); + +CREATE INDEX idx_post_aggregates_community_most_comments ON post_aggregates (community_id, featured_local DESC, comments DESC, published DESC); + +CREATE INDEX idx_post_aggregates_community_newest_comment_time ON post_aggregates (community_id, featured_local DESC, newest_comment_time DESC); + +CREATE INDEX idx_post_aggregates_community_newest_comment_time_necro ON post_aggregates (community_id, featured_local DESC, newest_comment_time_necro DESC); + +CREATE INDEX idx_post_aggregates_community_published ON post_aggregates (community_id, featured_local DESC, published DESC); + +CREATE INDEX idx_post_aggregates_community_score ON post_aggregates (community_id, featured_local DESC, score DESC, published DESC); + +-- these indices are used for "per-community" filtering +-- these indices weren't really useful because whenever the query filters by featured_community it also filters by community_id, so prepend that to all these indexes +DROP INDEX idx_post_aggregates_featured_community_active; + +DROP INDEX idx_post_aggregates_featured_community_controversy; + +DROP INDEX idx_post_aggregates_featured_community_hot; + +DROP INDEX idx_post_aggregates_featured_community_scaled; + +DROP INDEX idx_post_aggregates_featured_community_most_comments; + +DROP 
INDEX idx_post_aggregates_featured_community_newest_comment_time; + +DROP INDEX idx_post_aggregates_featured_community_newest_comment_time_necro; + +DROP INDEX idx_post_aggregates_featured_community_published; + +DROP INDEX idx_post_aggregates_featured_community_score; + +CREATE INDEX idx_post_aggregates_featured_community_active ON post_aggregates (community_id, featured_community DESC, hot_rank_active DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_controversy ON post_aggregates (community_id, featured_community DESC, controversy_rank DESC); + +CREATE INDEX idx_post_aggregates_featured_community_hot ON post_aggregates (community_id, featured_community DESC, hot_rank DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_scaled ON post_aggregates (community_id, featured_community DESC, scaled_rank DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_most_comments ON post_aggregates (community_id, featured_community DESC, comments DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time ON post_aggregates (community_id, featured_community DESC, newest_comment_time DESC); + +CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time_necro ON post_aggregates (community_id, featured_community DESC, newest_comment_time_necro DESC); + +CREATE INDEX idx_post_aggregates_featured_community_published ON post_aggregates (community_id, featured_community DESC, published DESC); + +CREATE INDEX idx_post_aggregates_featured_community_score ON post_aggregates (community_id, featured_community DESC, score DESC, published DESC); + From b431c9bdf91570292553c7b8476d5eb3ab1772fa Mon Sep 17 00:00:00 2001 From: Apple Sheeple Date: Wed, 13 Sep 2023 20:27:31 +0300 Subject: [PATCH 2/5] Sanitize apub post body Signed-off-by: Apple Sheeple --- crates/apub/src/objects/post.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 29867fced..2fc859ddf 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -238,13 +238,14 @@ impl Object for ApubPost { LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; let name = sanitize_html_federation(&name); + let body = sanitize_html_federation_opt(&body_slurs_removed); let embed_title = sanitize_html_federation_opt(&embed_title); let embed_description = sanitize_html_federation_opt(&embed_description); PostInsertForm { name, url: url.map(Into::into), - body: body_slurs_removed, + body, creator_id: creator.id, community_id: community.id, removed: None, From 5fff7504e5e531ae4dadad73ca40624135a01995 Mon Sep 17 00:00:00 2001 From: Apple Sheeple Date: Mon, 18 Sep 2023 22:31:27 +0300 Subject: [PATCH 3/5] Reject registration application if sanitizing the username modifies it This removes the possibility of using a mix of sanitized and non-sanitized values for `username` in code. 
Signed-off-by: Apple Sheeple --- crates/api_crud/src/user/create.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/api_crud/src/user/create.rs b/crates/api_crud/src/user/create.rs index 02c95cb04..22dcd0dc4 100644 --- a/crates/api_crud/src/user/create.rs +++ b/crates/api_crud/src/user/create.rs @@ -89,7 +89,10 @@ pub async fn register( let slur_regex = local_site_to_slur_regex(&local_site); check_slurs(&data.username, &slur_regex)?; check_slurs_opt(&data.answer, &slur_regex)?; - let username = sanitize_html_api(&data.username); + + if sanitize_html_api(&data.username) != data.username { + Err(LemmyErrorType::InvalidName)?; + } let actor_keypair = generate_actor_keypair()?; is_valid_actor_name(&data.username, local_site.actor_name_max_length as usize)?; @@ -109,7 +112,7 @@ pub async fn register( // Register the new person let person_form = PersonInsertForm::builder() - .name(username) + .name(data.username.clone()) .actor_id(Some(actor_id.clone())) .private_key(Some(actor_keypair.private_key)) .public_key(actor_keypair.public_key) From c05458adcd359d7805a0e6ffc1f3941919946a1f Mon Sep 17 00:00:00 2001 From: Apple Sheeple Date: Mon, 18 Sep 2023 22:36:38 +0300 Subject: [PATCH 4/5] Sanitize registration application answer Signed-off-by: Apple Sheeple --- crates/api_crud/src/user/create.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/api_crud/src/user/create.rs b/crates/api_crud/src/user/create.rs index 22dcd0dc4..c56c1362d 100644 --- a/crates/api_crud/src/user/create.rs +++ b/crates/api_crud/src/user/create.rs @@ -11,6 +11,7 @@ use lemmy_api_common::{ local_site_to_slur_regex, password_length_check, sanitize_html_api, + sanitize_html_api_opt, send_new_applicant_email_to_admins, send_verification_email, EndpointType, @@ -94,6 +95,8 @@ pub async fn register( Err(LemmyErrorType::InvalidName)?; } + let answer = sanitize_html_api_opt(&data.answer); + let actor_keypair = generate_actor_keypair()?; 
is_valid_actor_name(&data.username, local_site.actor_name_max_length as usize)?; let actor_id = generate_local_apub_endpoint( @@ -149,7 +152,7 @@ pub async fn register( let form = RegistrationApplicationInsertForm { local_user_id: inserted_local_user.id, // We already made sure answer was not null above - answer: data.answer.clone().expect("must have an answer"), + answer: answer.expect("must have an answer"), }; RegistrationApplication::create(&mut context.pool(), &form).await?; From 8c419103b2e067c867f5a64c8fbcc942039ca7c9 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Mon, 18 Sep 2023 19:19:13 -0400 Subject: [PATCH 5/5] Fixing formatting. --- crates/api_crud/src/user/create.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/api_crud/src/user/create.rs b/crates/api_crud/src/user/create.rs index c56c1362d..e1efcbed2 100644 --- a/crates/api_crud/src/user/create.rs +++ b/crates/api_crud/src/user/create.rs @@ -92,7 +92,7 @@ pub async fn register( check_slurs_opt(&data.answer, &slur_regex)?; if sanitize_html_api(&data.username) != data.username { - Err(LemmyErrorType::InvalidName)?; + Err(LemmyErrorType::InvalidName)?; } let answer = sanitize_html_api_opt(&data.answer);