From 688d15c866293e2622d3ad3accaadebd2a55b52a Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Tue, 10 Sep 2024 16:25:25 -0400 Subject: [PATCH] Clean up `AnnualReport::*` classes Repeating same things across these classes: - Use AR scopes where they exist, make a few new ones - Pull out some constants to hold magic number values - Nudge more logic down into smaller private methods - Once over on Arel/AR/scopes preference (over sql strings) --- app/lib/annual_report/archetype.rb | 15 ++-- .../commonly_interacted_with_accounts.rb | 25 +++++-- .../annual_report/most_reblogged_accounts.rb | 26 +++++-- app/lib/annual_report/most_used_apps.rb | 23 ++++-- app/lib/annual_report/percentiles.rb | 54 ++++++++++++-- app/lib/annual_report/time_series.rb | 70 ++++++++++++++++--- app/lib/annual_report/top_hashtags.rb | 36 ++++++++-- app/lib/annual_report/top_statuses.rb | 38 +++++++--- app/lib/annual_report/type_distribution.rb | 20 +++++- app/models/status.rb | 4 ++ 10 files changed, 250 insertions(+), 61 deletions(-) diff --git a/app/lib/annual_report/archetype.rb b/app/lib/annual_report/archetype.rb index c02b28dfda..74e1ac1522 100644 --- a/app/lib/annual_report/archetype.rb +++ b/app/lib/annual_report/archetype.rb @@ -5,6 +5,9 @@ class AnnualReport::Archetype < AnnualReport::Source # each active user in a single year (2023) AVERAGE_PER_YEAR = 113 + SCORE_MULTIPLIER = 2 + SCORE_REDUCER = 0.1 + def generate { archetype: archetype, @@ -16,11 +19,11 @@ class AnnualReport::Archetype < AnnualReport::Source def archetype if (standalone_count + replies_count + reblogs_count) < AVERAGE_PER_YEAR :lurker - elsif reblogs_count > (standalone_count * 2) + elsif reblogs_count > (standalone_count * SCORE_MULTIPLIER) :booster - elsif polls_count > (standalone_count * 0.1) # standalone_count includes posts with polls + elsif polls_count > (standalone_count * SCORE_REDUCER) # standalone_count includes posts with polls :pollster - elsif replies_count > (standalone_count * 2) + elsif replies_count > (standalone_count * SCORE_MULTIPLIER) :replier else :oracle @@ -28,15 +31,15 @@ class AnnualReport::Archetype < AnnualReport::Source end def polls_count - @polls_count ||= report_statuses.where.not(poll_id: nil).count + @polls_count ||= report_statuses.with_polls.count end def reblogs_count - @reblogs_count ||= report_statuses.where.not(reblog_of_id: nil).count + @reblogs_count ||= report_statuses.with_reblogs.count end def replies_count - @replies_count ||= report_statuses.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count + @replies_count ||= report_statuses.with_replies.without_replies_to(@account).count end def standalone_count diff --git a/app/lib/annual_report/commonly_interacted_with_accounts.rb b/app/lib/annual_report/commonly_interacted_with_accounts.rb index e7482f0d52..e944fa81cc 100644 --- a/app/lib/annual_report/commonly_interacted_with_accounts.rb +++ b/app/lib/annual_report/commonly_interacted_with_accounts.rb @@ -2,21 +2,32 @@ class AnnualReport::CommonlyInteractedWithAccounts < AnnualReport::Source SET_SIZE = 40 + MINIMUM_INTERACTIONS = 1 def generate { - commonly_interacted_with_accounts: commonly_interacted_with_accounts.map do |(account_id, count)| - { - account_id: account_id, - count: count, - } - end, + commonly_interacted_with_accounts: account_map, } end private + def account_map + commonly_interacted_with_accounts.map do |account_id, count| + { + account_id: account_id, + count: count, + } + end + end + def commonly_interacted_with_accounts - report_statuses.where.not(in_reply_to_account_id: @account.id).group(:in_reply_to_account_id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('in_reply_to_account_id, count(*) AS total')) + report_statuses + .without_replies_to(@account) + .group(:in_reply_to_account_id) + .having(Arel.star.count.gt(MINIMUM_INTERACTIONS)) + .limit(SET_SIZE) + .order(total: :desc) + .pluck(:in_reply_to_account_id, Arel.star.count.as('total')) end end diff --git a/app/lib/annual_report/most_reblogged_accounts.rb b/app/lib/annual_report/most_reblogged_accounts.rb index 39ed3868ea..6075ac9b1d 100644 --- a/app/lib/annual_report/most_reblogged_accounts.rb +++ b/app/lib/annual_report/most_reblogged_accounts.rb @@ -2,21 +2,33 @@ class AnnualReport::MostRebloggedAccounts < AnnualReport::Source SET_SIZE = 10 + MINIMUM_COUNT = 1 def generate { - most_reblogged_accounts: most_reblogged_accounts.map do |(account_id, count)| - { - account_id: account_id, - count: count, - } - end, + most_reblogged_accounts: account_map, } end private + def account_map + most_reblogged_accounts.map do |account_id, count| + { + account_id: account_id, + count: count, + } + end + end + def most_reblogged_accounts - report_statuses.where.not(reblog_of_id: nil).joins(reblog: :account).group('accounts.id').having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('accounts.id, count(*) as total')) + report_statuses + .with_reblogs + .group(Account.arel_table[:id]) + .having(Arel.star.count.gt(MINIMUM_COUNT)) + .joins(reblog: :account) + .limit(SET_SIZE) + .order(total: :desc) + .pluck(Account.arel_table[:id], Arel.star.count.as('total')) end end diff --git a/app/lib/annual_report/most_used_apps.rb b/app/lib/annual_report/most_used_apps.rb index fb1ca1d167..6fd61ebb31 100644 --- a/app/lib/annual_report/most_used_apps.rb +++ b/app/lib/annual_report/most_used_apps.rb @@ -5,18 +5,27 @@ class AnnualReport::MostUsedApps < AnnualReport::Source def generate { - most_used_apps: most_used_apps.map do |(name, count)| - { - name: name, - count: count, - } - end, + most_used_apps: app_map, } end private + def app_map + most_used_apps.map do |name, count| + { + name: name, + count: count, + } + end + end + def most_used_apps - report_statuses.joins(:application).group('oauth_applications.name').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('oauth_applications.name, count(*) as total')) + report_statuses + .group(Doorkeeper::Application.arel_table[:name]) + .joins(:application) + .limit(SET_SIZE) + .order(total: :desc) + .pluck(Doorkeeper::Application.arel_table[:name], Arel.star.count.as('total')) end end diff --git a/app/lib/annual_report/percentiles.rb b/app/lib/annual_report/percentiles.rb index 0251cb66ad..a75d083e3d 100644 --- a/app/lib/annual_report/percentiles.rb +++ b/app/lib/annual_report/percentiles.rb @@ -1,19 +1,37 @@ # frozen_string_literal: true class AnnualReport::Percentiles < AnnualReport::Source + THRESHOLD_ADJUSTMENT = 1.0 + def generate { percentiles: { - followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100, - statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100, + followers: followers_percentile, + statuses: statuses_percentile, }, } end private + def followers_percentile + (total_with_fewer_followers / adjusted_any_followers_count) * 100 + end + + def statuses_percentile + (total_with_fewer_statuses / adjusted_any_statuses_count) * 100 + end + + def adjusted_any_followers_count + total_with_any_followers + THRESHOLD_ADJUSTMENT + end + + def adjusted_any_statuses_count + total_with_any_statuses + THRESHOLD_ADJUSTMENT + end + def followers_gained - @followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count + @followers_gained ||= report_followers.count end def statuses_created @@ -53,10 +71,36 @@ class AnnualReport::Percentiles < AnnualReport::Source end def total_with_any_followers - @total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id') + @total_with_any_followers ||= local_account_targetting_follows.distinct.count(Follow.arel_table[:target_account_id]) end def total_with_any_statuses - @total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id') + @total_with_any_statuses ||= local_account_statuses.distinct.count(Status.arel_table[:account_id]) + end + + def local_account_targetting_follows + Follow + .where(follows_created_year.eq(@year)) + .joins(:target_account) + .merge(Account.local) + end + + def local_account_statuses + Status + .where(id: year_as_snowflake_range) + .joins(:account) + .merge(Account.local) + end + + def report_followers + @account + .passive_relationships + .where(follows_created_year.eq(@year)) + end + + def follows_created_year + Arel.sql(<<~SQL.squish) + DATE_PART('year', follows.created_at)::int + SQL end end diff --git a/app/lib/annual_report/time_series.rb b/app/lib/annual_report/time_series.rb index 65a188eda7..3158426ea9 100644 --- a/app/lib/annual_report/time_series.rb +++ b/app/lib/annual_report/time_series.rb @@ -1,30 +1,78 @@ # frozen_string_literal: true class AnnualReport::TimeSeries < AnnualReport::Source + MONTH_INDEXES = (1..12) + def generate { - time_series: (1..12).map do |month| - { - month: month, - statuses: statuses_per_month[month] || 0, - following: following_per_month[month] || 0, - followers: followers_per_month[month] || 0, - } - end, + time_series: time_series_map, } end private + def time_series_map + MONTH_INDEXES.map do |month| + { + month: month, + statuses: statuses_per_month[month] || 0, + following: following_per_month[month] || 0, + followers: followers_per_month[month] || 0, + } + end + end + def statuses_per_month - @statuses_per_month ||= report_statuses.group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h + @statuses_per_month ||= monthly_statuses.to_h end def following_per_month - @following_per_month ||= @account.active_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h + @following_per_month ||= monthly_following.to_h end def followers_per_month - @followers_per_month ||= @account.passive_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h + @followers_per_month ||= monthly_followers.to_h + end + + def monthly_statuses + report_statuses + .group(:period) + .pluck(created_month.as('period'), Arel.star.count) + end + + def monthly_following + following_from_year + .group(:period) + .pluck(created_month.as('period'), Arel.star.count) + end + + def monthly_followers + followers_from_year + .group(:period) + .pluck(created_month.as('period'), Arel.star.count) + end + + def following_from_year + @account + .active_relationships + .where(created_year.eq(@year)) + end + + def followers_from_year + @account + .passive_relationships + .where(created_year.eq(@year)) + end + + def created_year + Arel.sql(<<~SQL.squish) + DATE_PART('year', created_at)::int + SQL + end + + def created_month + Arel.sql(<<~SQL.squish) + DATE_PART('month', created_at)::int + SQL end end diff --git a/app/lib/annual_report/top_hashtags.rb b/app/lib/annual_report/top_hashtags.rb index 32bd10d698..e5fdf06fd9 100644 --- a/app/lib/annual_report/top_hashtags.rb +++ b/app/lib/annual_report/top_hashtags.rb @@ -2,21 +2,43 @@ class AnnualReport::TopHashtags < AnnualReport::Source SET_SIZE = 40 + MINIMUM_COUNT = 1 def generate { - top_hashtags: top_hashtags.map do |(name, count)| - { - name: name, - count: count, - } - end, + top_hashtags: hashtag_map, } end private + def hashtag_map + top_hashtags.map do |name, count| + { + name: name, + count: count, + } + end + end + def top_hashtags - Tag.joins(:statuses).where(statuses: { id: report_statuses.select(:id) }).group(:id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('COALESCE(tags.display_name, tags.name), count(*) AS total')) + Tag + .joins(:statuses) + .where(statuses: { id: report_status_ids }) + .group(:id) + .having(Arel.star.count.gt(MINIMUM_COUNT)) + .limit(SET_SIZE) + .order(total: :desc) + .pluck(coalesced_name, Arel.star.count.as('total')) + end + + def report_status_ids + report_statuses.select(:id) + end + + def coalesced_name + Arel.sql(<<~SQL.squish) + COALESCE(tags.display_name, tags.name) + SQL end end diff --git a/app/lib/annual_report/top_statuses.rb b/app/lib/annual_report/top_statuses.rb index c5abeaa58d..9df7ea22ec 100644 --- a/app/lib/annual_report/top_statuses.rb +++ b/app/lib/annual_report/top_statuses.rb @@ -2,20 +2,42 @@ class AnnualReport::TopStatuses < AnnualReport::Source def generate - top_reblogs = base_scope.order(reblogs_count: :desc).first&.id - top_favourites = base_scope.where.not(id: top_reblogs).order(favourites_count: :desc).first&.id - top_replies = base_scope.where.not(id: [top_reblogs, top_favourites]).order(replies_count: :desc).first&.id - { top_statuses: { - by_reblogs: top_reblogs, - by_favourites: top_favourites, - by_replies: top_replies, + by_reblogs: top_reblog_status_id, + by_favourites: top_favourite_status_id, + by_replies: top_reply_status_id, }, } end - def base_scope + private + + def top_reblog_status_id + @top_reblog_status_id ||= statuses_by_reblog_count.pick(:id) + end + + def top_favourite_status_id + @top_favourite_status_id ||= statuses_by_favourite_count.where.not(id: top_reblog_status_id).pick(:id) + end + + def top_reply_status_id + @top_reply_status_id ||= statuses_by_replies_count.where.not(id: [top_reblog_status_id, top_favourite_status_id]).pick(:id) + end + + def statuses_by_reblog_count + public_statuses.order(reblogs_count: :desc) + end + + def statuses_by_favourite_count + public_statuses.order(favourites_count: :desc) + end + + def statuses_by_replies_count + public_statuses.order(replies_count: :desc) + end + + def public_statuses report_statuses.public_visibility.joins(:status_stat) end end diff --git a/app/lib/annual_report/type_distribution.rb b/app/lib/annual_report/type_distribution.rb index fe38d8a8a2..02ef8b6f55 100644 --- a/app/lib/annual_report/type_distribution.rb +++ b/app/lib/annual_report/type_distribution.rb @@ -5,10 +5,24 @@ class AnnualReport::TypeDistribution < AnnualReport::Source { type_distribution: { total: report_statuses.count, - reblogs: report_statuses.where.not(reblog_of_id: nil).count, - replies: report_statuses.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count, - standalone: report_statuses.without_replies.without_reblogs.count, + reblogs: reblog_statuses.count, + replies: replied_statuses.count, + standalone: standalone_statuses.count, }, } end + + private + + def reblog_statuses + report_statuses.with_reblogs + end + + def replied_statuses + report_statuses.with_replies.without_replies_to(@account) + end + + def standalone_statuses + report_statuses.without_replies.without_reblogs + end end diff --git a/app/models/status.rb b/app/models/status.rb index 73f0052673..44fb5b2860 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -112,6 +112,10 @@ class Status < ApplicationRecord scope :not_reply, -> { where(reply: false) } scope :reply_to_account, -> { where(arel_table[:in_reply_to_account_id].eq arel_table[:account_id]) } scope :without_reblogs, -> { where(statuses: { reblog_of_id: nil }) } + scope :with_reblogs, -> { where.not(reblog_of_id: nil) } + scope :with_replies, -> { where.not(in_reply_to_id: nil) } + scope :with_polls, -> { where.not(poll_id: nil) } + scope :without_replies_to, ->(account) { where.not(in_reply_to_account_id: account.id) } scope :tagged_with, ->(tag_ids) { joins(:statuses_tags).where(statuses_tags: { tag_id: tag_ids }) } scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) } scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).merge(Account.not_domain_blocked_by_account(account)) }