mirror of
https://github.com/mastodon/mastodon.git
synced 2024-11-24 06:46:29 +00:00
Fix performance of percentile calculation for annual reports (#32765)
This commit is contained in:
parent
823f597f00
commit
90f4ffa31d
|
@ -17,11 +17,21 @@ class AnnualReport
|
||||||
|
|
||||||
SCHEMA = 1
|
SCHEMA = 1
|
||||||
|
|
||||||
|
def self.table_name_prefix
|
||||||
|
'annual_report_'
|
||||||
|
end
|
||||||
|
|
||||||
def initialize(account, year)
|
def initialize(account, year)
|
||||||
@account = account
|
@account = account
|
||||||
@year = year
|
@year = year
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.prepare(year)
|
||||||
|
SOURCES.each do |klass|
|
||||||
|
klass.prepare(year)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def generate
|
def generate
|
||||||
return if GeneratedAnnualReport.exists?(account: @account, year: @year)
|
return if GeneratedAnnualReport.exists?(account: @account, year: @year)
|
||||||
|
|
||||||
|
|
|
@ -1,62 +1,37 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class AnnualReport::Percentiles < AnnualReport::Source
|
class AnnualReport::Percentiles < AnnualReport::Source
|
||||||
|
def self.prepare(year)
|
||||||
|
AnnualReport::StatusesPerAccountCount.connection.exec_query(<<~SQL.squish, nil, [year, Mastodon::Snowflake.id_at(DateTime.new(year).beginning_of_year), Mastodon::Snowflake.id_at(DateTime.new(year).end_of_year)])
|
||||||
|
INSERT INTO annual_report_statuses_per_account_counts (year, account_id, statuses_count)
|
||||||
|
SELECT $1, account_id, count(*)
|
||||||
|
FROM statuses
|
||||||
|
WHERE id BETWEEN $2 AND $3
|
||||||
|
AND (local OR uri IS NULL)
|
||||||
|
GROUP BY account_id
|
||||||
|
ON CONFLICT (year, account_id) DO NOTHING
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
|
||||||
def generate
|
def generate
|
||||||
{
|
{
|
||||||
percentiles: {
|
percentiles: {
|
||||||
followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100,
|
statuses: 100.0 - ((total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100),
|
||||||
statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def followers_gained
|
|
||||||
@followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count
|
|
||||||
end
|
|
||||||
|
|
||||||
def statuses_created
|
def statuses_created
|
||||||
@statuses_created ||= report_statuses.count
|
@statuses_created ||= report_statuses.count
|
||||||
end
|
end
|
||||||
|
|
||||||
def total_with_fewer_followers
|
|
||||||
@total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total
|
|
||||||
WITH tmp0 AS (
|
|
||||||
SELECT follows.target_account_id
|
|
||||||
FROM follows
|
|
||||||
INNER JOIN accounts ON accounts.id = follows.target_account_id
|
|
||||||
WHERE date_part('year', follows.created_at) = :year
|
|
||||||
AND accounts.domain IS NULL
|
|
||||||
GROUP BY follows.target_account_id
|
|
||||||
HAVING COUNT(*) < :comparison
|
|
||||||
)
|
|
||||||
SELECT count(*) AS total
|
|
||||||
FROM tmp0
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
|
|
||||||
def total_with_fewer_statuses
|
def total_with_fewer_statuses
|
||||||
@total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total
|
@total_with_fewer_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).where(statuses_count: ...statuses_created).count
|
||||||
WITH tmp0 AS (
|
|
||||||
SELECT statuses.account_id
|
|
||||||
FROM statuses
|
|
||||||
INNER JOIN accounts ON accounts.id = statuses.account_id
|
|
||||||
WHERE statuses.id BETWEEN :min_id AND :max_id
|
|
||||||
AND accounts.domain IS NULL
|
|
||||||
GROUP BY statuses.account_id
|
|
||||||
HAVING count(*) < :comparison
|
|
||||||
)
|
|
||||||
SELECT count(*) AS total
|
|
||||||
FROM tmp0
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
|
|
||||||
def total_with_any_followers
|
|
||||||
@total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id')
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def total_with_any_statuses
|
def total_with_any_statuses
|
||||||
@total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id')
|
@total_with_any_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).count
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,6 +8,14 @@ class AnnualReport::Source
|
||||||
@year = year
|
@year = year
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.prepare(_year)
|
||||||
|
# Use this method if any pre-calculations must be made before individual annual reports are generated
|
||||||
|
end
|
||||||
|
|
||||||
|
def generate
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
protected
|
protected
|
||||||
|
|
||||||
def report_statuses
|
def report_statuses
|
||||||
|
|
15
app/models/annual_report/statuses_per_account_count.rb
Normal file
15
app/models/annual_report/statuses_per_account_count.rb
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# == Schema Information
|
||||||
|
#
|
||||||
|
# Table name: annual_report_statuses_per_account_counts
|
||||||
|
#
|
||||||
|
# id :bigint(8) not null, primary key
|
||||||
|
# year :integer not null
|
||||||
|
# account_id :bigint(8) not null
|
||||||
|
# statuses_count :bigint(8) not null
|
||||||
|
#
|
||||||
|
|
||||||
|
class AnnualReport::StatusesPerAccountCount < ApplicationRecord
|
||||||
|
# This table facilitates percentile calculations
|
||||||
|
end
|
|
@ -0,0 +1,13 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class CreateAnnualReportStatusesPerAccountCounts < ActiveRecord::Migration[7.1]
|
||||||
|
def change
|
||||||
|
create_table :annual_report_statuses_per_account_counts do |t| # rubocop:disable Rails/CreateTableWithTimestamps
|
||||||
|
t.integer :year, null: false
|
||||||
|
t.bigint :account_id, null: false
|
||||||
|
t.bigint :statuses_count, null: false
|
||||||
|
end
|
||||||
|
|
||||||
|
add_index :annual_report_statuses_per_account_counts, [:year, :account_id], unique: true
|
||||||
|
end
|
||||||
|
end
|
|
@ -10,7 +10,7 @@
|
||||||
#
|
#
|
||||||
# It's strongly recommended that you check this file into your version control system.
|
# It's strongly recommended that you check this file into your version control system.
|
||||||
|
|
||||||
ActiveRecord::Schema[7.1].define(version: 2024_10_22_214312) do
|
ActiveRecord::Schema[7.1].define(version: 2024_11_04_082851) do
|
||||||
# These are extensions that must be enabled in order to support this database
|
# These are extensions that must be enabled in order to support this database
|
||||||
enable_extension "plpgsql"
|
enable_extension "plpgsql"
|
||||||
|
|
||||||
|
@ -260,6 +260,13 @@ ActiveRecord::Schema[7.1].define(version: 2024_10_22_214312) do
|
||||||
t.bigint "status_ids", array: true
|
t.bigint "status_ids", array: true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
create_table "annual_report_statuses_per_account_counts", force: :cascade do |t|
|
||||||
|
t.integer "year", null: false
|
||||||
|
t.bigint "account_id", null: false
|
||||||
|
t.bigint "statuses_count", null: false
|
||||||
|
t.index ["year", "account_id"], name: "idx_on_year_account_id_ff3e167cef", unique: true
|
||||||
|
end
|
||||||
|
|
||||||
create_table "appeals", force: :cascade do |t|
|
create_table "appeals", force: :cascade do |t|
|
||||||
t.bigint "account_id", null: false
|
t.bigint "account_id", null: false
|
||||||
t.bigint "account_warning_id", null: false
|
t.bigint "account_warning_id", null: false
|
||||||
|
|
|
@ -4,17 +4,20 @@ require 'rails_helper'
|
||||||
|
|
||||||
RSpec.describe AnnualReport::Percentiles do
|
RSpec.describe AnnualReport::Percentiles do
|
||||||
describe '#generate' do
|
describe '#generate' do
|
||||||
subject { described_class.new(account, Time.zone.now.year) }
|
subject { described_class.new(account, year) }
|
||||||
|
|
||||||
|
let(:year) { Time.zone.now.year }
|
||||||
|
|
||||||
context 'with an inactive account' do
|
context 'with an inactive account' do
|
||||||
let(:account) { Fabricate :account }
|
let(:account) { Fabricate :account }
|
||||||
|
|
||||||
it 'builds a report for an account' do
|
it 'builds a report for an account' do
|
||||||
|
described_class.prepare(year)
|
||||||
|
|
||||||
expect(subject.generate)
|
expect(subject.generate)
|
||||||
.to include(
|
.to include(
|
||||||
percentiles: include(
|
percentiles: include(
|
||||||
followers: 0,
|
statuses: 100
|
||||||
statuses: 0
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
@ -25,16 +28,15 @@ RSpec.describe AnnualReport::Percentiles do
|
||||||
|
|
||||||
before do
|
before do
|
||||||
Fabricate.times 2, :status # Others as `account`
|
Fabricate.times 2, :status # Others as `account`
|
||||||
Fabricate.times 2, :follow # Others as `target_account`
|
|
||||||
Fabricate.times 2, :status, account: account
|
Fabricate.times 2, :status, account: account
|
||||||
Fabricate.times 2, :follow, target_account: account
|
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'builds a report for an account' do
|
it 'builds a report for an account' do
|
||||||
|
described_class.prepare(year)
|
||||||
|
|
||||||
expect(subject.generate)
|
expect(subject.generate)
|
||||||
.to include(
|
.to include(
|
||||||
percentiles: include(
|
percentiles: include(
|
||||||
followers: 50,
|
|
||||||
statuses: 50
|
statuses: 50
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue