From 70ddef2654a931827ce5e4323e3042365f6078f2 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 2 Sep 2019 18:11:13 +0200 Subject: [PATCH] Change trending hashtags to not disappear instantly after midnight (#11712) --- app/controllers/admin/tags_controller.rb | 2 +- app/lib/feed_manager.rb | 2 +- app/models/tag.rb | 4 +- app/models/trending_tags.rb | 102 ++++++++++++------ .../scheduler/trending_tags_scheduler.rb | 11 ++ config/sidekiq.yml | 3 + .../20190901035623_add_max_score_to_tags.rb | 6 ++ .../20190901040524_remove_score_from_tags.rb | 12 +++ db/schema.rb | 6 +- spec/models/trending_tags_spec.rb | 68 ++++++++++++ 10 files changed, 179 insertions(+), 37 deletions(-) create mode 100644 app/workers/scheduler/trending_tags_scheduler.rb create mode 100644 db/migrate/20190901035623_add_max_score_to_tags.rb create mode 100644 db/post_migrate/20190901040524_remove_score_from_tags.rb create mode 100644 spec/models/trending_tags_spec.rb diff --git a/app/controllers/admin/tags_controller.rb b/app/controllers/admin/tags_controller.rb index 25d9b7d3d..8bd4e5f8b 100644 --- a/app/controllers/admin/tags_controller.rb +++ b/app/controllers/admin/tags_controller.rb @@ -57,7 +57,7 @@ module Admin scope = scope.unreviewed if filter_params[:review] == 'unreviewed' scope = scope.reviewed.order(reviewed_at: :desc) if filter_params[:review] == 'reviewed' scope = scope.pending_review.order(requested_review_at: :desc) if filter_params[:review] == 'pending_review' - scope.order(score: :desc) + scope.order(max_score: :desc) end def filter_params diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index ca3d890a8..871ec5c19 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -63,7 +63,7 @@ class FeedManager reblog_key = key(type, account_id, 'reblogs') # Remove any items past the MAX_ITEMS'th entry in our feed - redis.zremrangebyrank(timeline_key, '0', (-(FeedManager::MAX_ITEMS + 1)).to_s) + redis.zremrangebyrank(timeline_key, 0, -(FeedManager::MAX_ITEMS + 1)) # Get the score of the REBLOG_FALLOFF'th item in our feed, and stop # tracking anything after it for deduplication purposes. diff --git a/app/models/tag.rb b/app/models/tag.rb index 945e3a3c6..135e0a030 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -7,14 +7,14 @@ # name :string default(""), not null # created_at :datetime not null # updated_at :datetime not null -# score :integer # usable :boolean # trendable :boolean # listable :boolean # reviewed_at :datetime # requested_review_at :datetime # last_status_at :datetime -# last_trend_at :datetime +# max_score :float +# max_score_at :datetime # class Tag < ApplicationRecord diff --git a/app/models/trending_tags.rb b/app/models/trending_tags.rb index e4ce988c1..e1b92b175 100644 --- a/app/models/trending_tags.rb +++ b/app/models/trending_tags.rb @@ -7,6 +7,8 @@ class TrendingTags THRESHOLD = 5 LIMIT = 10 REVIEW_THRESHOLD = 3 + MAX_SCORE_COOLDOWN = 3.days.freeze + MAX_SCORE_HALFLIFE = 6.hours.freeze class << self include Redisable @@ -16,14 +18,75 @@ class TrendingTags increment_historical_use!(tag.id, at_time) increment_unique_use!(tag.id, account.id, at_time) - increment_vote!(tag, at_time) + increment_use!(tag.id, at_time) tag.update(last_status_at: Time.now.utc) if tag.last_status_at.nil? || tag.last_status_at < 12.hours.ago - tag.update(last_trend_at: Time.now.utc) if trending?(tag) && (tag.last_trend_at.nil? || tag.last_trend_at < 12.hours.ago) + end + + def update!(at_time = Time.now.utc) + tag_ids = redis.smembers("#{KEY}:used:#{at_time.beginning_of_day.to_i}") + redis.zrange(KEY, 0, -1) + tags = Tag.where(id: tag_ids.uniq) + + # First pass to calculate scores and update the set + + tags.each do |tag| + expected = redis.pfcount("activity:tags:#{tag.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f + expected = 1.0 if expected.zero? + observed = redis.pfcount("activity:tags:#{tag.id}:#{at_time.beginning_of_day.to_i}:accounts").to_f + max_time = tag.max_score_at + max_score = tag.max_score + max_score = 0 if max_time.nil? || max_time < (at_time - MAX_SCORE_COOLDOWN) + + score = begin + if expected > observed || observed < THRESHOLD + 0 + else + ((observed - expected)**2) / expected + end + end + + if score > max_score + max_score = score + max_time = at_time + + # Not interested in triggering any callbacks for this + tag.update_columns(max_score: max_score, max_score_at: max_time) + end + + decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / MAX_SCORE_HALFLIFE.to_f)) + + if decaying_score.zero? + redis.zrem(KEY, tag.id) + else + redis.zadd(KEY, decaying_score, tag.id) + end + end + + users_for_review = User.staff.includes(:account).to_a.select(&:allows_trending_tag_emails?) + + # Second pass to notify about previously unreviewed trends + + tags.each do |tag| + current_rank = redis.zrevrank(KEY, tag.id) + needs_review_notification = tag.requires_review? && !tag.requested_review? + rank_passes_threshold = current_rank.present? && current_rank <= REVIEW_THRESHOLD + + next unless !tag.trendable? && rank_passes_threshold && needs_review_notification + + tag.touch(:requested_review_at) + + users_for_review.each do |user| + AdminMailer.new_trending_tag(user.account, tag).deliver_later! + end + end + + # Trim older items + + redis.zremrangebyrank(KEY, 0, -(LIMIT + 1)) end def get(limit, filtered: true) - tag_ids = redis.zrevrange("#{KEY}:#{Time.now.utc.beginning_of_day.to_i}", 0, LIMIT - 1).map(&:to_i) + tag_ids = redis.zrevrange(KEY, 0, LIMIT - 1).map(&:to_i) tags = Tag.where(id: tag_ids) tags = tags.where(trendable: true) if filtered @@ -33,8 +96,8 @@ class TrendingTags end def trending?(tag) - rank = redis.zrevrank("#{KEY}:#{Time.now.utc.beginning_of_day.to_i}", tag.id) - rank.present? && rank <= LIMIT + rank = redis.zrevrank(KEY, tag.id) + rank.present? && rank < LIMIT end private @@ -51,31 +114,10 @@ class TrendingTags redis.expire(key, EXPIRE_HISTORY_AFTER) end - def increment_vote!(tag, at_time) - key = "#{KEY}:#{at_time.beginning_of_day.to_i}" - expected = redis.pfcount("activity:tags:#{tag.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f - expected = 1.0 if expected.zero? - observed = redis.pfcount("activity:tags:#{tag.id}:#{at_time.beginning_of_day.to_i}:accounts").to_f - - if expected > observed || observed < THRESHOLD - redis.zrem(key, tag.id) - else - score = ((observed - expected)**2) / expected - old_rank = redis.zrevrank(key, tag.id) - - redis.zadd(key, score, tag.id) - request_review!(tag) if (old_rank.nil? || old_rank > REVIEW_THRESHOLD) && redis.zrevrank(key, tag.id) <= REVIEW_THRESHOLD && !tag.trendable? && tag.requires_review? && !tag.requested_review? - end - - redis.expire(key, EXPIRE_TRENDS_AFTER) - end - - def request_review!(tag) - return unless Setting.trends - - tag.touch(:requested_review_at) - - User.staff.includes(:account).find_each { |u| AdminMailer.new_trending_tag(u.account, tag).deliver_later! if u.allows_trending_tag_emails? } + def increment_use!(tag_id, at_time) + key = "#{KEY}:used:#{at_time.beginning_of_day.to_i}" + redis.sadd(key, tag_id) + redis.expire(key, EXPIRE_HISTORY_AFTER) end end end diff --git a/app/workers/scheduler/trending_tags_scheduler.rb b/app/workers/scheduler/trending_tags_scheduler.rb new file mode 100644 index 000000000..77f0d5747 --- /dev/null +++ b/app/workers/scheduler/trending_tags_scheduler.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class Scheduler::TrendingTagsScheduler + include Sidekiq::Worker + + sidekiq_options unique: :until_executed, retry: 0 + + def perform + TrendingTags.update! if Setting.trends + end +end diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 6ebe450b0..5de25de23 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -9,6 +9,9 @@ scheduled_statuses_scheduler: every: '5m' class: Scheduler::ScheduledStatusesScheduler + trending_tags_scheduler: + every: '5m' + class: Scheduler::TrendingTagsScheduler media_cleanup_scheduler: cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *' class: Scheduler::MediaCleanupScheduler diff --git a/db/migrate/20190901035623_add_max_score_to_tags.rb b/db/migrate/20190901035623_add_max_score_to_tags.rb new file mode 100644 index 000000000..f936e9871 --- /dev/null +++ b/db/migrate/20190901035623_add_max_score_to_tags.rb @@ -0,0 +1,6 @@ +class AddMaxScoreToTags < ActiveRecord::Migration[5.2] + def change + add_column :tags, :max_score, :float + add_column :tags, :max_score_at, :datetime + end +end diff --git a/db/post_migrate/20190901040524_remove_score_from_tags.rb b/db/post_migrate/20190901040524_remove_score_from_tags.rb new file mode 100644 index 000000000..a1112700b --- /dev/null +++ b/db/post_migrate/20190901040524_remove_score_from_tags.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class RemoveScoreFromTags < ActiveRecord::Migration[5.2] + disable_ddl_transaction! + + def change + safety_assured do + remove_column :tags, :score, :int + remove_column :tags, :last_trend_at, :datetime + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 482bca367..5576f70bf 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_08_23_221802) do +ActiveRecord::Schema.define(version: 2019_09_01_040524) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -664,14 +664,14 @@ ActiveRecord::Schema.define(version: 2019_08_23_221802) do t.string "name", default: "", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.integer "score" t.boolean "usable" t.boolean "trendable" t.boolean "listable" t.datetime "reviewed_at" t.datetime "requested_review_at" t.datetime "last_status_at" - t.datetime "last_trend_at" + t.float "max_score" + t.datetime "max_score_at" t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true end diff --git a/spec/models/trending_tags_spec.rb b/spec/models/trending_tags_spec.rb new file mode 100644 index 000000000..b6122c994 --- /dev/null +++ b/spec/models/trending_tags_spec.rb @@ -0,0 +1,68 @@ +require 'rails_helper' + +RSpec.describe TrendingTags do + describe '.record_use!' do + pending + end + + describe '.update!' do + let!(:at_time) { Time.now.utc } + let!(:tag1) { Fabricate(:tag, name: 'Catstodon') } + let!(:tag2) { Fabricate(:tag, name: 'DogsOfMastodon') } + let!(:tag3) { Fabricate(:tag, name: 'OCs') } + + before do + allow(Redis.current).to receive(:pfcount) do |key| + case key + when "activity:tags:#{tag1.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts" + 2 + when "activity:tags:#{tag1.id}:#{at_time.beginning_of_day.to_i}:accounts" + 16 + when "activity:tags:#{tag2.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts" + 0 + when "activity:tags:#{tag2.id}:#{at_time.beginning_of_day.to_i}:accounts" + 4 + when "activity:tags:#{tag3.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts" + 13 + end + end + + Redis.current.zadd('trending_tags', 0.9, tag3.id) + Redis.current.sadd("trending_tags:used:#{at_time.beginning_of_day.to_i}", [tag1.id, tag2.id]) + + tag3.update(max_score: 0.9, max_score_at: (at_time - 1.day).beginning_of_day + 12.hours) + + described_class.update!(at_time) + end + + it 'calculates and re-calculates scores' do + expect(described_class.get(10, filtered: false)).to eq [tag1, tag3] + end + + it 'omits hashtags below threshold' do + expect(described_class.get(10, filtered: false)).to_not include(tag2) + end + + it 'decays scores' do + expect(Redis.current.zscore('trending_tags', tag3.id)).to be < 0.9 + end + end + + describe '.trending?' do + let(:tag) { Fabricate(:tag) } + + before do + 10.times { |i| Redis.current.zadd('trending_tags', i + 1, Fabricate(:tag).id) } + end + + it 'returns true if the hashtag is within limit' do + Redis.current.zadd('trending_tags', 11, tag.id) + expect(described_class.trending?(tag)).to be true + end + + it 'returns false if the hashtag is outside the limit' do + Redis.current.zadd('trending_tags', 0, tag.id) + expect(described_class.trending?(tag)).to be false + end + end +end