Add a spam check (#11217)

* Add a spam check
* Use Nilsimsa to generate locality-sensitive hashes and compare using Levenshtein distance
* Add more tests
* Add exemption when the message is a reply to something that mentions the sender
* Use Nilsimsa Compare Value instead of Levenshtein distance
* Use MD5 for messages shorter than 10 characters
* Add message to automated report, do not add non-public statuses to automated report, add trust level to accounts and make unsilencing raise the trust level to prevent repeated spam checks on that account
* Expire spam check data after 3 months
* Add support for local statuses, reduce expiration to 1 week, always create a report
* Add content warnings to the spam check and exempt empty statuses
* Change Nilsimsa threshold to 95 and make sure removed statuses are removed from the spam check
* Add all matched statuses into automatic report
2019-07-13 16:45:50 +02:00 · 2019-07-13 16:45:50 +02:00 · 6ff67be0f6
parent 402302776c
commit 6ff67be0f6
10 changed files with 377 additions and 5 deletions
--- a/1
+++ b/1
@ -58,6 +58,7 @@ gem 'idn-ruby', require: 'idn'
 gem 'kaminari', '~> 1.1'
 gem 'link_header', '~> 0.0'
 gem 'mime-types', '~> 3.2', require: 'mime/types/columnar'
+gem 'nilsimsa', git: 'https://github.com/witgo/nilsimsa', ref: 'fd184883048b922b176939f851338d0a4971a532'
 gem 'nokogiri', '~> 1.10'
 gem 'nsa', '~> 0.2'
 gem 'oj', '~> 3.7'
--- a/Gemfile.lock
+++ b/Gemfile.lock
@ -12,6 +12,13 @@ GIT
  specs:
    http_parser.rb (0.6.1)

+GIT
+  remote: https://github.com/witgo/nilsimsa
+  revision: fd184883048b922b176939f851338d0a4971a532
+  ref: fd184883048b922b176939f851338d0a4971a532
+  specs:
+    nilsimsa (1.1.2)
+
 GEM
  remote: https://rubygems.org/
  specs:
@ -704,6 +711,7 @@ DEPENDENCIES
  microformats (~> 4.1)
  mime-types (~> 3.2)
  net-ldap (~> 0.10)
+  nilsimsa!
  nokogiri (~> 1.10)
  nsa (~> 0.2)
  oj (~> 3.7)
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@ -41,6 +41,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity

    resolve_thread(@status)
    fetch_replies(@status)
+    check_for_spam
    distribute(@status)
    forward_for_reply if @status.distributable?
  end
@ -406,6 +407,18 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
    Account.local.where(username: local_usernames).exists?
  end

+  # Runs the freshly created status through SpamCheck. Nothing happens when
+  # the check reports it can be skipped; otherwise a status judged to be spam
+  # is flagged and any other status is remembered for later comparisons.
+  def check_for_spam
+    checker = SpamCheck.new(@status)
+    return if checker.skip?
+
+    checker.spam? ? checker.flag! : checker.remember!
+  end
+
+
  def forward_for_reply
    return unless @json['signature'].present? && reply_to_local?
    ActivityPub::RawDistributionWorker.perform_async(Oj.dump(@json), replied_to_status.account_id, [@account.preferred_inbox_url])
--- a/app/lib/spam_check.rb
+++ b/app/lib/spam_check.rb
@ -0,0 +1,169 @@
+# frozen_string_literal: true
+
+# Detects repeated or near-identical statuses sent by a single account.
+#
+# A digest of each remembered status is kept in a per-account Redis sorted
+# set (see #remember!). A status is considered spam when its digest matches
+# one of the digests already remembered for the same account (see #spam?).
+class SpamCheck
+  include Redisable
+  include ActionView::Helpers::TextHelper
+
+  # Minimum Nilsimsa compare value for two digests to be considered a match
+  NILSIMSA_COMPARE_THRESHOLD = 95
+  # Texts longer than this many characters use Nilsimsa, all others use MD5
+  NILSIMSA_MIN_SIZE          = 10
+  # Expiry applied to the per-account Redis key on every #remember!
+  EXPIRE_SET_AFTER           = 1.week.seconds
+
+  # @param status [Status] the status to check; its account is the sender
+  def initialize(status)
+    @account = status.account
+    @status  = status
+  end
+
+  # @return [Boolean] true when the spam check should not run for this status
+  def skip?
+    already_flagged? || trusted? || no_unsolicited_mentions? || solicited_reply?
+  end
+
+  # @return [Boolean] true when a remembered digest of the same account
+  #   matches this status; always false when there is no text to hash
+  def spam?
+    return false if insufficient_data?
+
+    other_digests.any? { |record| matching_record?(record) }
+  end
+
+  # Silences the sender and files an automated report against them.
+  def flag!
+    auto_silence_account!
+    auto_report_status!
+  end
+
+  # Stores the digest of this status in the account's Redis sorted set.
+  def remember!
+    # The scores in sorted sets don't actually have enough bits to hold an exact
+    # value of our snowflake IDs, so we use it only for its ordering property. To
+    # get the correct status ID back, we have to save it in the string value
+
+    redis.zadd(redis_key, @status.id, digest_with_algorithm)
+    # Trim the set: ranks 0 through -10 are removed, the highest ranks remain
+    redis.zremrangebyrank(redis_key, '0', '-10')
+    redis.expire(redis_key, EXPIRE_SET_AFTER)
+  end
+
+  # Forgets every digest remembered for the account.
+  def reset!
+    redis.del(redis_key)
+  end
+
+  # The text that gets hashed: status text without mentions (and without HTML
+  # tags for non-local statuses), prefixed with the content warning,
+  # NFKC-normalized, downcased and with whitespace collapsed.
+  #
+  # @return [String]
+  def hashable_text
+    return @hashable_text if defined?(@hashable_text)
+
+    # Build the value in a local so the memoized variable is only ever set to
+    # the finished result, even if one of the steps raises
+    text = remove_mentions(@status.text)
+    text = strip_tags(text) unless @status.local?
+    text = normalize_unicode("#{@status.spoiler_text} #{text}")
+
+    @hashable_text = remove_whitespace(text)
+  end
+
+  # @return [Boolean] true when nothing is left to hash
+  def insufficient_data?
+    hashable_text.blank?
+  end
+
+  # @return [String] hex digest of #hashable_text, Nilsimsa or MD5 depending
+  #   on the length of the text
+  def digest
+    @digest ||= begin
+      if nilsimsa?
+        Nilsimsa.new(hashable_text).hexdigest
+      else
+        Digest::MD5.hexdigest(hashable_text)
+      end
+    end
+  end
+
+  # @return [String] the value stored in Redis: "algorithm:digest:status_id"
+  def digest_with_algorithm
+    [algorithm, digest, @status.id].join(':')
+  end
+
+  private
+
+  # Name of the digest algorithm used for this status
+  def algorithm
+    nilsimsa? ? 'nilsimsa' : 'md5'
+  end
+
+  # Local statuses hold plain text, so mentions are removed with a regular
+  # expression. Other statuses are parsed as HTML and links pointing at the
+  # mentioned accounts are unlinked from the document.
+  def remove_mentions(text)
+    return text.gsub(Account::MENTION_RE, '') if @status.local?
+
+    Nokogiri::HTML.fragment(text).tap do |html|
+      mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }
+
+      html.traverse do |element|
+        element.unlink if element.name == 'a' && mentions.include?(element['href'])
+      end
+    end.to_s
+  end
+
+  def normalize_unicode(text)
+    text.unicode_normalize(:nfkc).downcase
+  end
+
+  def remove_whitespace(text)
+    text.gsub(/\s+/, ' ').strip
+  end
+
+  def auto_silence_account!
+    @account.silence!
+  end
+
+  # Files a report from the representative account. Statuses are only attached
+  # when the flagged status is distributable; in that case the public and
+  # unlisted matching statuses are attached along with the flagged one.
+  def auto_report_status!
+    status_ids = begin
+      if @status.distributable?
+        Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id) + [@status.id]
+      end
+    end
+
+    ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected_and_silenced'))
+  end
+
+  def already_flagged?
+    @account.silenced?
+  end
+
+  def trusted?
+    @account.trust_level > Account::TRUST_LEVELS[:untrusted]
+  end
+
+  # True when every mention is silent, is between two non-local accounts, or
+  # targets an account that follows the sender
+  def no_unsolicited_mentions?
+    @status.mentions.all? { |mention| mention.silent? || (!@account.local? && !mention.account.local?) || mention.account.following?(@account) }
+  end
+
+  # True when the status replies to a status that mentions the sender
+  def solicited_reply?
+    !@status.thread.nil? && @status.thread.mentions.where(account: @account).exists?
+  end
+
+  # Compares two hex-encoded digests byte by byte over their first 32 bytes.
+  # NOTE(review): Nilsimsa::POPC is presumably a per-byte bit-count table
+  # provided by the gem - confirm against the gem source.
+  def nilsimsa_compare_value(first, second)
+    first  = [first].pack('H*')
+    second = [second].pack('H*')
+
+    bits = (0..31).sum do |i|
+      Nilsimsa::POPC[255 & (first.getbyte(i) ^ second.getbyte(i))].ord
+    end
+
+    128 - bits # -128 <= Nilsimsa Compare Value <= 128
+  end
+
+  def nilsimsa?
+    hashable_text.size > NILSIMSA_MIN_SIZE
+  end
+
+  # All "algorithm:digest:status_id" records currently stored for the account
+  def other_digests
+    redis.zrange(redis_key, 0, -1)
+  end
+
+  # Single source of truth for what counts as a match: the record must use the
+  # same algorithm as this status, and then either reach the Nilsimsa compare
+  # threshold or have an identical MD5 digest.
+  def matching_record?(record)
+    other_algorithm, other_digest, _status_id = record.split(':')
+
+    return false unless other_algorithm == algorithm
+
+    if nilsimsa?
+      nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD
+    else
+      other_digest == digest
+    end
+  end
+
+  # Status IDs (as stored in the record strings) of all matching records
+  def matching_status_ids
+    other_digests.select { |record| matching_record?(record) }.map { |record| record.split(':')[2] }.compact
+  end
+
+  def redis_key
+    @redis_key ||= "spam_check:#{@account.id}"
+  end
+end
--- a/app/models/account.rb
+++ b/app/models/account.rb
@ -45,6 +45,7 @@
 #  also_known_as           :string           is an Array
 #  silenced_at             :datetime
 #  suspended_at            :datetime
+#  trust_level             :integer
 #

 class Account < ApplicationRecord
@ -62,6 +63,11 @@ class Account < ApplicationRecord
  include AccountCounters
  include DomainNormalizable

+  # Values compared against the trust_level column. SpamCheck skips accounts
+  # whose level is above :untrusted, and #unsilence! raises :untrusted
+  # accounts to :trusted.
+  TRUST_LEVELS = {
+    untrusted: 0,
+    trusted: 1,
+  }.freeze
+
  enum protocol: [:ostatus, :activitypub]

  validates :username, presence: true
@ -163,6 +169,10 @@ class Account < ApplicationRecord
    last_webfingered_at.nil? || last_webfingered_at <= 1.day.ago
  end

+  # Trust level of the account. The trust_level column is added without a
+  # default, so a missing value is reported as untrusted.
+  #
+  # @return [Integer]
+  def trust_level
+    self[:trust_level] || TRUST_LEVELS[:untrusted]
+  end
+
  def refresh!
    ResolveAccountService.new.call(acct) unless local?
  end
@ -171,21 +181,19 @@ class Account < ApplicationRecord
    silenced_at.present?
  end

-  def silence!(date = nil)
-    date ||= Time.now.utc
+  def silence!(date = Time.now.utc)
    update!(silenced_at: date)
  end

  def unsilence!
-    update!(silenced_at: nil)
+    update!(silenced_at: nil, trust_level: trust_level == TRUST_LEVELS[:untrusted] ? TRUST_LEVELS[:trusted] : trust_level)
  end

  def suspended?
    suspended_at.present?
  end

-  def suspend!(date = nil)
-    date ||= Time.now.utc
+  def suspend!(date = Time.now.utc)
    transaction do
      user&.disable! if local?
      update!(suspended_at: date)
--- a/app/services/remove_status_service.rb
+++ b/app/services/remove_status_service.rb
@ -23,6 +23,7 @@ class RemoveStatusService < BaseService
        remove_from_hashtags
        remove_from_public
        remove_from_media if status.media_attachments.any?
+        remove_from_spam_check

        @status.destroy!
      else
@ -142,6 +143,10 @@ class RemoveStatusService < BaseService
    redis.publish('timeline:public:local:media', @payload) if @status.local?
  end

+  # Removes the entries scored with this status's ID from the spam check
+  # sorted set of the status's account.
+  def remove_from_spam_check
+    spam_check_key = "spam_check:#{@status.account_id}"
+    status_score   = @status.id
+
+    redis.zremrangebyscore(spam_check_key, status_score, status_score)
+  end
+
  def lock_options
    { redis: Redis.current, key: "distribute:#{@status.id}" }
  end
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@ -875,6 +875,8 @@ en:
    profile: Profile
    relationships: Follows and followers
    two_factor_authentication: Two-factor Auth
+  spam_check:
+    spam_detected_and_silenced: This is an automated report. Spam has been detected and the sender has been silenced automatically. If this is a mistake, please unsilence the account.
  statuses:
    attached:
      description: 'Attached: %{attached}'
--- a/db/migrate/20190701022101_add_trust_level_to_accounts.rb
+++ b/db/migrate/20190701022101_add_trust_level_to_accounts.rb
@ -0,0 +1,5 @@
+# Adds an integer trust_level column (no default) to the accounts table.
+class AddTrustLevelToAccounts < ActiveRecord::Migration[5.2]
+  def change
+    add_column(:accounts, :trust_level, :integer)
+  end
+end
--- a/db/schema.rb
+++ b/db/schema.rb
@ -148,6 +148,7 @@ ActiveRecord::Schema.define(version: 2019_07_06_233204) do
    t.string "also_known_as", array: true
    t.datetime "silenced_at"
    t.datetime "suspended_at"
+    t.integer "trust_level"
    t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
    t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", unique: true
    t.index ["moved_to_account_id"], name: "index_accounts_on_moved_to_account_id"
--- a/spec/lib/spam_check_spec.rb
+++ b/spec/lib/spam_check_spec.rb
@ -0,0 +1,160 @@
+require 'rails_helper'
+
+RSpec.describe SpamCheck do
+  let!(:sender) { Fabricate(:account) }
+  let!(:alice) { Fabricate(:account, username: 'alice') }
+  let!(:bob) { Fabricate(:account, username: 'bob') }
+
+  # Posts a status as the sender, then rewrites it in place so that it holds
+  # formatted text and is marked as non-local.
+  def status_with_html(text, options = {})
+    attributes = { text: text }.merge(options)
+
+    PostStatusService.new.call(sender, attributes).tap do |posted|
+      posted.update_columns(text: Formatter.instance.format(posted), local: false)
+    end
+  end
+
+  describe '#hashable_text' do
+    it 'removes mentions from HTML for remote statuses' do
+      status = status_with_html('@alice Hello')
+      expect(described_class.new(status).hashable_text).to eq 'hello'
+    end
+
+    it 'removes mentions from text for local statuses' do
+      status = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
+      expect(described_class.new(status).hashable_text).to eq 'hey , how are you?'
+    end
+  end
+
+  describe '#insufficient_data?' do
+    it 'returns true when there is no text' do
+      status = status_with_html('@alice')
+      expect(described_class.new(status).insufficient_data?).to be true
+    end
+
+    it 'returns false when there is text' do
+      status = status_with_html('@alice h')
+      expect(described_class.new(status).insufficient_data?).to be false
+    end
+  end
+
+  describe '#digest' do
+    it 'returns a string' do
+      status = status_with_html('@alice Hello world')
+      expect(described_class.new(status).digest).to be_a String
+    end
+  end
+
+  describe '#spam?' do
+    it 'returns false for a unique status' do
+      status = status_with_html('@alice Hello')
+      expect(described_class.new(status).spam?).to be false
+    end
+
+    it 'returns false for different statuses to the same recipient' do
+      status1 = status_with_html('@alice Hello')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@alice Are you available to talk?')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for statuses with different content warnings' do
+      status1 = status_with_html('@alice Are you available to talk?')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@alice Are you available to talk?', spoiler_text: 'This is a completely different matter than what I was talking about previously, I swear!')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for different statuses to different recipients' do
+      status1 = status_with_html('@alice How is it going?')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob Are you okay?')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for very short different statuses to different recipients' do
+      status1 = status_with_html('@alice 🙄')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob Huh?')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for statuses with no text' do
+      status1 = status_with_html('@alice')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns true for duplicate statuses to the same recipient' do
+      status1 = status_with_html('@alice Hello')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@alice Hello')
+      expect(described_class.new(status2).spam?).to be true
+    end
+
+    it 'returns true for duplicate statuses to different recipients' do
+      status1 = status_with_html('@alice Hello')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob Hello')
+      expect(described_class.new(status2).spam?).to be true
+    end
+
+    it 'returns true for nearly identical statuses with random numbers' do
+      source_text = 'Sodium, atomic number 11, was first isolated by Humphry Davy in 1807. A chemical component of salt, he named it Na in honor of the saltiest region on earth, North America.'
+      status1 = status_with_html('@alice ' + source_text + ' 1234')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob ' + source_text + ' 9568')
+      expect(described_class.new(status2).spam?).to be true
+    end
+  end
+
+  describe '#skip?' do
+    it 'returns true when the sender is already silenced' do
+      status = status_with_html('@alice Hello')
+      sender.silence!
+      expect(described_class.new(status).skip?).to be true
+    end
+
+    it 'returns true when the mentioned person follows the sender' do
+      status = status_with_html('@alice Hello')
+      alice.follow!(sender)
+      expect(described_class.new(status).skip?).to be true
+    end
+
+    it 'returns false when even one mentioned person doesn\'t follow the sender' do
+      status = status_with_html('@alice @bob Hello')
+      alice.follow!(sender)
+      expect(described_class.new(status).skip?).to be false
+    end
+
+    it 'returns true when the sender is replying to a status that mentions the sender' do
+      parent = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
+      status = status_with_html('@alice @bob Hello', thread: parent)
+      expect(described_class.new(status).skip?).to be true
+    end
+  end
+
+  describe '#remember!' do
+    pending
+  end
+
+  describe '#flag!' do
+    let!(:status1) { status_with_html('@alice General Kenobi you are a bold one') }
+    let!(:status2) { status_with_html('@alice @bob General Kenobi, you are a bold one') }
+
+    before do
+      described_class.new(status1).remember!
+      described_class.new(status2).flag!
+    end
+
+    it 'silences the account' do
+      expect(sender.silenced?).to be true
+    end
+
+    it 'creates a report about the account' do
+      expect(sender.targeted_reports.unresolved.count).to eq 1
+    end
+
+    it 'attaches both matching statuses to the report' do
+      expect(sender.targeted_reports.first.status_ids).to include(status1.id, status2.id)
+    end
+  end
+end