From 212848b66e69ddb72f488d233b6378f494a5fff5 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Thu, 18 Jul 2019 03:02:15 +0200 Subject: [PATCH] Change language detection to include hashtags as words (#11341) --- app/lib/language_detector.rb | 2 +- spec/lib/language_detector_spec.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb index 1e90af42d7..6f9511a541 100644 --- a/app/lib/language_detector.rb +++ b/app/lib/language_detector.rb @@ -69,7 +69,7 @@ class LanguageDetector new_text = remove_html(text) new_text.gsub!(FetchLinkCardService::URL_PATTERN, '') new_text.gsub!(Account::MENTION_RE, '') - new_text.gsub!(Tag::HASHTAG_RE, '') + new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase } new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '') new_text.gsub!(/\s+/, ' ') new_text diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb index 0cb70605ad..b7ba0f6c4f 100644 --- a/spec/lib/language_detector_spec.rb +++ b/spec/lib/language_detector_spec.rb @@ -32,11 +32,11 @@ describe LanguageDetector do expect(result).to eq 'Our website is and also' end - it 'strips #hashtags from strings before detection' do - string = 'Hey look at all the #animals and #fish' + it 'converts #hashtags back to normal text before detection' do + string = 'Hey look at all the #animals and #FishAndChips' result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Hey look at all the and' + expect(result).to eq 'Hey look at all the animals and fish and chips' end end