Refactor search query parser and handle nested quote types

This commit is contained in:
Claire 2023-09-01 10:15:27 +02:00
parent 4d9186a48c
commit 32eb0e7744
5 changed files with 43 additions and 47 deletions

View file

@ -1,15 +1,20 @@
# frozen_string_literal: true
# Parslet grammar that splits a raw search string into clauses
# (optional +/- operator, optional prefix, then a quoted phrase or a bare term).
#
# NOTE(review): this listing appears to be a commit diff rendered without
# +/- markers, so several rules (:term, :prefix, :phrase, :clause) are defined
# twice below. Presumably the earlier definition of each pair is the
# pre-commit version and the later one the post-commit version — confirm
# against the actual file before relying on either.
class SearchQueryParser < Parslet::Parser
# One or more characters that are not whitespace, a double quote, or a colon.
rule(:term) { match('[^\s":]').repeat(1).as(:term) }
# A single straight double quote.
rule(:quote) { str('"') }
# One or more non-whitespace characters; colons and quotes are allowed inside a term here.
rule(:term) { match('[^\s]').repeat(1).as(:term) }
rule(:colon) { str(':') }
# One or more whitespace characters (not captured).
rule(:space) { match('\s').repeat(1) }
# Leading '+' or '-' captured as :operator.
rule(:operator) { (str('+') | str('-')).as(:operator) }
# Any term followed by a colon.
rule(:prefix) { term >> colon }
# ':name' with an optional closing colon, captured as :shortcode.
rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
# Straight-quoted phrase captured as a list of :term entries separated by optional whitespace.
rule(:phrase) { (quote >> (match('[^\s"]').repeat(1).as(:term) >> space.maybe).repeat >> quote).as(:phrase) }
# Operator/prefix/body clause, or a bare prefix, or a stray quote captured as :junk.
rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) }
# Fixed set of keywords accepted as a prefix.
rule(:prefix_operator) { str('has') | str('is') | str('language') | str('from') | str('before') | str('after') | str('during') | str('in') }
# One of the keywords above, captured as :prefix_operator, followed by a colon.
rule(:prefix) { prefix_operator.as(:prefix_operator) >> colon }
# Quoted phrase in one of five delimiter styles. The inner text is captured
# as a single :phrase value. Each alternative excludes only its own
# delimiter characters, so whitespace and other quote styles may appear
# inside the phrase.
rule(:phrase) do
(str('"') >> match('[^"]').repeat.as(:phrase) >> str('"')) |
# Any of “ ” „ may open and any of them may close.
(match('[“”„]') >> match('[^“”„]').repeat.as(:phrase) >> match('[“”„]')) |
(str('«') >> match('[^«»]').repeat.as(:phrase) >> str('»')) |
(str('「') >> match('[^「」]').repeat.as(:phrase) >> str('」')) |
(str('《') >> match('[^《》]').repeat.as(:phrase) >> str('》'))
end
# Optional operator, optional prefix (captured under :prefix), then a phrase
# or, failing that, a term; the whole match is captured as :clause.
rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term)).as(:clause) }
# Zero or more clauses, each optionally followed by whitespace, captured as :query.
rule(:query) { (clause >> space.maybe).repeat.as(:query) }
# Parsing starts at :query.
root(:query)
end

View file

@ -1,17 +1,6 @@
# frozen_string_literal: true
class SearchQueryTransformer < Parslet::Transform
# Prefix keywords that the clause rule tests with SUPPORTED_PREFIXES.include?(prefix)
# before building a PrefixClause.
# NOTE(review): this listing appears to be a diff rendering without +/- markers;
# this constant may belong to the removed side of the commit — confirm.
SUPPORTED_PREFIXES = %w(
has
is
language
from
before
after
during
in
).freeze
class Query
def initialize(clauses, options = {})
raise ArgumentError if options[:current_account].nil?
@ -223,14 +212,12 @@ class SearchQueryTransformer < Parslet::Transform
end
rule(clause: subtree(:clause)) do
prefix = clause[:prefix][:term].to_s if clause[:prefix]
prefix = clause[:prefix][:prefix_operator].to_s if clause[:prefix]
operator = clause[:operator]&.to_s
term = clause[:phrase] ? clause[:phrase].map { |term| term[:term].to_s }.join(' ') : clause[:term].to_s
term = clause[:phrase] ? clause[:phrase].to_s : clause[:term].to_s
if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix)
if clause[:prefix]
PrefixClause.new(prefix, operator, term, current_account: current_account)
elsif clause[:prefix]
TermClause.new(operator, "#{prefix} #{term}")
elsif clause[:term]
TermClause.new(operator, term)
elsif clause[:phrase]
@ -240,10 +227,6 @@ class SearchQueryTransformer < Parslet::Transform
end
end
# Replaces any :junk subtree with nil.
# NOTE(review): this listing appears to be a diff rendering without +/- markers;
# this rule may belong to the removed side of the commit — confirm.
rule(junk: subtree(:junk)) do
nil
end
# Wraps the sequence of transformed clauses in a Query, forwarding current_account.
# NOTE(review): current_account is not defined in the visible lines — presumably
# it is supplied as a binding when the transform is applied; confirm.
rule(query: sequence(:clauses)) do
Query.new(clauses, current_account: current_account)
end

View file

@ -1,10 +1,8 @@
# frozen_string_literal: true
class SearchService < BaseService
QUOTE_EQUIVALENT_CHARACTERS = /[“”„«»「」『』《》]/
def call(query, account, limit, options = {})
@query = query&.strip&.gsub(QUOTE_EQUIVALENT_CHARACTERS, '"')
@query = query&.strip
@account = account
@options = options
@limit = limit.to_i

View file

@ -10,11 +10,19 @@ describe SearchQueryParser do
it 'consumes "hello"' do
expect(parser.term).to parse('hello')
end
it 'consumes "foo:"' do
expect(parser.term).to parse('foo:')
end
it 'consumes ":foo:"' do
expect(parser.term).to parse(':foo:')
end
end
context 'with prefix' do
it 'consumes "foo:"' do
expect(parser.prefix).to parse('foo:')
it 'consumes "is:"' do
expect(parser.prefix).to parse('is:')
end
end
@ -28,16 +36,18 @@ describe SearchQueryParser do
end
end
context 'with shortcode' do
it 'consumes ":foo:"' do
expect(parser.shortcode).to parse(':foo:')
end
end
context 'with phrase' do
it 'consumes "hello world"' do
expect(parser.phrase).to parse('"hello world"')
end
it 'consumes "hello “new” world"' do
expect(parser.phrase).to parse('"hello “new” world"')
end
it 'consumes “hello « hi » world”' do
expect(parser.phrase).to parse('“hello « hi » world”')
end
end
context 'with clause' do
@ -57,14 +67,6 @@ describe SearchQueryParser do
expect(parser.clause).to parse('-foo:bar')
end
it 'consumes \'foo:"hello world"\'' do
expect(parser.clause).to parse('foo:"hello world"')
end
it 'consumes \'-foo:"hello world"\'' do
expect(parser.clause).to parse('-foo:"hello world"')
end
it 'consumes "foo:"' do
expect(parser.clause).to parse('foo:')
end
@ -94,5 +96,13 @@ describe SearchQueryParser do
it 'consumes "foo:bar bar: hello"' do
expect(parser.query).to parse('foo:bar bar: hello')
end
it 'consumes \'foo:"hello world"\'' do
expect(parser.query).to parse('foo:"hello world"')
end
it 'consumes \'-foo:"hello world"\'' do
expect(parser.query).to parse('-foo:"hello world"')
end
end
end

View file

@ -42,7 +42,7 @@ describe SearchQueryTransformer do
let(:query) { 'foo: bar' }
it 'transforms clauses' do
expect(subject.send(:must_clauses).map(&:term)).to match_array %w(foo bar)
expect(subject.send(:must_clauses).map(&:term)).to match_array %w(foo: bar)
expect(subject.send(:must_not_clauses)).to be_empty
expect(subject.send(:filter_clauses)).to be_empty
end
@ -52,7 +52,7 @@ describe SearchQueryTransformer do
let(:query) { 'foo:bar' }
it 'transforms clauses' do
expect(subject.send(:must_clauses).map(&:term)).to contain_exactly('foo bar')
expect(subject.send(:must_clauses).map(&:term)).to contain_exactly('foo:bar')
expect(subject.send(:must_not_clauses)).to be_empty
expect(subject.send(:filter_clauses)).to be_empty
end