diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb
index dfe8b9e9d86..84e20b7b626 100644
--- a/app/lib/search_query_parser.rb
+++ b/app/lib/search_query_parser.rb
@@ -1,15 +1,71 @@
 # frozen_string_literal: true
 
 class SearchQueryParser < Parslet::Parser
-  rule(:term)      { match('[^\s":]').repeat(1).as(:term) }
-  rule(:quote)     { str('"') }
+  SUPPORTED_PREFIXES = %w(
+    has
+    is
+    language
+    from
+    before
+    after
+    during
+    in
+  ).freeze
+
+  # Efficiently matches disjoint strings.
+  #
+  # Parslet atom that tests the input against one Regexp.union pattern
+  # built from +strings+.
+  class StrList < Parslet::Atoms::Base
+    attr_reader :strings
+
+    def initialize(strings)
+      super()
+
+      @strings = strings
+      @pattern = Regexp.union(strings)
+      @min_length = strings.map(&:length).min
+    end
+
+    def error_msgs
+      @error_msgs ||= {
+        premature: 'Premature end of input',
+        failed: "Expected any of #{strings.inspect}, but got ",
+      }
+    end
+
+    def try(source, context, _consume_all)
+      match = source.match(@pattern)
+      return succ(source.consume(match)) unless match.nil?
+
+      # Input ending early:
+      return context.err(self, source, error_msgs[:premature]) if source.chars_left < @min_length
+
+      # Expected something, but got something else instead. Consume the
+      # shortest candidate's length so the message can quote what was found:
+      error_pos = source.pos
+      context.err_at(self, source, [error_msgs[:failed], source.consume(@min_length)], error_pos)
+    end
+
+    def to_s_inner(_prec)
+      "[#{strings.map { |str| "'#{str}'" }.join(',')}]"
+    end
+  end
+
+  rule(:term) { match('[^\s]').repeat(1).as(:term) }
   rule(:colon) { str(':') }
   rule(:space) { match('\s').repeat(1) }
   rule(:operator) { (str('+') | str('-')).as(:operator) }
-  rule(:prefix) { term >> colon }
-  rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
-  rule(:phrase) { (quote >> (match('[^\s"]').repeat(1).as(:term) >> space.maybe).repeat >> quote).as(:phrase) }
-  rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) }
+  rule(:prefix_operator) { StrList.new(SUPPORTED_PREFIXES) }
+  rule(:prefix) { prefix_operator.as(:prefix_operator) >> colon }
+  rule(:phrase) do
+    (str('"') >> match('[^"]').repeat.as(:phrase) >> str('"')) |
+      (match('[“”„]') >> match('[^“”„]').repeat.as(:phrase) >> match('[“”„]')) |
+      (str('«') >> match('[^«»]').repeat.as(:phrase) >> str('»')) |
+      (str('「') >> match('[^「」]').repeat.as(:phrase) >> str('」')) |
+      (str('《') >> match('[^《》]').repeat.as(:phrase) >> str('》'))
+  end
+  rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term)).as(:clause) }
   rule(:query) { (clause >> space.maybe).repeat.as(:query) }
   root(:query)
 end
diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb
index 927495eace9..91fdbb6df8e 100644
--- a/app/lib/search_query_transformer.rb
+++ b/app/lib/search_query_transformer.rb
@@ -1,17 +1,6 @@
 # frozen_string_literal: true
 
 class SearchQueryTransformer < Parslet::Transform
-  SUPPORTED_PREFIXES = %w(
-    has
-    is
-    language
-    from
-    before
-    after
-    during
-    in
-  ).freeze
-
   class Query
     def initialize(clauses, options = {})
       raise ArgumentError if options[:current_account].nil?
@@ -225,14 +214,12 @@ class SearchQueryTransformer < Parslet::Transform
   end
 
   rule(clause: subtree(:clause)) do
-    prefix   = clause[:prefix][:term].to_s if clause[:prefix]
+    prefix   = clause[:prefix][:prefix_operator].to_s if clause[:prefix]
     operator = clause[:operator]&.to_s
-    term     = clause[:phrase] ? clause[:phrase].map { |term| term[:term].to_s }.join(' ') : clause[:term].to_s
+    term     = clause[:phrase] ? clause[:phrase].to_s : clause[:term].to_s
 
-    if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix)
+    if clause[:prefix]
       PrefixClause.new(prefix, operator, term, current_account: current_account)
-    elsif clause[:prefix]
-      TermClause.new(operator, "#{prefix} #{term}")
     elsif clause[:term]
       TermClause.new(operator, term)
     elsif clause[:phrase]
@@ -242,10 +229,6 @@ class SearchQueryTransformer < Parslet::Transform
     end
   end
 
-  rule(junk: subtree(:junk)) do
-    nil
-  end
-
   rule(query: sequence(:clauses)) do
     Query.new(clauses, current_account: current_account)
   end
diff --git a/app/services/search_service.rb b/app/services/search_service.rb
index 9a40d7bdd57..40d82fc525b 100644
--- a/app/services/search_service.rb
+++ b/app/services/search_service.rb
@@ -1,10 +1,8 @@
 # frozen_string_literal: true
 
 class SearchService < BaseService
-  QUOTE_EQUIVALENT_CHARACTERS = /[“”„«»「」『』《》]/
-
   def call(query, account, limit, options = {})
-    @query     = query&.strip&.gsub(QUOTE_EQUIVALENT_CHARACTERS, '"')
+    @query     = query&.strip
     @account   = account
     @options   = options
     @limit     = limit.to_i
diff --git a/spec/lib/search_query_parser_spec.rb b/spec/lib/search_query_parser_spec.rb
index 66b0e8f9e23..3c3d2fd0016 100644
--- a/spec/lib/search_query_parser_spec.rb
+++ b/spec/lib/search_query_parser_spec.rb
@@ -10,11 +10,19 @@ describe SearchQueryParser do
     it 'consumes "hello"' do
       expect(parser.term).to parse('hello')
     end
+
+    it 'consumes "foo:"' do
+      expect(parser.term).to parse('foo:')
+    end
+
+    it 'consumes ":foo:"' do
+      expect(parser.term).to parse(':foo:')
+    end
   end
 
   context 'with prefix' do
-    it 'consumes "foo:"' do
-      expect(parser.prefix).to parse('foo:')
+    it 'consumes "is:"' do
+      expect(parser.prefix).to parse('is:')
     end
   end
 
@@ -28,16 +36,18 @@ describe SearchQueryParser do
     end
   end
 
-  context 'with shortcode' do
-    it 'consumes ":foo:"' do
-      expect(parser.shortcode).to parse(':foo:')
-    end
-  end
-
   context 'with phrase' do
     it 'consumes "hello world"' do
       expect(parser.phrase).to parse('"hello world"')
     end
+
+    it 'consumes "hello “new” world"' do
+      expect(parser.phrase).to parse('"hello “new” world"')
+    end
+
+    it 'consumes “hello « hi » world”' do
+      expect(parser.phrase).to parse('“hello « hi » world”')
+    end
   end
 
   context 'with clause' do
@@ -57,14 +67,6 @@ describe SearchQueryParser do
       expect(parser.clause).to parse('-foo:bar')
     end
 
-    it 'consumes \'foo:"hello world"\'' do
-      expect(parser.clause).to parse('foo:"hello world"')
-    end
-
-    it 'consumes \'-foo:"hello world"\'' do
-      expect(parser.clause).to parse('-foo:"hello world"')
-    end
-
     it 'consumes "foo:"' do
       expect(parser.clause).to parse('foo:')
     end
@@ -94,5 +96,13 @@ describe SearchQueryParser do
     it 'consumes "foo:bar bar: hello"' do
       expect(parser.query).to parse('foo:bar bar: hello')
     end
+
+    it 'consumes \'foo:"hello world"\'' do
+      expect(parser.query).to parse('foo:"hello world"')
+    end
+
+    it 'consumes \'-foo:"hello world"\'' do
+      expect(parser.query).to parse('-foo:"hello world"')
+    end
   end
 end
diff --git a/spec/lib/search_query_transformer_spec.rb b/spec/lib/search_query_transformer_spec.rb
index 5817e3d1d20..1c710947df4 100644
--- a/spec/lib/search_query_transformer_spec.rb
+++ b/spec/lib/search_query_transformer_spec.rb
@@ -42,7 +42,7 @@ describe SearchQueryTransformer do
     let(:query) { 'foo: bar' }
 
     it 'transforms clauses' do
-      expect(subject.send(:must_clauses).map(&:term)).to match_array %w(foo bar)
+      expect(subject.send(:must_clauses).map(&:term)).to match_array %w(foo: bar)
       expect(subject.send(:must_not_clauses)).to be_empty
       expect(subject.send(:filter_clauses)).to be_empty
     end
@@ -52,7 +52,7 @@ describe SearchQueryTransformer do
     let(:query) { 'foo:bar' }
 
     it 'transforms clauses' do
-      expect(subject.send(:must_clauses).map(&:term)).to contain_exactly('foo bar')
+      expect(subject.send(:must_clauses).map(&:term)).to contain_exactly('foo:bar')
       expect(subject.send(:must_not_clauses)).to be_empty
       expect(subject.send(:filter_clauses)).to be_empty
     end