Commit 5454af2517ce4bf55b4beffd221b9a184960f05a
Exists in master and in 21 other branches
Merged master
Showing 6 changed files with 151 additions and 111 deletions
Show diff stats
CHANGELOG.md
README.md
... | ... | @@ -299,7 +299,15 @@ You can change this with: |
299 | 299 | Product.search "zucini", misspellings: {edit_distance: 2} # zucchini |
300 | 300 | ``` |
301 | 301 | |
302 | -Or turn off misspellings with: | |
302 | +To improve performance for correctly spelled queries (which should be the majority in most applications), Searchkick can first perform a search without misspellings and, if there are too few results, perform a second search with misspellings enabled. [master] | |
303 | + | |
304 | +```ruby | |
305 | +Product.search "zuchini", misspellings: {below: 5} | |
306 | +``` | |
307 | + | |
308 | +If there are fewer than 5 results, a second search is performed with misspellings enabled. | |
309 | + | |
310 | +Turn off misspellings with: | |
303 | 311 | |
304 | 312 | ```ruby |
305 | 313 | Product.search "zuchini", misspellings: false # no zucchini | ... | ... |
lib/searchkick/logging.rb
... | ... | @@ -2,16 +2,16 @@ |
2 | 2 | |
3 | 3 | module Searchkick |
4 | 4 | class Query |
5 | - def execute_with_instrumentation | |
5 | + def execute_search_with_instrumentation | |
6 | 6 | event = { |
7 | 7 | name: "#{searchkick_klass.name} Search", |
8 | 8 | query: params |
9 | 9 | } |
10 | 10 | ActiveSupport::Notifications.instrument("search.searchkick", event) do |
11 | - execute_without_instrumentation | |
11 | + execute_search_without_instrumentation | |
12 | 12 | end |
13 | 13 | end |
14 | - alias_method_chain :execute, :instrumentation | |
14 | + alias_method_chain :execute_search, :instrumentation | |
15 | 15 | end |
16 | 16 | |
17 | 17 | class Index | ... | ... |
lib/searchkick/query.rb
... | ... | @@ -24,6 +24,102 @@ module Searchkick |
24 | 24 | @options = options |
25 | 25 | @match_suffix = options[:match] || searchkick_options[:match] || "analyzed" |
26 | 26 | |
27 | + prepare | |
28 | + end | |
29 | + | |
30 | + def searchkick_index | |
31 | + klass.searchkick_index | |
32 | + end | |
33 | + | |
34 | + def searchkick_options | |
35 | + klass.searchkick_options | |
36 | + end | |
37 | + | |
38 | + def searchkick_klass | |
39 | + klass.searchkick_klass | |
40 | + end | |
41 | + | |
42 | + def params | |
43 | + params = { | |
44 | + index: options[:index_name] || searchkick_index.name, | |
45 | + body: body | |
46 | + } | |
47 | + params.merge!(type: @type) if @type | |
48 | + params.merge!(routing: @routing) if @routing | |
49 | + params | |
50 | + end | |
51 | + | |
52 | + def execute | |
53 | + @execute ||= begin | |
54 | + begin | |
55 | + response = execute_search | |
56 | + if @misspellings_below && response["hits"]["total"] < @misspellings_below | |
57 | + prepare | |
58 | + response = execute_search | |
59 | + end | |
60 | + rescue => e # TODO rescue type | |
61 | + status_code = e.message[1..3].to_i | |
62 | + if status_code == 404 | |
63 | + raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex" | |
64 | + elsif status_code == 500 && ( | |
65 | + e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") || | |
66 | + e.message.include?("No query registered for [multi_match]") || | |
67 | + e.message.include?("[match] query does not support [cutoff_frequency]]") || | |
68 | + e.message.include?("No query registered for [function_score]]") | |
69 | + ) | |
70 | + | |
71 | + raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater" | |
72 | + elsif status_code == 400 | |
73 | + if e.message.include?("[multi_match] analyzer [searchkick_search] not found") | |
74 | + raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex" | |
75 | + else | |
76 | + raise InvalidQueryError, e.message | |
77 | + end | |
78 | + else | |
79 | + raise e | |
80 | + end | |
81 | + end | |
82 | + | |
83 | + # apply facet limit in client due to | |
84 | + # https://github.com/elasticsearch/elasticsearch/issues/1305 | |
85 | + @facet_limits.each do |field, limit| | |
86 | + field = field.to_s | |
87 | + facet = response["facets"][field] | |
88 | + response["facets"][field]["terms"] = facet["terms"].first(limit) | |
89 | + response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] } | |
90 | + end | |
91 | + | |
92 | + opts = { | |
93 | + page: @page, | |
94 | + per_page: @per_page, | |
95 | + padding: @padding, | |
96 | + load: @load, | |
97 | + includes: options[:include] || options[:includes], | |
98 | + json: !options[:json].nil?, | |
99 | + match_suffix: @match_suffix | |
100 | + } | |
101 | + Searchkick::Results.new(searchkick_klass, response, opts) | |
102 | + end | |
103 | + end | |
104 | + | |
105 | + def to_curl | |
106 | + query = params | |
107 | + type = query[:type] | |
108 | + index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index] | |
109 | + | |
110 | + # no easy way to tell which host the client will use | |
111 | + host = Searchkick.client.transport.hosts.first | |
112 | + credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil | |
113 | + "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'" | |
114 | + end | |
115 | + | |
116 | + private | |
117 | + | |
118 | + def execute_search | |
119 | + Searchkick.client.search(params) | |
120 | + end | |
121 | + | |
122 | + def prepare | |
27 | 123 | boost_fields = {} |
28 | 124 | fields = |
29 | 125 | if options[:fields] |
... | ... | @@ -94,6 +190,36 @@ module Searchkick |
94 | 190 | } |
95 | 191 | else |
96 | 192 | queries = [] |
193 | + | |
194 | + misspellings = | |
195 | + if options.key?(:misspellings) | |
196 | + options[:misspellings] | |
197 | + elsif options.key?(:mispellings) | |
198 | + options[:mispellings] # why not? | |
199 | + else | |
200 | + true | |
201 | + end | |
202 | + | |
203 | + if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below | |
204 | + @misspellings_below = misspellings[:below].to_i | |
205 | + misspellings = false | |
206 | + end | |
207 | + | |
208 | + if misspellings != false | |
209 | + edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1 | |
210 | + transpositions = | |
211 | + if misspellings.is_a?(Hash) && misspellings.key?(:transpositions) | |
212 | + {fuzzy_transpositions: misspellings[:transpositions]} | |
213 | + elsif below14? | |
214 | + {} | |
215 | + else | |
216 | + {fuzzy_transpositions: true} | |
217 | + end | |
218 | + prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0 | |
219 | + default_max_expansions = @misspellings_below ? 20 : 3 | |
220 | + max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions | |
221 | + end | |
222 | + | |
97 | 223 | fields.each do |field| |
98 | 224 | qs = [] |
99 | 225 | |
... | ... | @@ -104,29 +230,6 @@ module Searchkick |
104 | 230 | boost: 10 * factor |
105 | 231 | } |
106 | 232 | |
107 | - misspellings = | |
108 | - if options.key?(:misspellings) | |
109 | - options[:misspellings] | |
110 | - elsif options.key?(:mispellings) | |
111 | - options[:mispellings] # why not? | |
112 | - else | |
113 | - true | |
114 | - end | |
115 | - | |
116 | - if misspellings != false | |
117 | - edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1 | |
118 | - transpositions = | |
119 | - if misspellings.is_a?(Hash) && misspellings.key?(:transpositions) | |
120 | - {fuzzy_transpositions: misspellings[:transpositions]} | |
121 | - elsif below14? | |
122 | - {} | |
123 | - else | |
124 | - {fuzzy_transpositions: true} | |
125 | - end | |
126 | - prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0 | |
127 | - max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || 3 | |
128 | - end | |
129 | - | |
130 | 233 | if field == "_all" || field.end_with?(".analyzed") |
131 | 234 | shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false |
132 | 235 | qs.concat [ |
... | ... | @@ -467,90 +570,6 @@ module Searchkick |
467 | 570 | @load = load |
468 | 571 | end |
469 | 572 | |
470 | - def searchkick_index | |
471 | - klass.searchkick_index | |
472 | - end | |
473 | - | |
474 | - def searchkick_options | |
475 | - klass.searchkick_options | |
476 | - end | |
477 | - | |
478 | - def searchkick_klass | |
479 | - klass.searchkick_klass | |
480 | - end | |
481 | - | |
482 | - def params | |
483 | - params = { | |
484 | - index: options[:index_name] || searchkick_index.name, | |
485 | - body: body | |
486 | - } | |
487 | - params.merge!(type: @type) if @type | |
488 | - params.merge!(routing: @routing) if @routing | |
489 | - params | |
490 | - end | |
491 | - | |
492 | - def execute | |
493 | - @execute ||= begin | |
494 | - begin | |
495 | - response = Searchkick.client.search(params) | |
496 | - rescue => e # TODO rescue type | |
497 | - status_code = e.message[1..3].to_i | |
498 | - if status_code == 404 | |
499 | - raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex" | |
500 | - elsif status_code == 500 && ( | |
501 | - e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") || | |
502 | - e.message.include?("No query registered for [multi_match]") || | |
503 | - e.message.include?("[match] query does not support [cutoff_frequency]]") || | |
504 | - e.message.include?("No query registered for [function_score]]") | |
505 | - ) | |
506 | - | |
507 | - raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater" | |
508 | - elsif status_code == 400 | |
509 | - if e.message.include?("[multi_match] analyzer [searchkick_search] not found") | |
510 | - raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex" | |
511 | - else | |
512 | - raise InvalidQueryError, e.message | |
513 | - end | |
514 | - else | |
515 | - raise e | |
516 | - end | |
517 | - end | |
518 | - | |
519 | - # apply facet limit in client due to | |
520 | - # https://github.com/elasticsearch/elasticsearch/issues/1305 | |
521 | - @facet_limits.each do |field, limit| | |
522 | - field = field.to_s | |
523 | - facet = response["facets"][field] | |
524 | - response["facets"][field]["terms"] = facet["terms"].first(limit) | |
525 | - response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] } | |
526 | - end | |
527 | - | |
528 | - opts = { | |
529 | - page: @page, | |
530 | - per_page: @per_page, | |
531 | - padding: @padding, | |
532 | - load: @load, | |
533 | - includes: options[:include] || options[:includes], | |
534 | - json: !options[:json].nil?, | |
535 | - match_suffix: @match_suffix | |
536 | - } | |
537 | - Searchkick::Results.new(searchkick_klass, response, opts) | |
538 | - end | |
539 | - end | |
540 | - | |
541 | - def to_curl | |
542 | - query = params | |
543 | - type = query[:type] | |
544 | - index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index] | |
545 | - | |
546 | - # no easy way to tell which host the client will use | |
547 | - host = Searchkick.client.transport.hosts.first | |
548 | - credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil | |
549 | - "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'" | |
550 | - end | |
551 | - | |
552 | - private | |
553 | - | |
554 | 573 | def where_filters(where) |
555 | 574 | filters = [] |
556 | 575 | (where || {}).each do |field, value| | ... | ... |
test/ci/before_install.sh
test/misspellings_test.rb
... | ... | @@ -33,4 +33,14 @@ class MisspellingsTest < Minitest::Test |
33 | 33 | ] |
34 | 34 | assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"], misspellings: false |
35 | 35 | end |
36 | + | |
37 | + def test_misspellings_below_unmet | |
38 | + store_names ["abc", "abd", "aee"] | |
39 | + assert_search "abc", ["abc", "abd"], misspellings: {below: 2} | |
40 | + end | |
41 | + | |
42 | + def test_misspellings_below_met | |
43 | + store_names ["abc", "abd", "aee"] | |
44 | + assert_search "abc", ["abc"], misspellings: {below: 1} | |
45 | + end | |
36 | 46 | end | ... | ... |