Commit 5454af2517ce4bf55b4beffd221b9a184960f05a
Exists in
master
and in
21 other branches
Merged master
Showing
6 changed files
with
151 additions
and
111 deletions
Show diff stats
CHANGELOG.md
1 | ## 1.0.4 [unreleased] | 1 | ## 1.0.4 [unreleased] |
2 | 2 | ||
3 | +- Added `below` option to misspellings to improve performance | ||
3 | - Added `match` option | 4 | - Added `match` option |
4 | - Added `word: false` option | 5 | - Added `word: false` option |
5 | - Added highlighted fields to `load: false` | 6 | - Added highlighted fields to `load: false` |
README.md
@@ -299,7 +299,15 @@ You can change this with: | @@ -299,7 +299,15 @@ You can change this with: | ||
299 | Product.search "zucini", misspellings: {edit_distance: 2} # zucchini | 299 | Product.search "zucini", misspellings: {edit_distance: 2} # zucchini |
300 | ``` | 300 | ``` |
301 | 301 | ||
302 | -Or turn off misspellings with: | 302 | +To improve performance for correctly spelled queries (which should be the majority for most applications), Searchkick can first perform a search without misspellings and, if there are too few results, perform a second search with misspellings enabled. [master] |
303 | + | ||
304 | +```ruby | ||
305 | +Product.search "zuchini", misspellings: {below: 5} | ||
306 | +``` | ||
307 | + | ||
308 | +If there are fewer than 5 results, a second search is performed with misspellings enabled. | ||
309 | + | ||
310 | +Turn off misspellings with: | ||
303 | 311 | ||
304 | ```ruby | 312 | ```ruby |
305 | Product.search "zuchini", misspellings: false # no zucchini | 313 | Product.search "zuchini", misspellings: false # no zucchini |
lib/searchkick/logging.rb
@@ -2,16 +2,16 @@ | @@ -2,16 +2,16 @@ | ||
2 | 2 | ||
3 | module Searchkick | 3 | module Searchkick |
4 | class Query | 4 | class Query |
5 | - def execute_with_instrumentation | 5 | + def execute_search_with_instrumentation |
6 | event = { | 6 | event = { |
7 | name: "#{searchkick_klass.name} Search", | 7 | name: "#{searchkick_klass.name} Search", |
8 | query: params | 8 | query: params |
9 | } | 9 | } |
10 | ActiveSupport::Notifications.instrument("search.searchkick", event) do | 10 | ActiveSupport::Notifications.instrument("search.searchkick", event) do |
11 | - execute_without_instrumentation | 11 | + execute_search_without_instrumentation |
12 | end | 12 | end |
13 | end | 13 | end |
14 | - alias_method_chain :execute, :instrumentation | 14 | + alias_method_chain :execute_search, :instrumentation |
15 | end | 15 | end |
16 | 16 | ||
17 | class Index | 17 | class Index |
lib/searchkick/query.rb
@@ -24,6 +24,102 @@ module Searchkick | @@ -24,6 +24,102 @@ module Searchkick | ||
24 | @options = options | 24 | @options = options |
25 | @match_suffix = options[:match] || searchkick_options[:match] || "analyzed" | 25 | @match_suffix = options[:match] || searchkick_options[:match] || "analyzed" |
26 | 26 | ||
27 | + prepare | ||
28 | + end | ||
29 | + | ||
30 | + def searchkick_index | ||
31 | + klass.searchkick_index | ||
32 | + end | ||
33 | + | ||
34 | + def searchkick_options | ||
35 | + klass.searchkick_options | ||
36 | + end | ||
37 | + | ||
38 | + def searchkick_klass | ||
39 | + klass.searchkick_klass | ||
40 | + end | ||
41 | + | ||
42 | + def params | ||
43 | + params = { | ||
44 | + index: options[:index_name] || searchkick_index.name, | ||
45 | + body: body | ||
46 | + } | ||
47 | + params.merge!(type: @type) if @type | ||
48 | + params.merge!(routing: @routing) if @routing | ||
49 | + params | ||
50 | + end | ||
51 | + | ||
52 | + def execute | ||
53 | + @execute ||= begin | ||
54 | + begin | ||
55 | + response = execute_search | ||
56 | + if @misspellings_below && response["hits"]["total"] < @misspellings_below | ||
57 | + prepare | ||
58 | + response = execute_search | ||
59 | + end | ||
60 | + rescue => e # TODO rescue type | ||
61 | + status_code = e.message[1..3].to_i | ||
62 | + if status_code == 404 | ||
63 | + raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex" | ||
64 | + elsif status_code == 500 && ( | ||
65 | + e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") || | ||
66 | + e.message.include?("No query registered for [multi_match]") || | ||
67 | + e.message.include?("[match] query does not support [cutoff_frequency]]") || | ||
68 | + e.message.include?("No query registered for [function_score]]") | ||
69 | + ) | ||
70 | + | ||
71 | + raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater" | ||
72 | + elsif status_code == 400 | ||
73 | + if e.message.include?("[multi_match] analyzer [searchkick_search] not found") | ||
74 | + raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex" | ||
75 | + else | ||
76 | + raise InvalidQueryError, e.message | ||
77 | + end | ||
78 | + else | ||
79 | + raise e | ||
80 | + end | ||
81 | + end | ||
82 | + | ||
83 | + # apply facet limit in client due to | ||
84 | + # https://github.com/elasticsearch/elasticsearch/issues/1305 | ||
85 | + @facet_limits.each do |field, limit| | ||
86 | + field = field.to_s | ||
87 | + facet = response["facets"][field] | ||
88 | + response["facets"][field]["terms"] = facet["terms"].first(limit) | ||
89 | + response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] } | ||
90 | + end | ||
91 | + | ||
92 | + opts = { | ||
93 | + page: @page, | ||
94 | + per_page: @per_page, | ||
95 | + padding: @padding, | ||
96 | + load: @load, | ||
97 | + includes: options[:include] || options[:includes], | ||
98 | + json: !options[:json].nil?, | ||
99 | + match_suffix: @match_suffix | ||
100 | + } | ||
101 | + Searchkick::Results.new(searchkick_klass, response, opts) | ||
102 | + end | ||
103 | + end | ||
104 | + | ||
105 | + def to_curl | ||
106 | + query = params | ||
107 | + type = query[:type] | ||
108 | + index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index] | ||
109 | + | ||
110 | + # no easy way to tell which host the client will use | ||
111 | + host = Searchkick.client.transport.hosts.first | ||
112 | + credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil | ||
113 | + "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'" | ||
114 | + end | ||
115 | + | ||
116 | + private | ||
117 | + | ||
118 | + def execute_search | ||
119 | + Searchkick.client.search(params) | ||
120 | + end | ||
121 | + | ||
122 | + def prepare | ||
27 | boost_fields = {} | 123 | boost_fields = {} |
28 | fields = | 124 | fields = |
29 | if options[:fields] | 125 | if options[:fields] |
@@ -94,6 +190,36 @@ module Searchkick | @@ -94,6 +190,36 @@ module Searchkick | ||
94 | } | 190 | } |
95 | else | 191 | else |
96 | queries = [] | 192 | queries = [] |
193 | + | ||
194 | + misspellings = | ||
195 | + if options.key?(:misspellings) | ||
196 | + options[:misspellings] | ||
197 | + elsif options.key?(:mispellings) | ||
198 | + options[:mispellings] # why not? | ||
199 | + else | ||
200 | + true | ||
201 | + end | ||
202 | + | ||
203 | + if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below | ||
204 | + @misspellings_below = misspellings[:below].to_i | ||
205 | + misspellings = false | ||
206 | + end | ||
207 | + | ||
208 | + if misspellings != false | ||
209 | + edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1 | ||
210 | + transpositions = | ||
211 | + if misspellings.is_a?(Hash) && misspellings.key?(:transpositions) | ||
212 | + {fuzzy_transpositions: misspellings[:transpositions]} | ||
213 | + elsif below14? | ||
214 | + {} | ||
215 | + else | ||
216 | + {fuzzy_transpositions: true} | ||
217 | + end | ||
218 | + prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0 | ||
219 | + default_max_expansions = @misspellings_below ? 20 : 3 | ||
220 | + max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions | ||
221 | + end | ||
222 | + | ||
97 | fields.each do |field| | 223 | fields.each do |field| |
98 | qs = [] | 224 | qs = [] |
99 | 225 | ||
@@ -104,29 +230,6 @@ module Searchkick | @@ -104,29 +230,6 @@ module Searchkick | ||
104 | boost: 10 * factor | 230 | boost: 10 * factor |
105 | } | 231 | } |
106 | 232 | ||
107 | - misspellings = | ||
108 | - if options.key?(:misspellings) | ||
109 | - options[:misspellings] | ||
110 | - elsif options.key?(:mispellings) | ||
111 | - options[:mispellings] # why not? | ||
112 | - else | ||
113 | - true | ||
114 | - end | ||
115 | - | ||
116 | - if misspellings != false | ||
117 | - edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1 | ||
118 | - transpositions = | ||
119 | - if misspellings.is_a?(Hash) && misspellings.key?(:transpositions) | ||
120 | - {fuzzy_transpositions: misspellings[:transpositions]} | ||
121 | - elsif below14? | ||
122 | - {} | ||
123 | - else | ||
124 | - {fuzzy_transpositions: true} | ||
125 | - end | ||
126 | - prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0 | ||
127 | - max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || 3 | ||
128 | - end | ||
129 | - | ||
130 | if field == "_all" || field.end_with?(".analyzed") | 233 | if field == "_all" || field.end_with?(".analyzed") |
131 | shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false | 234 | shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false |
132 | qs.concat [ | 235 | qs.concat [ |
@@ -467,90 +570,6 @@ module Searchkick | @@ -467,90 +570,6 @@ module Searchkick | ||
467 | @load = load | 570 | @load = load |
468 | end | 571 | end |
469 | 572 | ||
470 | - def searchkick_index | ||
471 | - klass.searchkick_index | ||
472 | - end | ||
473 | - | ||
474 | - def searchkick_options | ||
475 | - klass.searchkick_options | ||
476 | - end | ||
477 | - | ||
478 | - def searchkick_klass | ||
479 | - klass.searchkick_klass | ||
480 | - end | ||
481 | - | ||
482 | - def params | ||
483 | - params = { | ||
484 | - index: options[:index_name] || searchkick_index.name, | ||
485 | - body: body | ||
486 | - } | ||
487 | - params.merge!(type: @type) if @type | ||
488 | - params.merge!(routing: @routing) if @routing | ||
489 | - params | ||
490 | - end | ||
491 | - | ||
492 | - def execute | ||
493 | - @execute ||= begin | ||
494 | - begin | ||
495 | - response = Searchkick.client.search(params) | ||
496 | - rescue => e # TODO rescue type | ||
497 | - status_code = e.message[1..3].to_i | ||
498 | - if status_code == 404 | ||
499 | - raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex" | ||
500 | - elsif status_code == 500 && ( | ||
501 | - e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") || | ||
502 | - e.message.include?("No query registered for [multi_match]") || | ||
503 | - e.message.include?("[match] query does not support [cutoff_frequency]]") || | ||
504 | - e.message.include?("No query registered for [function_score]]") | ||
505 | - ) | ||
506 | - | ||
507 | - raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater" | ||
508 | - elsif status_code == 400 | ||
509 | - if e.message.include?("[multi_match] analyzer [searchkick_search] not found") | ||
510 | - raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex" | ||
511 | - else | ||
512 | - raise InvalidQueryError, e.message | ||
513 | - end | ||
514 | - else | ||
515 | - raise e | ||
516 | - end | ||
517 | - end | ||
518 | - | ||
519 | - # apply facet limit in client due to | ||
520 | - # https://github.com/elasticsearch/elasticsearch/issues/1305 | ||
521 | - @facet_limits.each do |field, limit| | ||
522 | - field = field.to_s | ||
523 | - facet = response["facets"][field] | ||
524 | - response["facets"][field]["terms"] = facet["terms"].first(limit) | ||
525 | - response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] } | ||
526 | - end | ||
527 | - | ||
528 | - opts = { | ||
529 | - page: @page, | ||
530 | - per_page: @per_page, | ||
531 | - padding: @padding, | ||
532 | - load: @load, | ||
533 | - includes: options[:include] || options[:includes], | ||
534 | - json: !options[:json].nil?, | ||
535 | - match_suffix: @match_suffix | ||
536 | - } | ||
537 | - Searchkick::Results.new(searchkick_klass, response, opts) | ||
538 | - end | ||
539 | - end | ||
540 | - | ||
541 | - def to_curl | ||
542 | - query = params | ||
543 | - type = query[:type] | ||
544 | - index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index] | ||
545 | - | ||
546 | - # no easy way to tell which host the client will use | ||
547 | - host = Searchkick.client.transport.hosts.first | ||
548 | - credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil | ||
549 | - "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'" | ||
550 | - end | ||
551 | - | ||
552 | - private | ||
553 | - | ||
554 | def where_filters(where) | 573 | def where_filters(where) |
555 | filters = [] | 574 | filters = [] |
556 | (where || {}).each do |field, value| | 575 | (where || {}).each do |field, value| |
test/ci/before_install.sh
1 | #!/usr/bin/env bash | 1 | #!/usr/bin/env bash |
2 | 2 | ||
3 | +gem install bundler | ||
4 | + | ||
3 | sudo apt-get purge elasticsearch | 5 | sudo apt-get purge elasticsearch |
4 | wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.3.deb | 6 | wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.3.deb |
5 | sudo dpkg -i elasticsearch-1.7.3.deb | 7 | sudo dpkg -i elasticsearch-1.7.3.deb |
test/misspellings_test.rb
@@ -33,4 +33,14 @@ class MisspellingsTest < Minitest::Test | @@ -33,4 +33,14 @@ class MisspellingsTest < Minitest::Test | ||
33 | ] | 33 | ] |
34 | assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"], misspellings: false | 34 | assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"], misspellings: false |
35 | end | 35 | end |
36 | + | ||
37 | + def test_misspellings_below_unmet | ||
38 | + store_names ["abc", "abd", "aee"] | ||
39 | + assert_search "abc", ["abc", "abd"], misspellings: {below: 2} | ||
40 | + end | ||
41 | + | ||
42 | + def test_misspellings_below_met | ||
43 | + store_names ["abc", "abd", "aee"] | ||
44 | + assert_search "abc", ["abc"], misspellings: {below: 1} | ||
45 | + end | ||
36 | end | 46 | end |