Commit 5454af2517ce4bf55b4beffd221b9a184960f05a

Authored by Andrew Kane
2 parents 3cdc6b08 4f130606

Merged master

1 ## 1.0.4 [unreleased] 1 ## 1.0.4 [unreleased]
2 2
  3 +- Added `below` option to misspellings to improve performance
3 - Added `match` option 4 - Added `match` option
4 - Added `word: false` option 5 - Added `word: false` option
5 - Added highlighted fields to `load: false` 6 - Added highlighted fields to `load: false`
@@ -299,7 +299,15 @@ You can change this with: @@ -299,7 +299,15 @@ You can change this with:
299 Product.search "zucini", misspellings: {edit_distance: 2} # zucchini 299 Product.search "zucini", misspellings: {edit_distance: 2} # zucchini
300 ``` 300 ```
301 301
302 -Or turn off misspellings with: 302 +To improve performance for correctly spelled queries (which should be a majority for most applications), Searchkick can first perform a search without misspellings, and if there are few results, perform another with them. [master]
  303 +
  304 +```ruby
  305 +Product.search "zuchini", misspellings: {below: 5}
  306 +```
  307 +
  308 +If there are fewer than 5 results, a second search is performed with misspellings enabled.
  309 +
  310 +Turn off misspellings with:
303 311
304 ```ruby 312 ```ruby
305 Product.search "zuchini", misspellings: false # no zucchini 313 Product.search "zuchini", misspellings: false # no zucchini
lib/searchkick/logging.rb
@@ -2,16 +2,16 @@ @@ -2,16 +2,16 @@
2 2
3 module Searchkick 3 module Searchkick
4 class Query 4 class Query
5 - def execute_with_instrumentation 5 + def execute_search_with_instrumentation
6 event = { 6 event = {
7 name: "#{searchkick_klass.name} Search", 7 name: "#{searchkick_klass.name} Search",
8 query: params 8 query: params
9 } 9 }
10 ActiveSupport::Notifications.instrument("search.searchkick", event) do 10 ActiveSupport::Notifications.instrument("search.searchkick", event) do
11 - execute_without_instrumentation 11 + execute_search_without_instrumentation
12 end 12 end
13 end 13 end
14 - alias_method_chain :execute, :instrumentation 14 + alias_method_chain :execute_search, :instrumentation
15 end 15 end
16 16
17 class Index 17 class Index
lib/searchkick/query.rb
@@ -24,6 +24,102 @@ module Searchkick @@ -24,6 +24,102 @@ module Searchkick
24 @options = options 24 @options = options
25 @match_suffix = options[:match] || searchkick_options[:match] || "analyzed" 25 @match_suffix = options[:match] || searchkick_options[:match] || "analyzed"
26 26
  27 + prepare
  28 + end
  29 +
  30 + def searchkick_index
  31 + klass.searchkick_index
  32 + end
  33 +
  34 + def searchkick_options
  35 + klass.searchkick_options
  36 + end
  37 +
  38 + def searchkick_klass
  39 + klass.searchkick_klass
  40 + end
  41 +
  42 + def params
  43 + params = {
  44 + index: options[:index_name] || searchkick_index.name,
  45 + body: body
  46 + }
  47 + params.merge!(type: @type) if @type
  48 + params.merge!(routing: @routing) if @routing
  49 + params
  50 + end
  51 +
  52 + def execute
  53 + @execute ||= begin
  54 + begin
  55 + response = execute_search
  56 + if @misspellings_below && response["hits"]["total"] < @misspellings_below
  57 + prepare
  58 + response = execute_search
  59 + end
  60 + rescue => e # TODO rescue type
  61 + status_code = e.message[1..3].to_i
  62 + if status_code == 404
  63 + raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex"
  64 + elsif status_code == 500 && (
  65 + e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
  66 + e.message.include?("No query registered for [multi_match]") ||
  67 + e.message.include?("[match] query does not support [cutoff_frequency]]") ||
  68 + e.message.include?("No query registered for [function_score]]")
  69 + )
  70 +
  71 + raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater"
  72 + elsif status_code == 400
  73 + if e.message.include?("[multi_match] analyzer [searchkick_search] not found")
  74 + raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex"
  75 + else
  76 + raise InvalidQueryError, e.message
  77 + end
  78 + else
  79 + raise e
  80 + end
  81 + end
  82 +
  83 + # apply facet limit in client due to
  84 + # https://github.com/elasticsearch/elasticsearch/issues/1305
  85 + @facet_limits.each do |field, limit|
  86 + field = field.to_s
  87 + facet = response["facets"][field]
  88 + response["facets"][field]["terms"] = facet["terms"].first(limit)
  89 + response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] }
  90 + end
  91 +
  92 + opts = {
  93 + page: @page,
  94 + per_page: @per_page,
  95 + padding: @padding,
  96 + load: @load,
  97 + includes: options[:include] || options[:includes],
  98 + json: !options[:json].nil?,
  99 + match_suffix: @match_suffix
  100 + }
  101 + Searchkick::Results.new(searchkick_klass, response, opts)
  102 + end
  103 + end
  104 +
  105 + def to_curl
  106 + query = params
  107 + type = query[:type]
  108 + index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
  109 +
  110 + # no easy way to tell which host the client will use
  111 + host = Searchkick.client.transport.hosts.first
  112 + credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil
  113 + "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
  114 + end
  115 +
  116 + private
  117 +
  118 + def execute_search
  119 + Searchkick.client.search(params)
  120 + end
  121 +
  122 + def prepare
27 boost_fields = {} 123 boost_fields = {}
28 fields = 124 fields =
29 if options[:fields] 125 if options[:fields]
@@ -94,6 +190,36 @@ module Searchkick @@ -94,6 +190,36 @@ module Searchkick
94 } 190 }
95 else 191 else
96 queries = [] 192 queries = []
  193 +
  194 + misspellings =
  195 + if options.key?(:misspellings)
  196 + options[:misspellings]
  197 + elsif options.key?(:mispellings)
  198 + options[:mispellings] # why not?
  199 + else
  200 + true
  201 + end
  202 +
  203 + if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below
  204 + @misspellings_below = misspellings[:below].to_i
  205 + misspellings = false
  206 + end
  207 +
  208 + if misspellings != false
  209 + edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
  210 + transpositions =
  211 + if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
  212 + {fuzzy_transpositions: misspellings[:transpositions]}
  213 + elsif below14?
  214 + {}
  215 + else
  216 + {fuzzy_transpositions: true}
  217 + end
  218 + prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
  219 + default_max_expansions = @misspellings_below ? 20 : 3
  220 + max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
  221 + end
  222 +
97 fields.each do |field| 223 fields.each do |field|
98 qs = [] 224 qs = []
99 225
@@ -104,29 +230,6 @@ module Searchkick @@ -104,29 +230,6 @@ module Searchkick
104 boost: 10 * factor 230 boost: 10 * factor
105 } 231 }
106 232
107 - misspellings =  
108 - if options.key?(:misspellings)  
109 - options[:misspellings]  
110 - elsif options.key?(:mispellings)  
111 - options[:mispellings] # why not?  
112 - else  
113 - true  
114 - end  
115 -  
116 - if misspellings != false  
117 - edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1  
118 - transpositions =  
119 - if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)  
120 - {fuzzy_transpositions: misspellings[:transpositions]}  
121 - elsif below14?  
122 - {}  
123 - else  
124 - {fuzzy_transpositions: true}  
125 - end  
126 - prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0  
127 - max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || 3  
128 - end  
129 -  
130 if field == "_all" || field.end_with?(".analyzed") 233 if field == "_all" || field.end_with?(".analyzed")
131 shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false 234 shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
132 qs.concat [ 235 qs.concat [
@@ -467,90 +570,6 @@ module Searchkick @@ -467,90 +570,6 @@ module Searchkick
467 @load = load 570 @load = load
468 end 571 end
469 572
470 - def searchkick_index  
471 - klass.searchkick_index  
472 - end  
473 -  
474 - def searchkick_options  
475 - klass.searchkick_options  
476 - end  
477 -  
478 - def searchkick_klass  
479 - klass.searchkick_klass  
480 - end  
481 -  
482 - def params  
483 - params = {  
484 - index: options[:index_name] || searchkick_index.name,  
485 - body: body  
486 - }  
487 - params.merge!(type: @type) if @type  
488 - params.merge!(routing: @routing) if @routing  
489 - params  
490 - end  
491 -  
492 - def execute  
493 - @execute ||= begin  
494 - begin  
495 - response = Searchkick.client.search(params)  
496 - rescue => e # TODO rescue type  
497 - status_code = e.message[1..3].to_i  
498 - if status_code == 404  
499 - raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex"  
500 - elsif status_code == 500 && (  
501 - e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||  
502 - e.message.include?("No query registered for [multi_match]") ||  
503 - e.message.include?("[match] query does not support [cutoff_frequency]]") ||  
504 - e.message.include?("No query registered for [function_score]]")  
505 - )  
506 -  
507 - raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater"  
508 - elsif status_code == 400  
509 - if e.message.include?("[multi_match] analyzer [searchkick_search] not found")  
510 - raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex"  
511 - else  
512 - raise InvalidQueryError, e.message  
513 - end  
514 - else  
515 - raise e  
516 - end  
517 - end  
518 -  
519 - # apply facet limit in client due to  
520 - # https://github.com/elasticsearch/elasticsearch/issues/1305  
521 - @facet_limits.each do |field, limit|  
522 - field = field.to_s  
523 - facet = response["facets"][field]  
524 - response["facets"][field]["terms"] = facet["terms"].first(limit)  
525 - response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] }  
526 - end  
527 -  
528 - opts = {  
529 - page: @page,  
530 - per_page: @per_page,  
531 - padding: @padding,  
532 - load: @load,  
533 - includes: options[:include] || options[:includes],  
534 - json: !options[:json].nil?,  
535 - match_suffix: @match_suffix  
536 - }  
537 - Searchkick::Results.new(searchkick_klass, response, opts)  
538 - end  
539 - end  
540 -  
541 - def to_curl  
542 - query = params  
543 - type = query[:type]  
544 - index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]  
545 -  
546 - # no easy way to tell which host the client will use  
547 - host = Searchkick.client.transport.hosts.first  
548 - credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil  
549 - "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"  
550 - end  
551 -  
552 - private  
553 -  
554 def where_filters(where) 573 def where_filters(where)
555 filters = [] 574 filters = []
556 (where || {}).each do |field, value| 575 (where || {}).each do |field, value|
test/ci/before_install.sh
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
2 2
  3 +gem install bundler
  4 +
3 sudo apt-get purge elasticsearch 5 sudo apt-get purge elasticsearch
4 wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.3.deb 6 wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.3.deb
5 sudo dpkg -i elasticsearch-1.7.3.deb 7 sudo dpkg -i elasticsearch-1.7.3.deb
test/misspellings_test.rb
@@ -33,4 +33,14 @@ class MisspellingsTest < Minitest::Test @@ -33,4 +33,14 @@ class MisspellingsTest < Minitest::Test
33 ] 33 ]
34 assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"], misspellings: false 34 assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"], misspellings: false
35 end 35 end
  36 +
  37 + def test_misspellings_below_unmet
  38 + store_names ["abc", "abd", "aee"]
  39 + assert_search "abc", ["abc", "abd"], misspellings: {below: 2}
  40 + end
  41 +
  42 + def test_misspellings_below_met
  43 + store_names ["abc", "abd", "aee"]
  44 + assert_search "abc", ["abc"], misspellings: {below: 1}
  45 + end
36 end 46 end