Commit 5454af2517ce4bf55b4beffd221b9a184960f05a

Authored by Andrew Kane
2 parents 3cdc6b08 4f130606

Merged master

CHANGELOG.md
1 1 ## 1.0.4 [unreleased]
2 2  
  3 +- Added `below` option to misspellings to improve performance
3 4 - Added `match` option
4 5 - Added `word: false` option
5 6 - Added highlighted fields to `load: false`
... ...
README.md
... ... @@ -299,7 +299,15 @@ You can change this with:
299 299 Product.search "zucini", misspellings: {edit_distance: 2} # zucchini
300 300 ```
301 301  
302   -Or turn off misspellings with:
  302 +To improve performance for correctly spelled queries (which should be the majority for most applications), Searchkick can first perform a search without misspellings and, if there are too few results, perform a second search with misspellings enabled. [master]
  303 +
  304 +```ruby
  305 +Product.search "zuchini", misspellings: {below: 5}
  306 +```
  307 +
  308 +If there are fewer than 5 results, a second search is performed with misspellings enabled.
  309 +
  310 +Turn off misspellings with:
303 311  
304 312 ```ruby
305 313 Product.search "zuchini", misspellings: false # no zucchini
... ...
lib/searchkick/logging.rb
... ... @@ -2,16 +2,16 @@
2 2  
3 3 module Searchkick
4 4 class Query
5   - def execute_with_instrumentation
  5 + def execute_search_with_instrumentation
6 6 event = {
7 7 name: "#{searchkick_klass.name} Search",
8 8 query: params
9 9 }
10 10 ActiveSupport::Notifications.instrument("search.searchkick", event) do
11   - execute_without_instrumentation
  11 + execute_search_without_instrumentation
12 12 end
13 13 end
14   - alias_method_chain :execute, :instrumentation
  14 + alias_method_chain :execute_search, :instrumentation
15 15 end
16 16  
17 17 class Index
... ...
lib/searchkick/query.rb
... ... @@ -24,6 +24,102 @@ module Searchkick
24 24 @options = options
25 25 @match_suffix = options[:match] || searchkick_options[:match] || "analyzed"
26 26  
  27 + prepare
  28 + end
  29 +
  30 + def searchkick_index
  31 + klass.searchkick_index
  32 + end
  33 +
  34 + def searchkick_options
  35 + klass.searchkick_options
  36 + end
  37 +
  38 + def searchkick_klass
  39 + klass.searchkick_klass
  40 + end
  41 +
  42 + def params
  43 + params = {
  44 + index: options[:index_name] || searchkick_index.name,
  45 + body: body
  46 + }
  47 + params.merge!(type: @type) if @type
  48 + params.merge!(routing: @routing) if @routing
  49 + params
  50 + end
  51 +
  52 + def execute
  53 + @execute ||= begin
  54 + begin
  55 + response = execute_search
  56 + if @misspellings_below && response["hits"]["total"] < @misspellings_below
  57 + prepare
  58 + response = execute_search
  59 + end
  60 + rescue => e # TODO rescue type
  61 + status_code = e.message[1..3].to_i
  62 + if status_code == 404
  63 + raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex"
  64 + elsif status_code == 500 && (
  65 + e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
  66 + e.message.include?("No query registered for [multi_match]") ||
  67 + e.message.include?("[match] query does not support [cutoff_frequency]]") ||
  68 + e.message.include?("No query registered for [function_score]]")
  69 + )
  70 +
  71 + raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater"
  72 + elsif status_code == 400
  73 + if e.message.include?("[multi_match] analyzer [searchkick_search] not found")
  74 + raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex"
  75 + else
  76 + raise InvalidQueryError, e.message
  77 + end
  78 + else
  79 + raise e
  80 + end
  81 + end
  82 +
  83 + # apply facet limit in client due to
  84 + # https://github.com/elasticsearch/elasticsearch/issues/1305
  85 + @facet_limits.each do |field, limit|
  86 + field = field.to_s
  87 + facet = response["facets"][field]
  88 + response["facets"][field]["terms"] = facet["terms"].first(limit)
  89 + response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] }
  90 + end
  91 +
  92 + opts = {
  93 + page: @page,
  94 + per_page: @per_page,
  95 + padding: @padding,
  96 + load: @load,
  97 + includes: options[:include] || options[:includes],
  98 + json: !options[:json].nil?,
  99 + match_suffix: @match_suffix
  100 + }
  101 + Searchkick::Results.new(searchkick_klass, response, opts)
  102 + end
  103 + end
  104 +
  105 + def to_curl
  106 + query = params
  107 + type = query[:type]
  108 + index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
  109 +
  110 + # no easy way to tell which host the client will use
  111 + host = Searchkick.client.transport.hosts.first
  112 + credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil
  113 + "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
  114 + end
  115 +
  116 + private
  117 +
  118 + def execute_search
  119 + Searchkick.client.search(params)
  120 + end
  121 +
  122 + def prepare
27 123 boost_fields = {}
28 124 fields =
29 125 if options[:fields]
... ... @@ -94,6 +190,36 @@ module Searchkick
94 190 }
95 191 else
96 192 queries = []
  193 +
  194 + misspellings =
  195 + if options.key?(:misspellings)
  196 + options[:misspellings]
  197 + elsif options.key?(:mispellings)
  198 + options[:mispellings] # why not?
  199 + else
  200 + true
  201 + end
  202 +
  203 + if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below
  204 + @misspellings_below = misspellings[:below].to_i
  205 + misspellings = false
  206 + end
  207 +
  208 + if misspellings != false
  209 + edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
  210 + transpositions =
  211 + if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
  212 + {fuzzy_transpositions: misspellings[:transpositions]}
  213 + elsif below14?
  214 + {}
  215 + else
  216 + {fuzzy_transpositions: true}
  217 + end
  218 + prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
  219 + default_max_expansions = @misspellings_below ? 20 : 3
  220 + max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
  221 + end
  222 +
97 223 fields.each do |field|
98 224 qs = []
99 225  
... ... @@ -104,29 +230,6 @@ module Searchkick
104 230 boost: 10 * factor
105 231 }
106 232  
107   - misspellings =
108   - if options.key?(:misspellings)
109   - options[:misspellings]
110   - elsif options.key?(:mispellings)
111   - options[:mispellings] # why not?
112   - else
113   - true
114   - end
115   -
116   - if misspellings != false
117   - edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
118   - transpositions =
119   - if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
120   - {fuzzy_transpositions: misspellings[:transpositions]}
121   - elsif below14?
122   - {}
123   - else
124   - {fuzzy_transpositions: true}
125   - end
126   - prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
127   - max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || 3
128   - end
129   -
130 233 if field == "_all" || field.end_with?(".analyzed")
131 234 shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
132 235 qs.concat [
... ... @@ -467,90 +570,6 @@ module Searchkick
467 570 @load = load
468 571 end
469 572  
470   - def searchkick_index
471   - klass.searchkick_index
472   - end
473   -
474   - def searchkick_options
475   - klass.searchkick_options
476   - end
477   -
478   - def searchkick_klass
479   - klass.searchkick_klass
480   - end
481   -
482   - def params
483   - params = {
484   - index: options[:index_name] || searchkick_index.name,
485   - body: body
486   - }
487   - params.merge!(type: @type) if @type
488   - params.merge!(routing: @routing) if @routing
489   - params
490   - end
491   -
492   - def execute
493   - @execute ||= begin
494   - begin
495   - response = Searchkick.client.search(params)
496   - rescue => e # TODO rescue type
497   - status_code = e.message[1..3].to_i
498   - if status_code == 404
499   - raise MissingIndexError, "Index missing - run #{searchkick_klass.name}.reindex"
500   - elsif status_code == 500 && (
501   - e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
502   - e.message.include?("No query registered for [multi_match]") ||
503   - e.message.include?("[match] query does not support [cutoff_frequency]]") ||
504   - e.message.include?("No query registered for [function_score]]")
505   - )
506   -
507   - raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 1.0 or greater"
508   - elsif status_code == 400
509   - if e.message.include?("[multi_match] analyzer [searchkick_search] not found")
510   - raise InvalidQueryError, "Bad mapping - run #{searchkick_klass.name}.reindex"
511   - else
512   - raise InvalidQueryError, e.message
513   - end
514   - else
515   - raise e
516   - end
517   - end
518   -
519   - # apply facet limit in client due to
520   - # https://github.com/elasticsearch/elasticsearch/issues/1305
521   - @facet_limits.each do |field, limit|
522   - field = field.to_s
523   - facet = response["facets"][field]
524   - response["facets"][field]["terms"] = facet["terms"].first(limit)
525   - response["facets"][field]["other"] = facet["total"] - facet["terms"].sum { |term| term["count"] }
526   - end
527   -
528   - opts = {
529   - page: @page,
530   - per_page: @per_page,
531   - padding: @padding,
532   - load: @load,
533   - includes: options[:include] || options[:includes],
534   - json: !options[:json].nil?,
535   - match_suffix: @match_suffix
536   - }
537   - Searchkick::Results.new(searchkick_klass, response, opts)
538   - end
539   - end
540   -
541   - def to_curl
542   - query = params
543   - type = query[:type]
544   - index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
545   -
546   - # no easy way to tell which host the client will use
547   - host = Searchkick.client.transport.hosts.first
548   - credentials = (host[:user] || host[:password]) ? "#{host[:user]}:#{host[:password]}@" : nil
549   - "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
550   - end
551   -
552   - private
553   -
554 573 def where_filters(where)
555 574 filters = []
556 575 (where || {}).each do |field, value|
... ...
test/ci/before_install.sh
1 1 #!/usr/bin/env bash
2 2  
  3 +gem install bundler
  4 +
3 5 sudo apt-get purge elasticsearch
4 6 wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.3.deb
5 7 sudo dpkg -i elasticsearch-1.7.3.deb
... ...
test/misspellings_test.rb
... ... @@ -33,4 +33,14 @@ class MisspellingsTest < Minitest::Test
33 33 ]
34 34 assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"], misspellings: false
35 35 end
  36 +
  37 + def test_misspellings_below_unmet
  38 + store_names ["abc", "abd", "aee"]
  39 + assert_search "abc", ["abc", "abd"], misspellings: {below: 2}
  40 + end
  41 +
  42 + def test_misspellings_below_met
  43 + store_names ["abc", "abd", "aee"]
  44 + assert_search "abc", ["abc"], misspellings: {below: 1}
  45 + end
36 46 end
... ...