Commit 94ca756e23ffd624bd62e7ca2a4e5a30f3522a91

Authored by Andrew Kane
1 parent 7213725b

Added support for reloadable, multi-word, search time synonyms

CHANGELOG.md
1   -## 4.3.2 (unreleased)
  1 +## 4.4.0 (unreleased)
2 2  
  3 +- Added support for reloadable, multi-word, search time synonyms
3 4 - Fixed another deprecation warning in Ruby 2.7
4 5  
5 6 ## 4.3.1 (2020-05-13)
... ...
README.md
... ... @@ -324,29 +324,52 @@ A few languages require plugins:
324 324  
325 325 ```ruby
326 326 class Product < ApplicationRecord
327   - searchkick synonyms: [["pop", "soda"], ["burger", "hamburger"]]
  327 + searchkick search_synonyms: [["pop", "soda"], ["burger", "hamburger"]]
328 328 end
329 329 ```
330 330  
331   -Call `Product.reindex` after changing synonyms.
  331 +Call `Product.reindex` after changing synonyms. Synonyms are applied at search time before stemming, and can be a single word or multiple words.
332 332  
333   -Synonyms cannot be multiple words at the moment.
  333 +For directional synonyms, use:
  334 +
  335 +```ruby
  336 +search_synonyms: ["lightbulb => halogenlamp"]
  337 +```
  338 +
  339 +### Dynamic Synonyms
  340 +
  341 +The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms without a full reindex.
  342 +
  343 +#### Elasticsearch 7.3+
  344 +
  345 +For Elasticsearch 7.3+, we recommend placing synonyms in a file on the Elasticsearch server (in the `config` directory). This allows you to reload synonyms without reindexing.
334 346  
335   -To read synonyms from a file, use:
  347 +```txt
  348 +pop, soda
  349 +burger, hamburger
  350 +```
  351 +
  352 +Then use:
336 353  
337 354 ```ruby
338   -synonyms: -> { CSV.read("/some/path/synonyms.csv") }
  355 +search_synonyms: "synonyms.txt"
339 356 ```
340 357  
341   -For directional synonyms, use:
  358 +Add [elasticsearch-xpack](https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-xpack) to your Gemfile:
342 359  
343 360 ```ruby
344   -synonyms: ["lightbulb => halogenlamp"]
  361 +gem 'elasticsearch-xpack', '>= 7.8.0.pre'
  362 +```
  363 +
  364 +And use:
  365 +
  366 +```ruby
  367 +Product.search_index.reload_synonyms
345 368 ```
346 369  
347   -### Tags and Dynamic Synonyms
  370 +#### Elasticsearch < 7.3
348 371  
349   -The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms or tags without a full reindex. You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do:
  372 +You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do:
350 373  
351 374 ```ruby
352 375 class Product < ApplicationRecord
... ...
lib/searchkick/index.rb
... ... @@ -174,6 +174,13 @@ module Searchkick
174 174 Searchkick.search(like_text, model: record.class, **options)
175 175 end
176 176  
  # Asks Elasticsearch to reload the search analyzers of this index by calling
  # `reload_search_analyzers` with `index: name` on the client's xpack indices API.
  #
  # Raises Error ("Requires Elasticsearch 7.3+") when the server is below 7.3.0, and
  # Error ("Requires elasticsearch-xpack 7.8+") when `client.xpack` does not respond
  # to `indices`.
  177 + def reload_synonyms
  # Required inside the method, so the elasticsearch-xpack gem is only loaded when
  # this method is called. Note the require runs before either check below.
  178 + require "elasticsearch/xpack"
  179 + raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
  # Feature-detects the xpack client instead of comparing gem version numbers.
  180 + raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
  181 + client.xpack.indices.reload_search_analyzers(index: name)
  182 + end
  183 +
177 184 # queue
178 185  
179 186 def reindex_queue
... ...
lib/searchkick/index_options.rb
... ... @@ -7,6 +7,7 @@ module Searchkick
7 7  
8 8 below62 = Searchkick.server_below?("6.2.0")
9 9 below70 = Searchkick.server_below?("7.0.0")
  10 + below73 = Searchkick.server_below?("7.3.0")
10 11  
11 12 if below70
12 13 index_type = options[:_type]
... ... @@ -285,9 +286,7 @@ module Searchkick
285 286  
286 287 # synonyms
287 288 synonyms = options[:synonyms] || []
288   -
289 289 synonyms = synonyms.call if synonyms.respond_to?(:call)
290   -
291 290 if synonyms.any?
292 291 settings[:analysis][:filter][:searchkick_synonym] = {
293 292 type: "synonym",
... ... @@ -310,6 +309,29 @@ module Searchkick
310 309 end
311 310 end
312 311  
  # Search-time synonyms: builds a `synonym_graph` token filter from
  # options[:search_synonyms] and registers it on the two search analyzers below.
  # Nothing is added when the option is missing or an empty list.
  312 + search_synonyms = options[:search_synonyms] || []
  # A callable (anything responding to `call`) is invoked to obtain the actual value.
  313 + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  314 + if search_synonyms.is_a?(String) || search_synonyms.any?
  # A String is passed through unchanged as `synonyms_path` (a synonyms file).
  315 + if search_synonyms.is_a?(String)
  316 + synonym_graph = {
  317 + type: "synonym_graph",
  318 + synonyms_path: search_synonyms
  319 + }
  # `updateable` is only set when the server is 7.3.0 or newer (below73 is false).
  320 + synonym_graph[:updateable] = true unless below73
  321 + else
  322 + synonym_graph = {
  323 + type: "synonym_graph",
  324 + # TODO confirm this is correct
  # Entries whose `size` is <= 1 are dropped (for an Array entry that is the number
  # of terms; for a String entry it is the character count). Array entries are
  # joined with ",", String entries are kept as written, and all are downcased.
  325 + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
  326 + }
  327 + end
  328 + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
  329 +
  # The filter name is inserted at index 2 of each analyzer's existing filter list.
  # NOTE(review): the analyzer definitions are not visible in this hunk; the fixed
  # position presumably places the filter before stemming - confirm.
  330 + [:searchkick_search2, :searchkick_word_search].each do |analyzer|
  331 + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  332 + end
  333 + end
  334 +
313 335 if options[:wordnet]
314 336 settings[:analysis][:filter][:searchkick_wordnet] = {
315 337 type: "synonym",
... ...
lib/searchkick/model.rb
... ... @@ -5,7 +5,7 @@ module Searchkick
5 5  
6 6 unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7 7 :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8   - :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity,
  8 + :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9 9 :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end,
10 10 :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
11 11 raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
... ...
searchkick.gemspec
... ... @@ -22,4 +22,5 @@ Gem::Specification.new do |spec|
22 22 spec.add_development_dependency "bundler"
23 23 spec.add_development_dependency "minitest"
24 24 spec.add_development_dependency "rake"
  25 + spec.add_development_dependency "elasticsearch-xpack", ">= 7.8.0.pre"
25 26 end
... ...
test/models/speaker.rb
1 1 class Speaker
2 2 searchkick \
3   - conversions: ["conversions_a", "conversions_b"]
  3 + conversions: ["conversions_a", "conversions_b"],
  4 + search_synonyms: [
  5 + ["clorox", "bleach"],
  6 + ["burger", "hamburger"],
  7 + ["bandaids", "bandages"],
  8 + ["UPPERCASE", "lowercase"],
  9 + "led => led,lightbulb",
  10 + "halogen lamp => lightbulb",
  11 + ["United States of America", "USA"]
  12 + ],
  13 + word_start: [:name]
4 14  
5 15 attr_accessor :conversions_a, :conversions_b, :aisle
6 16  
... ...
test/search_synonyms_test.rb 0 → 100644
... ... @@ -0,0 +1,76 @@
  1 +require_relative "test_helper"
  2 +
  # Tests for the `search_synonyms` option, run against the Speaker model
  # (see `default_model` at the bottom of the class).
  3 +class SearchSynonymsTest < Minitest::Test
  4 + def test_bleach
  5 + store_names ["Clorox Bleach", "Kroger Bleach"]
  6 + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"]
  7 + end
  8 +
  9 + def test_burger_buns
  10 + store_names ["Hamburger Buns"]
  11 + assert_search "burger buns", ["Hamburger Buns"]
  12 + end
  13 +
  14 + def test_bandaids
  15 + store_names ["Band-Aid", "Kroger 12-Pack Bandages"]
  16 + assert_search "bandaids", ["Band-Aid", "Kroger 12-Pack Bandages"]
  17 + end
  18 +
  19 + def test_reverse
  20 + store_names ["Hamburger"]
  21 + assert_search "burger", ["Hamburger"]
  22 + end
  23 +
  # The plural query "hamburgers" is expected to return nothing, while the exact
  # synonym term "hamburger" is expected to find "Burger".
  24 + def test_not_stemmed
  25 + store_names ["Burger"]
  26 + assert_search "hamburgers", []
  27 + assert_search "hamburger", ["Burger"]
  28 + end
  29 +
  # Same expectation as test_bleach, but with the `match: :word_start` search option.
  30 + def test_word_start
  31 + store_names ["Clorox Bleach", "Kroger Bleach"]
  32 + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"], {match: :word_start}
  33 + end
  34 +
  # One-way rules: "led" also finds "Lightbulb", but "Lightbulb" is expected to
  # find only itself.
  35 + def test_directional
  36 + store_names ["Lightbulb", "Green Onions", "Led"]
  37 + assert_search "led", ["Lightbulb", "Led"]
  38 + assert_search "Lightbulb", ["Lightbulb"]
  39 + assert_search "Halogen Lamp", ["Lightbulb"]
  40 + assert_search "onions", ["Green Onions"]
  41 + end
  42 +
  # A synonym defined in upper case is expected to match a lower-case query.
  43 + def test_case
  44 + store_names ["Uppercase"]
  45 + assert_search "lowercase", ["Uppercase"]
  46 + end
  47 +
  # The full multi-word phrase finds "USA"; the partial phrase "United States"
  # is expected to return nothing.
  48 + def test_multiple_words
  49 + store_names ["USA"]
  50 + assert_search "United States of America", ["USA"]
  51 + assert_search "usa", ["USA"]
  52 + assert_search "United States", []
  53 + end
  54 +
  55 + def test_multiple_words_expanded
  56 + store_names ["United States of America"]
  57 + assert_search "usa", ["United States of America"]
  58 + assert_search "United States of America", ["United States of America"]
  59 + assert_search "United States", ["United States of America"] # no synonyms used
  60 + end
  61 +
  # Below Elasticsearch 7.3.0 the call must raise Searchkick::Error with the exact
  # message asserted here; on newer servers the test only checks that the call
  # does not raise.
  62 + def test_reload_synonyms
  63 + if Searchkick.server_below?("7.3.0")
  64 + error = assert_raises(Searchkick::Error) do
  65 + Speaker.search_index.reload_synonyms
  66 + end
  67 + assert_equal "Requires Elasticsearch 7.3+", error.message
  68 + else
  69 + Speaker.search_index.reload_synonyms
  70 + end
  71 + end
  72 +
  # Makes store_names / assert_search in this class operate on Speaker.
  73 + def default_model
  74 + Speaker
  75 + end
  76 +end
... ...
test/test_helper.rb
... ... @@ -86,28 +86,28 @@ class Minitest::Test
86 86 end
87 87 end
88 88  
89   - def store_names(names, klass = Product, reindex: true)
  89 + def store_names(names, klass = default_model, reindex: true)
90 90 store names.map { |name| {name: name} }, klass, reindex: reindex
91 91 end
92 92  
93 93 # no order
94   - def assert_search(term, expected, options = {}, klass = Product)
  94 + def assert_search(term, expected, options = {}, klass = default_model)
95 95 assert_equal expected.sort, klass.search(term, **options).map(&:name).sort
96 96 end
97 97  
98   - def assert_order(term, expected, options = {}, klass = Product)
  98 + def assert_order(term, expected, options = {}, klass = default_model)
99 99 assert_equal expected, klass.search(term, **options).map(&:name)
100 100 end
101 101  
102   - def assert_equal_scores(term, options = {}, klass = Product)
  102 + def assert_equal_scores(term, options = {}, klass = default_model)
103 103 assert_equal 1, klass.search(term, **options).hits.map { |a| a["_score"] }.uniq.size
104 104 end
105 105  
106   - def assert_first(term, expected, options = {}, klass = Product)
  106 + def assert_first(term, expected, options = {}, klass = default_model)
107 107 assert_equal expected, klass.search(term, **options).map(&:name).first
108 108 end
109 109  
110   - def assert_misspellings(term, expected, misspellings = {}, klass = Product)
  110 + def assert_misspellings(term, expected, misspellings = {}, klass = default_model)
111 111 options = {
112 112 fields: [:name, :color],
113 113 misspellings: misspellings
... ... @@ -134,4 +134,8 @@ class Minitest::Test
134 134 def cequel?
135 135 defined?(Cequel)
136 136 end
  137 +
  # Model used by store_names and the assert_* helpers when no `klass` argument is
  # passed. Test classes can override this method (SearchSynonymsTest returns Speaker).
  138 + def default_model
  139 + Product
  140 + end
137 141 end
... ...