Commit 94ca756e23ffd624bd62e7ca2a4e5a30f3522a91

Authored by Andrew Kane
1 parent 7213725b

Added support for reloadable, multi-word, search time synonyms

1 -## 4.3.2 (unreleased) 1 +## 4.4.0 (unreleased)
2 2
  3 +- Added support for reloadable, multi-word, search time synonyms
3 - Fixed another deprecation warning in Ruby 2.7 4 - Fixed another deprecation warning in Ruby 2.7
4 5
5 ## 4.3.1 (2020-05-13) 6 ## 4.3.1 (2020-05-13)
@@ -324,29 +324,52 @@ A few languages require plugins: @@ -324,29 +324,52 @@ A few languages require plugins:
324 324
325 ```ruby 325 ```ruby
326 class Product < ApplicationRecord 326 class Product < ApplicationRecord
327 - searchkick synonyms: [["pop", "soda"], ["burger", "hamburger"]] 327 + searchkick search_synonyms: [["pop", "soda"], ["burger", "hamburger"]]
328 end 328 end
329 ``` 329 ```
330 330
331 -Call `Product.reindex` after changing synonyms. 331 +Call `Product.reindex` after changing synonyms. Synonyms are applied at search time before stemming, and can be a single word or multiple words.
332 332
333 -Synonyms cannot be multiple words at the moment. 333 +For directional synonyms, use:
  334 +
  335 +```ruby
  336 +search_synonyms: ["lightbulb => halogenlamp"]
  337 +```
  338 +
  339 +### Dynamic Synonyms
  340 +
  341 +The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms without a full reindex.
  342 +
  343 +#### Elasticsearch 7.3+
  344 +
  345 +For Elasticsearch 7.3+, we recommend placing synonyms in a file on the Elasticsearch server (in the `config` directory). This allows you to reload synonyms without reindexing.
334 346
335 -To read synonyms from a file, use: 347 +```txt
  348 +pop, soda
  349 +burger, hamburger
  350 +```
  351 +
  352 +Then use:
336 353
337 ```ruby 354 ```ruby
338 -synonyms: -> { CSV.read("/some/path/synonyms.csv") } 355 +search_synonyms: "synonyms.txt"
339 ``` 356 ```
340 357
341 -For directional synonyms, use: 358 +Add [elasticsearch-xpack](https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-xpack) to your Gemfile:
342 359
343 ```ruby 360 ```ruby
344 -synonyms: ["lightbulb => halogenlamp"] 361 +gem 'elasticsearch-xpack', '>= 7.8.0.pre'
  362 +```
  363 +
  364 +And use:
  365 +
  366 +```ruby
  367 +Product.search_index.reload_synonyms
345 ``` 368 ```
346 369
347 -### Tags and Dynamic Synonyms 370 +#### Elasticsearch < 7.3
348 371
349 -The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms or tags without a full reindex. You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do: 372 +You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do:
350 373
351 ```ruby 374 ```ruby
352 class Product < ApplicationRecord 375 class Product < ApplicationRecord
lib/searchkick/index.rb
@@ -174,6 +174,13 @@ module Searchkick @@ -174,6 +174,13 @@ module Searchkick
174 Searchkick.search(like_text, model: record.class, **options) 174 Searchkick.search(like_text, model: record.class, **options)
175 end 175 end
176 176
  177 + def reload_synonyms
  178 + require "elasticsearch/xpack"
  179 + raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
  180 + raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
  181 + client.xpack.indices.reload_search_analyzers(index: name)
  182 + end
  183 +
177 # queue 184 # queue
178 185
179 def reindex_queue 186 def reindex_queue
lib/searchkick/index_options.rb
@@ -7,6 +7,7 @@ module Searchkick @@ -7,6 +7,7 @@ module Searchkick
7 7
8 below62 = Searchkick.server_below?("6.2.0") 8 below62 = Searchkick.server_below?("6.2.0")
9 below70 = Searchkick.server_below?("7.0.0") 9 below70 = Searchkick.server_below?("7.0.0")
  10 + below73 = Searchkick.server_below?("7.3.0")
10 11
11 if below70 12 if below70
12 index_type = options[:_type] 13 index_type = options[:_type]
@@ -285,9 +286,7 @@ module Searchkick @@ -285,9 +286,7 @@ module Searchkick
285 286
286 # synonyms 287 # synonyms
287 synonyms = options[:synonyms] || [] 288 synonyms = options[:synonyms] || []
288 -  
289 synonyms = synonyms.call if synonyms.respond_to?(:call) 289 synonyms = synonyms.call if synonyms.respond_to?(:call)
290 -  
291 if synonyms.any? 290 if synonyms.any?
292 settings[:analysis][:filter][:searchkick_synonym] = { 291 settings[:analysis][:filter][:searchkick_synonym] = {
293 type: "synonym", 292 type: "synonym",
@@ -310,6 +309,29 @@ module Searchkick @@ -310,6 +309,29 @@ module Searchkick
310 end 309 end
311 end 310 end
312 311
  312 + search_synonyms = options[:search_synonyms] || []
  313 + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  314 + if search_synonyms.is_a?(String) || search_synonyms.any?
  315 + if search_synonyms.is_a?(String)
  316 + synonym_graph = {
  317 + type: "synonym_graph",
  318 + synonyms_path: search_synonyms
  319 + }
  320 + synonym_graph[:updateable] = true unless below73
  321 + else
  322 + synonym_graph = {
  323 + type: "synonym_graph",
  324 + # TODO confirm this is correct
  325 + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
  326 + }
  327 + end
  328 + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
  329 +
  330 + [:searchkick_search2, :searchkick_word_search].each do |analyzer|
  331 + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  332 + end
  333 + end
  334 +
313 if options[:wordnet] 335 if options[:wordnet]
314 settings[:analysis][:filter][:searchkick_wordnet] = { 336 settings[:analysis][:filter][:searchkick_wordnet] = {
315 type: "synonym", 337 type: "synonym",
lib/searchkick/model.rb
@@ -5,7 +5,7 @@ module Searchkick @@ -5,7 +5,7 @@ module Searchkick
5 5
6 unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, 6 unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7 :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, 7 :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8 - :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity, 8 + :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9 :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end, 9 :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end,
10 :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] 10 :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
11 raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? 11 raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
searchkick.gemspec
@@ -22,4 +22,5 @@ Gem::Specification.new do |spec| @@ -22,4 +22,5 @@ Gem::Specification.new do |spec|
22 spec.add_development_dependency "bundler" 22 spec.add_development_dependency "bundler"
23 spec.add_development_dependency "minitest" 23 spec.add_development_dependency "minitest"
24 spec.add_development_dependency "rake" 24 spec.add_development_dependency "rake"
  25 + spec.add_development_dependency "elasticsearch-xpack", ">= 7.8.0.pre"
25 end 26 end
test/models/speaker.rb
1 class Speaker 1 class Speaker
2 searchkick \ 2 searchkick \
3 - conversions: ["conversions_a", "conversions_b"] 3 + conversions: ["conversions_a", "conversions_b"],
  4 + search_synonyms: [
  5 + ["clorox", "bleach"],
  6 + ["burger", "hamburger"],
  7 + ["bandaids", "bandages"],
  8 + ["UPPERCASE", "lowercase"],
  9 + "led => led,lightbulb",
  10 + "halogen lamp => lightbulb",
  11 + ["United States of America", "USA"]
  12 + ],
  13 + word_start: [:name]
4 14
5 attr_accessor :conversions_a, :conversions_b, :aisle 15 attr_accessor :conversions_a, :conversions_b, :aisle
6 16
test/search_synonyms_test.rb 0 → 100644
@@ -0,0 +1,76 @@ @@ -0,0 +1,76 @@
  1 +require_relative "test_helper"
  2 +
  3 +class SearchSynonymsTest < Minitest::Test
  4 + def test_bleach
  5 + store_names ["Clorox Bleach", "Kroger Bleach"]
  6 + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"]
  7 + end
  8 +
  9 + def test_burger_buns
  10 + store_names ["Hamburger Buns"]
  11 + assert_search "burger buns", ["Hamburger Buns"]
  12 + end
  13 +
  14 + def test_bandaids
  15 + store_names ["Band-Aid", "Kroger 12-Pack Bandages"]
  16 + assert_search "bandaids", ["Band-Aid", "Kroger 12-Pack Bandages"]
  17 + end
  18 +
  19 + def test_reverse
  20 + store_names ["Hamburger"]
  21 + assert_search "burger", ["Hamburger"]
  22 + end
  23 +
  24 + def test_not_stemmed
  25 + store_names ["Burger"]
  26 + assert_search "hamburgers", []
  27 + assert_search "hamburger", ["Burger"]
  28 + end
  29 +
  30 + def test_word_start
  31 + store_names ["Clorox Bleach", "Kroger Bleach"]
  32 + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"], {match: :word_start}
  33 + end
  34 +
  35 + def test_directional
  36 + store_names ["Lightbulb", "Green Onions", "Led"]
  37 + assert_search "led", ["Lightbulb", "Led"]
  38 + assert_search "Lightbulb", ["Lightbulb"]
  39 + assert_search "Halogen Lamp", ["Lightbulb"]
  40 + assert_search "onions", ["Green Onions"]
  41 + end
  42 +
  43 + def test_case
  44 + store_names ["Uppercase"]
  45 + assert_search "lowercase", ["Uppercase"]
  46 + end
  47 +
  48 + def test_multiple_words
  49 + store_names ["USA"]
  50 + assert_search "United States of America", ["USA"]
  51 + assert_search "usa", ["USA"]
  52 + assert_search "United States", []
  53 + end
  54 +
  55 + def test_multiple_words_expanded
  56 + store_names ["United States of America"]
  57 + assert_search "usa", ["United States of America"]
  58 + assert_search "United States of America", ["United States of America"]
  59 + assert_search "United States", ["United States of America"] # no synonyms used
  60 + end
  61 +
  62 + def test_reload_synonyms
  63 + if Searchkick.server_below?("7.3.0")
  64 + error = assert_raises(Searchkick::Error) do
  65 + Speaker.search_index.reload_synonyms
  66 + end
  67 + assert_equal "Requires Elasticsearch 7.3+", error.message
  68 + else
  69 + Speaker.search_index.reload_synonyms
  70 + end
  71 + end
  72 +
  73 + def default_model
  74 + Speaker
  75 + end
  76 +end
test/test_helper.rb
@@ -86,28 +86,28 @@ class Minitest::Test @@ -86,28 +86,28 @@ class Minitest::Test
86 end 86 end
87 end 87 end
88 88
89 - def store_names(names, klass = Product, reindex: true) 89 + def store_names(names, klass = default_model, reindex: true)
90 store names.map { |name| {name: name} }, klass, reindex: reindex 90 store names.map { |name| {name: name} }, klass, reindex: reindex
91 end 91 end
92 92
93 # no order 93 # no order
94 - def assert_search(term, expected, options = {}, klass = Product) 94 + def assert_search(term, expected, options = {}, klass = default_model)
95 assert_equal expected.sort, klass.search(term, **options).map(&:name).sort 95 assert_equal expected.sort, klass.search(term, **options).map(&:name).sort
96 end 96 end
97 97
98 - def assert_order(term, expected, options = {}, klass = Product) 98 + def assert_order(term, expected, options = {}, klass = default_model)
99 assert_equal expected, klass.search(term, **options).map(&:name) 99 assert_equal expected, klass.search(term, **options).map(&:name)
100 end 100 end
101 101
102 - def assert_equal_scores(term, options = {}, klass = Product) 102 + def assert_equal_scores(term, options = {}, klass = default_model)
103 assert_equal 1, klass.search(term, **options).hits.map { |a| a["_score"] }.uniq.size 103 assert_equal 1, klass.search(term, **options).hits.map { |a| a["_score"] }.uniq.size
104 end 104 end
105 105
106 - def assert_first(term, expected, options = {}, klass = Product) 106 + def assert_first(term, expected, options = {}, klass = default_model)
107 assert_equal expected, klass.search(term, **options).map(&:name).first 107 assert_equal expected, klass.search(term, **options).map(&:name).first
108 end 108 end
109 109
110 - def assert_misspellings(term, expected, misspellings = {}, klass = Product) 110 + def assert_misspellings(term, expected, misspellings = {}, klass = default_model)
111 options = { 111 options = {
112 fields: [:name, :color], 112 fields: [:name, :color],
113 misspellings: misspellings 113 misspellings: misspellings
@@ -134,4 +134,8 @@ class Minitest::Test @@ -134,4 +134,8 @@ class Minitest::Test
134 def cequel? 134 def cequel?
135 defined?(Cequel) 135 defined?(Cequel)
136 end 136 end
  137 +
  138 + def default_model
  139 + Product
  140 + end
137 end 141 end