Commit 94ca756e23ffd624bd62e7ca2a4e5a30f3522a91
1 parent
7213725b
Exists in
master
and in
8 other branches
Added support for reloadable, multi-word, search time synonyms
Showing
9 changed files
with
164 additions
and
20 deletions
Show diff stats
CHANGELOG.md
README.md
... | ... | @@ -324,29 +324,52 @@ A few languages require plugins: |
324 | 324 | |
325 | 325 | ```ruby |
326 | 326 | class Product < ApplicationRecord |
327 | - searchkick synonyms: [["pop", "soda"], ["burger", "hamburger"]] | |
327 | + searchkick search_synonyms: [["pop", "soda"], ["burger", "hamburger"]] | |
328 | 328 | end |
329 | 329 | ``` |
330 | 330 | |
331 | -Call `Product.reindex` after changing synonyms. | |
331 | +Call `Product.reindex` after changing synonyms. Synonyms are applied at search time before stemming, and can be a single word or multiple words. | |
332 | 332 | |
333 | -Synonyms cannot be multiple words at the moment. | |
333 | +For directional synonyms, use: | |
334 | + | |
335 | +```ruby | |
336 | +search_synonyms: ["lightbulb => halogenlamp"] | |
337 | +``` | |
338 | + | |
339 | +### Dynamic Synonyms | |
340 | + | |
341 | +The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms without a full reindex. | |
342 | + | |
343 | +#### Elasticsearch 7.3+ | |
344 | + | |
345 | +For Elasticsearch 7.3+, we recommend placing synonyms in a file on the Elasticsearch server (in the `config` directory). This allows you to reload synonyms without reindexing. | |
334 | 346 | |
335 | -To read synonyms from a file, use: | |
347 | +```txt | |
348 | +pop, soda | |
349 | +burger, hamburger | |
350 | +``` | |
351 | + | |
352 | +Then use: | |
336 | 353 | |
337 | 354 | ```ruby |
338 | -synonyms: -> { CSV.read("/some/path/synonyms.csv") } | |
355 | +search_synonyms: "synonyms.txt" | |
339 | 356 | ``` |
340 | 357 | |
341 | -For directional synonyms, use: | |
358 | +Add [elasticsearch-xpack](https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-xpack) to your Gemfile: | |
342 | 359 | |
343 | 360 | ```ruby |
344 | -synonyms: ["lightbulb => halogenlamp"] | |
361 | +gem 'elasticsearch-xpack', '>= 7.8.0.pre' | |
362 | +``` | |
363 | + | |
364 | +And use: | |
365 | + | |
366 | +```ruby | |
367 | +Product.search_index.reload_synonyms | |
345 | 368 | ``` |
346 | 369 | |
347 | -### Tags and Dynamic Synonyms | |
370 | +#### Elasticsearch < 7.3 | |
348 | 371 | |
349 | -The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms or tags without a full reindex. You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do: | |
372 | +You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do: | |
350 | 373 | |
351 | 374 | ```ruby |
352 | 375 | class Product < ApplicationRecord | ... | ... |
lib/searchkick/index.rb
... | ... | @@ -174,6 +174,13 @@ module Searchkick |
174 | 174 | Searchkick.search(like_text, model: record.class, **options) |
175 | 175 | end |
176 | 176 | |
177 | + def reload_synonyms | |
178 | + require "elasticsearch/xpack" | |
179 | + raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0") | |
180 | + raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices) | |
181 | + client.xpack.indices.reload_search_analyzers(index: name) | |
182 | + end | |
183 | + | |
177 | 184 | # queue |
178 | 185 | |
179 | 186 | def reindex_queue | ... | ... |
lib/searchkick/index_options.rb
... | ... | @@ -7,6 +7,7 @@ module Searchkick |
7 | 7 | |
8 | 8 | below62 = Searchkick.server_below?("6.2.0") |
9 | 9 | below70 = Searchkick.server_below?("7.0.0") |
10 | + below73 = Searchkick.server_below?("7.3.0") | |
10 | 11 | |
11 | 12 | if below70 |
12 | 13 | index_type = options[:_type] |
... | ... | @@ -285,9 +286,7 @@ module Searchkick |
285 | 286 | |
286 | 287 | # synonyms |
287 | 288 | synonyms = options[:synonyms] || [] |
288 | - | |
289 | 289 | synonyms = synonyms.call if synonyms.respond_to?(:call) |
290 | - | |
291 | 290 | if synonyms.any? |
292 | 291 | settings[:analysis][:filter][:searchkick_synonym] = { |
293 | 292 | type: "synonym", |
... | ... | @@ -310,6 +309,29 @@ module Searchkick |
310 | 309 | end |
311 | 310 | end |
312 | 311 | |
312 | + search_synonyms = options[:search_synonyms] || [] | |
313 | + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call) | |
314 | + if search_synonyms.is_a?(String) || search_synonyms.any? | |
315 | + if search_synonyms.is_a?(String) | |
316 | + synonym_graph = { | |
317 | + type: "synonym_graph", | |
318 | + synonyms_path: search_synonyms | |
319 | + } | |
320 | + synonym_graph[:updateable] = true unless below73 | |
321 | + else | |
322 | + synonym_graph = { | |
323 | + type: "synonym_graph", | |
324 | + # TODO: confirm this normalization is correct (keeps only entries with size > 1, joins arrays with ",", downcases) | |
325 | + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase) | |
326 | + } | |
327 | + end | |
328 | + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph | |
329 | + | |
330 | + [:searchkick_search2, :searchkick_word_search].each do |analyzer| | |
331 | + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") | |
332 | + end | |
333 | + end | |
334 | + | |
313 | 335 | if options[:wordnet] |
314 | 336 | settings[:analysis][:filter][:searchkick_wordnet] = { |
315 | 337 | type: "synonym", | ... | ... |
lib/searchkick/model.rb
... | ... | @@ -5,7 +5,7 @@ module Searchkick |
5 | 5 | |
6 | 6 | unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, |
7 | 7 | :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, |
8 | - :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity, | |
8 | + :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, | |
9 | 9 | :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end, |
10 | 10 | :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] |
11 | 11 | raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? | ... | ... |
searchkick.gemspec
test/models/speaker.rb
1 | 1 | class Speaker |
2 | 2 | searchkick \ |
3 | - conversions: ["conversions_a", "conversions_b"] | |
3 | + conversions: ["conversions_a", "conversions_b"], | |
4 | + search_synonyms: [ | |
5 | + ["clorox", "bleach"], | |
6 | + ["burger", "hamburger"], | |
7 | + ["bandaids", "bandages"], | |
8 | + ["UPPERCASE", "lowercase"], | |
9 | + "led => led,lightbulb", | |
10 | + "halogen lamp => lightbulb", | |
11 | + ["United States of America", "USA"] | |
12 | + ], | |
13 | + word_start: [:name] | |
4 | 14 | |
5 | 15 | attr_accessor :conversions_a, :conversions_b, :aisle |
6 | 16 | ... | ... |
... | ... | @@ -0,0 +1,76 @@ |
1 | +require_relative "test_helper" | |
2 | + | |
3 | +class SearchSynonymsTest < Minitest::Test | |
4 | + def test_bleach | |
5 | + store_names ["Clorox Bleach", "Kroger Bleach"] | |
6 | + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"] | |
7 | + end | |
8 | + | |
9 | + def test_burger_buns | |
10 | + store_names ["Hamburger Buns"] | |
11 | + assert_search "burger buns", ["Hamburger Buns"] | |
12 | + end | |
13 | + | |
14 | + def test_bandaids | |
15 | + store_names ["Band-Aid", "Kroger 12-Pack Bandages"] | |
16 | + assert_search "bandaids", ["Band-Aid", "Kroger 12-Pack Bandages"] | |
17 | + end | |
18 | + | |
19 | + def test_reverse | |
20 | + store_names ["Hamburger"] | |
21 | + assert_search "burger", ["Hamburger"] | |
22 | + end | |
23 | + | |
24 | + def test_not_stemmed | |
25 | + store_names ["Burger"] | |
26 | + assert_search "hamburgers", [] | |
27 | + assert_search "hamburger", ["Burger"] | |
28 | + end | |
29 | + | |
30 | + def test_word_start | |
31 | + store_names ["Clorox Bleach", "Kroger Bleach"] | |
32 | + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"], {match: :word_start} | |
33 | + end | |
34 | + | |
35 | + def test_directional | |
36 | + store_names ["Lightbulb", "Green Onions", "Led"] | |
37 | + assert_search "led", ["Lightbulb", "Led"] | |
38 | + assert_search "Lightbulb", ["Lightbulb"] | |
39 | + assert_search "Halogen Lamp", ["Lightbulb"] | |
40 | + assert_search "onions", ["Green Onions"] | |
41 | + end | |
42 | + | |
43 | + def test_case | |
44 | + store_names ["Uppercase"] | |
45 | + assert_search "lowercase", ["Uppercase"] | |
46 | + end | |
47 | + | |
48 | + def test_multiple_words | |
49 | + store_names ["USA"] | |
50 | + assert_search "United States of America", ["USA"] | |
51 | + assert_search "usa", ["USA"] | |
52 | + assert_search "United States", [] | |
53 | + end | |
54 | + | |
55 | + def test_multiple_words_expanded | |
56 | + store_names ["United States of America"] | |
57 | + assert_search "usa", ["United States of America"] | |
58 | + assert_search "United States of America", ["United States of America"] | |
59 | + assert_search "United States", ["United States of America"] # no synonyms used | |
60 | + end | |
61 | + | |
62 | + def test_reload_synonyms | |
63 | + if Searchkick.server_below?("7.3.0") | |
64 | + error = assert_raises(Searchkick::Error) do | |
65 | + Speaker.search_index.reload_synonyms | |
66 | + end | |
67 | + assert_equal "Requires Elasticsearch 7.3+", error.message | |
68 | + else | |
69 | + Speaker.search_index.reload_synonyms | |
70 | + end | |
71 | + end | |
72 | + | |
73 | + def default_model | |
74 | + Speaker | |
75 | + end | |
76 | +end | ... | ... |
test/test_helper.rb
... | ... | @@ -86,28 +86,28 @@ class Minitest::Test |
86 | 86 | end |
87 | 87 | end |
88 | 88 | |
89 | - def store_names(names, klass = Product, reindex: true) | |
89 | + def store_names(names, klass = default_model, reindex: true) | |
90 | 90 | store names.map { |name| {name: name} }, klass, reindex: reindex |
91 | 91 | end |
92 | 92 | |
93 | 93 | # no order |
94 | - def assert_search(term, expected, options = {}, klass = Product) | |
94 | + def assert_search(term, expected, options = {}, klass = default_model) | |
95 | 95 | assert_equal expected.sort, klass.search(term, **options).map(&:name).sort |
96 | 96 | end |
97 | 97 | |
98 | - def assert_order(term, expected, options = {}, klass = Product) | |
98 | + def assert_order(term, expected, options = {}, klass = default_model) | |
99 | 99 | assert_equal expected, klass.search(term, **options).map(&:name) |
100 | 100 | end |
101 | 101 | |
102 | - def assert_equal_scores(term, options = {}, klass = Product) | |
102 | + def assert_equal_scores(term, options = {}, klass = default_model) | |
103 | 103 | assert_equal 1, klass.search(term, **options).hits.map { |a| a["_score"] }.uniq.size |
104 | 104 | end |
105 | 105 | |
106 | - def assert_first(term, expected, options = {}, klass = Product) | |
106 | + def assert_first(term, expected, options = {}, klass = default_model) | |
107 | 107 | assert_equal expected, klass.search(term, **options).map(&:name).first |
108 | 108 | end |
109 | 109 | |
110 | - def assert_misspellings(term, expected, misspellings = {}, klass = Product) | |
110 | + def assert_misspellings(term, expected, misspellings = {}, klass = default_model) | |
111 | 111 | options = { |
112 | 112 | fields: [:name, :color], |
113 | 113 | misspellings: misspellings |
... | ... | @@ -134,4 +134,8 @@ class Minitest::Test |
134 | 134 | def cequel? |
135 | 135 | defined?(Cequel) |
136 | 136 | end |
137 | + | |
138 | + def default_model | |
139 | + Product | |
140 | + end | |
137 | 141 | end | ... | ... |