diff --git a/CHANGELOG.md b/CHANGELOG.md index 91aa586..704c4f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ -## 4.3.2 (unreleased) +## 4.4.0 (unreleased) +- Added support for reloadable, multi-word, search time synonyms - Fixed another deprecation warning in Ruby 2.7 ## 4.3.1 (2020-05-13) diff --git a/README.md b/README.md index 29f9331..ed29c50 100644 --- a/README.md +++ b/README.md @@ -324,29 +324,52 @@ A few languages require plugins: ```ruby class Product < ApplicationRecord - searchkick synonyms: [["pop", "soda"], ["burger", "hamburger"]] + searchkick search_synonyms: [["pop", "soda"], ["burger", "hamburger"]] end ``` -Call `Product.reindex` after changing synonyms. +Call `Product.reindex` after changing synonyms. Synonyms are applied at search time before stemming, and can be a single word or multiple words. -Synonyms cannot be multiple words at the moment. +For directional synonyms, use: + +```ruby +search_synonyms: ["lightbulb => halogenlamp"] +``` + +### Dynamic Synonyms + +The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms without a full reindex. + +#### Elasticsearch 7.3+ + +For Elasticsearch 7.3+, we recommend placing synonyms in a file on the Elasticsearch server (in the `config` directory). This allows you to reload synonyms without reindexing. 
-To read synonyms from a file, use: +```txt +pop, soda +burger, hamburger +``` + +Then use: ```ruby -synonyms: -> { CSV.read("/some/path/synonyms.csv") } +search_synonyms: "synonyms.txt" ``` -For directional synonyms, use: +Add [elasticsearch-xpack](https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-xpack) to your Gemfile: ```ruby -synonyms: ["lightbulb => halogenlamp"] +gem 'elasticsearch-xpack', '>= 7.8.0.pre' +``` + +And use: + +```ruby +Product.search_index.reload_synonyms ``` -### Tags and Dynamic Synonyms +#### Elasticsearch < 7.3 -The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms or tags without a full reindex. You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do: +You can use a library like [ActsAsTaggableOn](https://github.com/mbleigh/acts-as-taggable-on) and do: ```ruby class Product < ApplicationRecord diff --git a/lib/searchkick/index.rb b/lib/searchkick/index.rb index 48beede..0d6755c 100644 --- a/lib/searchkick/index.rb +++ b/lib/searchkick/index.rb @@ -174,6 +174,13 @@ module Searchkick Searchkick.search(like_text, model: record.class, **options) end + def reload_synonyms + require "elasticsearch/xpack" + raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0") + raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices) + client.xpack.indices.reload_search_analyzers(index: name) + end + # queue def reindex_queue diff --git a/lib/searchkick/index_options.rb b/lib/searchkick/index_options.rb index a636410..b9d3673 100644 --- a/lib/searchkick/index_options.rb +++ b/lib/searchkick/index_options.rb @@ -7,6 +7,7 @@ module Searchkick below62 = Searchkick.server_below?("6.2.0") below70 = Searchkick.server_below?("7.0.0") + below73 = Searchkick.server_below?("7.3.0") if below70 index_type = options[:_type] 
@@ -285,9 +286,7 @@ module Searchkick # synonyms synonyms = options[:synonyms] || [] - synonyms = synonyms.call if synonyms.respond_to?(:call) - if synonyms.any? settings[:analysis][:filter][:searchkick_synonym] = { type: "synonym", @@ -310,6 +309,29 @@ module Searchkick end end + search_synonyms = options[:search_synonyms] || [] + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call) + if search_synonyms.is_a?(String) || search_synonyms.any? + if search_synonyms.is_a?(String) + synonym_graph = { + type: "synonym_graph", + synonyms_path: search_synonyms + } + synonym_graph[:updateable] = true unless below73 + else + synonym_graph = { + type: "synonym_graph", + # TODO confirm this is correct + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase) + } + end + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph + + [:searchkick_search2, :searchkick_word_search].each do |analyzer| + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") + end + end + if options[:wordnet] settings[:analysis][:filter][:searchkick_wordnet] = { type: "synonym", diff --git a/lib/searchkick/model.rb b/lib/searchkick/model.rb index ab7b640..6d5f99a 100644 --- a/lib/searchkick/model.rb +++ b/lib/searchkick/model.rb @@ -5,7 +5,7 @@ module Searchkick unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, - :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity, + :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end, :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] raise 
ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? diff --git a/searchkick.gemspec b/searchkick.gemspec index f235c2c..506c4a1 100644 --- a/searchkick.gemspec +++ b/searchkick.gemspec @@ -22,4 +22,5 @@ Gem::Specification.new do |spec| spec.add_development_dependency "bundler" spec.add_development_dependency "minitest" spec.add_development_dependency "rake" + spec.add_development_dependency "elasticsearch-xpack", ">= 7.8.0.pre" end diff --git a/test/models/speaker.rb b/test/models/speaker.rb index 7c971b3..76a9ed8 100644 --- a/test/models/speaker.rb +++ b/test/models/speaker.rb @@ -1,6 +1,16 @@ class Speaker searchkick \ - conversions: ["conversions_a", "conversions_b"] + conversions: ["conversions_a", "conversions_b"], + search_synonyms: [ + ["clorox", "bleach"], + ["burger", "hamburger"], + ["bandaids", "bandages"], + ["UPPERCASE", "lowercase"], + "led => led,lightbulb", + "halogen lamp => lightbulb", + ["United States of America", "USA"] + ], + word_start: [:name] attr_accessor :conversions_a, :conversions_b, :aisle diff --git a/test/search_synonyms_test.rb b/test/search_synonyms_test.rb new file mode 100644 index 0000000..e46ce44 --- /dev/null +++ b/test/search_synonyms_test.rb @@ -0,0 +1,76 @@ +require_relative "test_helper" + +class SearchSynonymsTest < Minitest::Test + def test_bleach + store_names ["Clorox Bleach", "Kroger Bleach"] + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"] + end + + def test_burger_buns + store_names ["Hamburger Buns"] + assert_search "burger buns", ["Hamburger Buns"] + end + + def test_bandaids + store_names ["Band-Aid", "Kroger 12-Pack Bandages"] + assert_search "bandaids", ["Band-Aid", "Kroger 12-Pack Bandages"] + end + + def test_reverse + store_names ["Hamburger"] + assert_search "burger", ["Hamburger"] + end + + def test_not_stemmed + store_names ["Burger"] + assert_search "hamburgers", [] + assert_search "hamburger", ["Burger"] + end + + def test_word_start + store_names 
["Clorox Bleach", "Kroger Bleach"] + assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"], {match: :word_start} + end + + def test_directional + store_names ["Lightbulb", "Green Onions", "Led"] + assert_search "led", ["Lightbulb", "Led"] + assert_search "Lightbulb", ["Lightbulb"] + assert_search "Halogen Lamp", ["Lightbulb"] + assert_search "onions", ["Green Onions"] + end + + def test_case + store_names ["Uppercase"] + assert_search "lowercase", ["Uppercase"] + end + + def test_multiple_words + store_names ["USA"] + assert_search "United States of America", ["USA"] + assert_search "usa", ["USA"] + assert_search "United States", [] + end + + def test_multiple_words_expanded + store_names ["United States of America"] + assert_search "usa", ["United States of America"] + assert_search "United States of America", ["United States of America"] + assert_search "United States", ["United States of America"] # no synonyms used + end + + def test_reload_synonyms + if Searchkick.server_below?("7.3.0") + error = assert_raises(Searchkick::Error) do + Speaker.search_index.reload_synonyms + end + assert_equal "Requires Elasticsearch 7.3+", error.message + else + Speaker.search_index.reload_synonyms + end + end + + def default_model + Speaker + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 8027930..a6953fc 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -86,28 +86,28 @@ class Minitest::Test end end - def store_names(names, klass = Product, reindex: true) + def store_names(names, klass = default_model, reindex: true) store names.map { |name| {name: name} }, klass, reindex: reindex end # no order - def assert_search(term, expected, options = {}, klass = Product) + def assert_search(term, expected, options = {}, klass = default_model) assert_equal expected.sort, klass.search(term, **options).map(&:name).sort end - def assert_order(term, expected, options = {}, klass = Product) + def assert_order(term, expected, options = {}, klass = 
default_model) assert_equal expected, klass.search(term, **options).map(&:name) end - def assert_equal_scores(term, options = {}, klass = Product) + def assert_equal_scores(term, options = {}, klass = default_model) assert_equal 1, klass.search(term, **options).hits.map { |a| a["_score"] }.uniq.size end - def assert_first(term, expected, options = {}, klass = Product) + def assert_first(term, expected, options = {}, klass = default_model) assert_equal expected, klass.search(term, **options).map(&:name).first end - def assert_misspellings(term, expected, misspellings = {}, klass = Product) + def assert_misspellings(term, expected, misspellings = {}, klass = default_model) options = { fields: [:name, :color], misspellings: misspellings @@ -134,4 +134,8 @@ class Minitest::Test def cequel? defined?(Cequel) end + + def default_model + Product + end end -- libgit2 0.21.0