Commit c77c3b9235de75ad5752ba58f3845e86a63a76a4

Authored by Andrew Kane
1 parent 316c03f8

Added stemmer option for Hunspell

README.md
... ... @@ -326,7 +326,7 @@ You can also use a Hunspell dictionary for stemming. [master]
326 326  
327 327 ```ruby
328 328 class Product < ApplicationRecord
329   - searchkick language: {type: "hunspell", locale: "en_US"}
  329 + searchkick stemmer: {type: "hunspell", locale: "en_US"}
330 330 end
331 331 ```
332 332  
... ...
lib/searchkick/index_options.rb
... ... @@ -153,11 +153,8 @@ module Searchkick
153 153 }
154 154 }
155 155  
156   - if language.is_a?(Hash)
157   - update_language_type(settings, language)
158   - else
159   - update_language(settings, language)
160   - end
  156 + raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
  157 + update_language(settings, language)
161 158 update_stemming(settings)
162 159  
163 160 if Searchkick.env == "test"
... ... @@ -200,16 +197,6 @@ module Searchkick
200 197 settings
201 198 end
202 199  
203   - def update_language_type(settings, language)
204   - case language[:type]
205   - when "hunspell"
206   - # supports all token filter options
207   - settings[:analysis][:filter][:searchkick_stemmer] = language
208   - else
209   - raise ArgumentError, "Unknown language: #{language[:type]}"
210   - end
211   - end
212   -
213 200 def update_language(settings, language)
214 201 case language
215 202 when "chinese"
... ... @@ -300,6 +287,18 @@ module Searchkick
300 287 end
301 288  
302 289 def update_stemming(settings)
  290 + if options[:stemmer]
  291 + stemmer = options[:stemmer]
  292 + # could also support snowball and stemmer
  293 + case stemmer[:type]
  294 + when "hunspell"
  295 + # supports all token filter options
  296 + settings[:analysis][:filter][:searchkick_stemmer] = stemmer
  297 + else
  298 + raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
  299 + end
  300 + end
  301 +
303 302 stem = options[:stem]
304 303  
305 304 # language analyzer used
... ...
lib/searchkick/model.rb
... ... @@ -6,7 +6,7 @@ module Searchkick
6 6 unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7 7 :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8 8 :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9   - :special_characters, :stem, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
  9 + :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10 10 :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
11 11 raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
12 12  
... ...
test/language_test.rb
... ... @@ -90,21 +90,31 @@ class LanguageTest < Minitest::Test
90 90 end
91 91 end
92 92  
93   - def test_hunspell
  93 + def test_stemmer_hunspell
94 94 skip if ci?
95 95  
96   - with_options({language: {type: "hunspell", locale: "en_US"}}) do
  96 + with_options({stemmer: {type: "hunspell", locale: "en_US"}}) do
97 97 store_names ["the foxes jumping quickly"]
98 98 assert_language_search "fox", ["the foxes jumping quickly"]
99 99 end
100 100 end
101 101  
102   - def test_unknown_type
  102 + def test_stemmer_unknown_type
103 103 error = assert_raises(ArgumentError) do
104   - with_options({language: {type: "bad"}}) do
  104 + with_options({stemmer: {type: "bad"}}) do
105 105 end
106 106 end
107   - assert_equal "Unknown language: bad", error.message
  107 + assert_equal "Unknown stemmer: bad", error.message
  108 + end
  109 +
  110 + def test_stemmer_language
  111 + skip if ci?
  112 +
  113 + error = assert_raises(ArgumentError) do
  114 + with_options({stemmer: {type: "hunspell", locale: "en_US"}, language: "english"}) do
  115 + end
  116 + end
  117 + assert_equal "Can't pass both language and stemmer", error.message
108 118 end
109 119  
110 120 def assert_language_search(term, expected)
... ...