Commit c77c3b9235de75ad5752ba58f3845e86a63a76a4

Authored by Andrew Kane
1 parent 316c03f8

Added stemmer option for Hunspell

@@ -326,7 +326,7 @@ You can also use a Hunspell dictionary for stemming. [master] @@ -326,7 +326,7 @@ You can also use a Hunspell dictionary for stemming. [master]
326 326
327 ```ruby 327 ```ruby
328 class Product < ApplicationRecord 328 class Product < ApplicationRecord
329 - searchkick language: {type: "hunspell", locale: "en_US"} 329 + searchkick stemmer: {type: "hunspell", locale: "en_US"}
330 end 330 end
331 ``` 331 ```
332 332
lib/searchkick/index_options.rb
@@ -153,11 +153,8 @@ module Searchkick @@ -153,11 +153,8 @@ module Searchkick
153 } 153 }
154 } 154 }
155 155
156 - if language.is_a?(Hash)  
157 - update_language_type(settings, language)  
158 - else  
159 - update_language(settings, language)  
160 - end 156 + raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
  157 + update_language(settings, language)
161 update_stemming(settings) 158 update_stemming(settings)
162 159
163 if Searchkick.env == "test" 160 if Searchkick.env == "test"
@@ -200,16 +197,6 @@ module Searchkick @@ -200,16 +197,6 @@ module Searchkick
200 settings 197 settings
201 end 198 end
202 199
203 - def update_language_type(settings, language)  
204 - case language[:type]  
205 - when "hunspell"  
206 - # supports all token filter options  
207 - settings[:analysis][:filter][:searchkick_stemmer] = language  
208 - else  
209 - raise ArgumentError, "Unknown language: #{language[:type]}"  
210 - end  
211 - end  
212 -  
213 def update_language(settings, language) 200 def update_language(settings, language)
214 case language 201 case language
215 when "chinese" 202 when "chinese"
@@ -300,6 +287,18 @@ module Searchkick @@ -300,6 +287,18 @@ module Searchkick
300 end 287 end
301 288
302 def update_stemming(settings) 289 def update_stemming(settings)
  290 + if options[:stemmer]
  291 + stemmer = options[:stemmer]
  292 + # could also support snowball and stemmer
  293 + case stemmer[:type]
  294 + when "hunspell"
  295 + # supports all token filter options
  296 + settings[:analysis][:filter][:searchkick_stemmer] = stemmer
  297 + else
  298 + raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
  299 + end
  300 + end
  301 +
303 stem = options[:stem] 302 stem = options[:stem]
304 303
305 # language analyzer used 304 # language analyzer used
lib/searchkick/model.rb
@@ -6,7 +6,7 @@ module Searchkick @@ -6,7 +6,7 @@ module Searchkick
6 unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, 6 unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7 :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, 7 :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8 :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, 8 :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9 - :special_characters, :stem, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end, 9 + :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10 :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] 10 :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
11 raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? 11 raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
12 12
test/language_test.rb
@@ -90,21 +90,31 @@ class LanguageTest < Minitest::Test @@ -90,21 +90,31 @@ class LanguageTest < Minitest::Test
90 end 90 end
91 end 91 end
92 92
93 - def test_hunspell 93 + def test_stemmer_hunspell
94 skip if ci? 94 skip if ci?
95 95
96 - with_options({language: {type: "hunspell", locale: "en_US"}}) do 96 + with_options({stemmer: {type: "hunspell", locale: "en_US"}}) do
97 store_names ["the foxes jumping quickly"] 97 store_names ["the foxes jumping quickly"]
98 assert_language_search "fox", ["the foxes jumping quickly"] 98 assert_language_search "fox", ["the foxes jumping quickly"]
99 end 99 end
100 end 100 end
101 101
102 - def test_unknown_type 102 + def test_stemmer_unknown_type
103 error = assert_raises(ArgumentError) do 103 error = assert_raises(ArgumentError) do
104 - with_options({language: {type: "bad"}}) do 104 + with_options({stemmer: {type: "bad"}}) do
105 end 105 end
106 end 106 end
107 - assert_equal "Unknown language: bad", error.message 107 + assert_equal "Unknown stemmer: bad", error.message
  108 + end
  109 +
  110 + def test_stemmer_language
  111 + skip if ci?
  112 +
  113 + error = assert_raises(ArgumentError) do
  114 + with_options({stemmer: {type: "hunspell", locale: "en_US"}, language: "english"}) do
  115 + end
  116 + end
  117 + assert_equal "Can't pass both language and stemmer", error.message
108 end 118 end
109 119
110 def assert_language_search(term, expected) 120 def assert_language_search(term, expected)