Commit c77c3b9235de75ad5752ba58f3845e86a63a76a4
1 parent
316c03f8
Exists in master and in 5 other branches
Added stemmer option for Hunspell
Showing 4 changed files with 31 additions and 22 deletions
Show diff stats
README.md
... | ... | @@ -326,7 +326,7 @@ You can also use a Hunspell dictionary for stemming. [master] |
326 | 326 | |
327 | 327 | ```ruby |
328 | 328 | class Product < ApplicationRecord |
329 | - searchkick language: {type: "hunspell", locale: "en_US"} | |
329 | + searchkick stemmer: {type: "hunspell", locale: "en_US"} | |
330 | 330 | end |
331 | 331 | ``` |
332 | 332 | ... | ... |
lib/searchkick/index_options.rb
... | ... | @@ -153,11 +153,8 @@ module Searchkick |
153 | 153 | } |
154 | 154 | } |
155 | 155 | |
156 | - if language.is_a?(Hash) | |
157 | - update_language_type(settings, language) | |
158 | - else | |
159 | - update_language(settings, language) | |
160 | - end | |
156 | + raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language | |
157 | + update_language(settings, language) | |
161 | 158 | update_stemming(settings) |
162 | 159 | |
163 | 160 | if Searchkick.env == "test" |
... | ... | @@ -200,16 +197,6 @@ module Searchkick |
200 | 197 | settings |
201 | 198 | end |
202 | 199 | |
203 | - def update_language_type(settings, language) | |
204 | - case language[:type] | |
205 | - when "hunspell" | |
206 | - # supports all token filter options | |
207 | - settings[:analysis][:filter][:searchkick_stemmer] = language | |
208 | - else | |
209 | - raise ArgumentError, "Unknown language: #{language[:type]}" | |
210 | - end | |
211 | - end | |
212 | - | |
213 | 200 | def update_language(settings, language) |
214 | 201 | case language |
215 | 202 | when "chinese" |
... | ... | @@ -300,6 +287,18 @@ module Searchkick |
300 | 287 | end |
301 | 288 | |
302 | 289 | def update_stemming(settings) |
290 | + if options[:stemmer] | |
291 | + stemmer = options[:stemmer] | |
292 | + # could also support snowball and stemmer | |
293 | + case stemmer[:type] | |
294 | + when "hunspell" | |
295 | + # supports all token filter options | |
296 | + settings[:analysis][:filter][:searchkick_stemmer] = stemmer | |
297 | + else | |
298 | + raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}" | |
299 | + end | |
300 | + end | |
301 | + | |
303 | 302 | stem = options[:stem] |
304 | 303 | |
305 | 304 | # language analyzer used | ... | ... |
lib/searchkick/model.rb
... | ... | @@ -6,7 +6,7 @@ module Searchkick |
6 | 6 | unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, |
7 | 7 | :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, |
8 | 8 | :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, |
9 | - :special_characters, :stem, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end, | |
9 | + :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end, | |
10 | 10 | :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] |
11 | 11 | raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? |
12 | 12 | ... | ... |
test/language_test.rb
... | ... | @@ -90,21 +90,31 @@ class LanguageTest < Minitest::Test |
90 | 90 | end |
91 | 91 | end |
92 | 92 | |
93 | - def test_hunspell | |
93 | + def test_stemmer_hunspell | |
94 | 94 | skip if ci? |
95 | 95 | |
96 | - with_options({language: {type: "hunspell", locale: "en_US"}}) do | |
96 | + with_options({stemmer: {type: "hunspell", locale: "en_US"}}) do | |
97 | 97 | store_names ["the foxes jumping quickly"] |
98 | 98 | assert_language_search "fox", ["the foxes jumping quickly"] |
99 | 99 | end |
100 | 100 | end |
101 | 101 | |
102 | - def test_unknown_type | |
102 | + def test_stemmer_unknown_type | |
103 | 103 | error = assert_raises(ArgumentError) do |
104 | - with_options({language: {type: "bad"}}) do | |
104 | + with_options({stemmer: {type: "bad"}}) do | |
105 | 105 | end |
106 | 106 | end |
107 | - assert_equal "Unknown language: bad", error.message | |
107 | + assert_equal "Unknown stemmer: bad", error.message | |
108 | + end | |
109 | + | |
110 | + def test_stemmer_language | |
111 | + skip if ci? | |
112 | + | |
113 | + error = assert_raises(ArgumentError) do | |
114 | + with_options({stemmer: {type: "hunspell", locale: "en_US"}, language: "english"}) do | |
115 | + end | |
116 | + end | |
117 | + assert_equal "Can't pass both language and stemmer", error.message | |
108 | 118 | end |
109 | 119 | |
110 | 120 | def assert_language_search(term, expected) | ... | ... |