Commit c77c3b9235de75ad5752ba58f3845e86a63a76a4
1 parent
316c03f8
Exists in
master
and in
5 other branches
Added stemmer option for Hunspell
Showing
4 changed files
with
31 additions
and
22 deletions
Show diff stats
README.md
@@ -326,7 +326,7 @@ You can also use a Hunspell dictionary for stemming. [master] | @@ -326,7 +326,7 @@ You can also use a Hunspell dictionary for stemming. [master] | ||
326 | 326 | ||
327 | ```ruby | 327 | ```ruby |
328 | class Product < ApplicationRecord | 328 | class Product < ApplicationRecord |
329 | - searchkick language: {type: "hunspell", locale: "en_US"} | 329 | + searchkick stemmer: {type: "hunspell", locale: "en_US"} |
330 | end | 330 | end |
331 | ``` | 331 | ``` |
332 | 332 |
lib/searchkick/index_options.rb
@@ -153,11 +153,8 @@ module Searchkick | @@ -153,11 +153,8 @@ module Searchkick | ||
153 | } | 153 | } |
154 | } | 154 | } |
155 | 155 | ||
156 | - if language.is_a?(Hash) | ||
157 | - update_language_type(settings, language) | ||
158 | - else | ||
159 | - update_language(settings, language) | ||
160 | - end | 156 | + raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language |
157 | + update_language(settings, language) | ||
161 | update_stemming(settings) | 158 | update_stemming(settings) |
162 | 159 | ||
163 | if Searchkick.env == "test" | 160 | if Searchkick.env == "test" |
@@ -200,16 +197,6 @@ module Searchkick | @@ -200,16 +197,6 @@ module Searchkick | ||
200 | settings | 197 | settings |
201 | end | 198 | end |
202 | 199 | ||
203 | - def update_language_type(settings, language) | ||
204 | - case language[:type] | ||
205 | - when "hunspell" | ||
206 | - # supports all token filter options | ||
207 | - settings[:analysis][:filter][:searchkick_stemmer] = language | ||
208 | - else | ||
209 | - raise ArgumentError, "Unknown language: #{language[:type]}" | ||
210 | - end | ||
211 | - end | ||
212 | - | ||
213 | def update_language(settings, language) | 200 | def update_language(settings, language) |
214 | case language | 201 | case language |
215 | when "chinese" | 202 | when "chinese" |
@@ -300,6 +287,18 @@ module Searchkick | @@ -300,6 +287,18 @@ module Searchkick | ||
300 | end | 287 | end |
301 | 288 | ||
302 | def update_stemming(settings) | 289 | def update_stemming(settings) |
290 | + if options[:stemmer] | ||
291 | + stemmer = options[:stemmer] | ||
292 | + # could also support snowball and stemmer | ||
293 | + case stemmer[:type] | ||
294 | + when "hunspell" | ||
295 | + # supports all token filter options | ||
296 | + settings[:analysis][:filter][:searchkick_stemmer] = stemmer | ||
297 | + else | ||
298 | + raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}" | ||
299 | + end | ||
300 | + end | ||
301 | + | ||
303 | stem = options[:stem] | 302 | stem = options[:stem] |
304 | 303 | ||
305 | # language analyzer used | 304 | # language analyzer used |
lib/searchkick/model.rb
@@ -6,7 +6,7 @@ module Searchkick | @@ -6,7 +6,7 @@ module Searchkick | ||
6 | unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, | 6 | unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields, |
7 | :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, | 7 | :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language, |
8 | :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, | 8 | :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, |
9 | - :special_characters, :stem, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end, | 9 | + :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end, |
10 | :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] | 10 | :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start] |
11 | raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? | 11 | raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? |
12 | 12 |
test/language_test.rb
@@ -90,21 +90,31 @@ class LanguageTest < Minitest::Test | @@ -90,21 +90,31 @@ class LanguageTest < Minitest::Test | ||
90 | end | 90 | end |
91 | end | 91 | end |
92 | 92 | ||
93 | - def test_hunspell | 93 | + def test_stemmer_hunspell |
94 | skip if ci? | 94 | skip if ci? |
95 | 95 | ||
96 | - with_options({language: {type: "hunspell", locale: "en_US"}}) do | 96 | + with_options({stemmer: {type: "hunspell", locale: "en_US"}}) do |
97 | store_names ["the foxes jumping quickly"] | 97 | store_names ["the foxes jumping quickly"] |
98 | assert_language_search "fox", ["the foxes jumping quickly"] | 98 | assert_language_search "fox", ["the foxes jumping quickly"] |
99 | end | 99 | end |
100 | end | 100 | end |
101 | 101 | ||
102 | - def test_unknown_type | 102 | + def test_stemmer_unknown_type |
103 | error = assert_raises(ArgumentError) do | 103 | error = assert_raises(ArgumentError) do |
104 | - with_options({language: {type: "bad"}}) do | 104 | + with_options({stemmer: {type: "bad"}}) do |
105 | end | 105 | end |
106 | end | 106 | end |
107 | - assert_equal "Unknown language: bad", error.message | 107 | + assert_equal "Unknown stemmer: bad", error.message |
108 | + end | ||
109 | + | ||
110 | + def test_stemmer_language | ||
111 | + skip if ci? | ||
112 | + | ||
113 | + error = assert_raises(ArgumentError) do | ||
114 | + with_options({stemmer: {type: "hunspell", locale: "en_US"}, language: "english"}) do | ||
115 | + end | ||
116 | + end | ||
117 | + assert_equal "Can't pass both language and stemmer", error.message | ||
108 | end | 118 | end |
109 | 119 | ||
110 | def assert_language_search(term, expected) | 120 | def assert_language_search(term, expected) |