Commit 037eff05f61f7a6e8763404cc113e89878787ef0
1 parent
4e57986c
Exists in
master
and in
5 other branches
Added support for synonyms in Japanese - fixes #1489
Showing
4 changed files
with
63 additions
and
4 deletions
Show diff stats
CHANGELOG.md
lib/searchkick/index_options.rb
... | ... | @@ -235,6 +235,27 @@ module Searchkick |
235 | 235 | type: "kuromoji" |
236 | 236 | } |
237 | 237 | ) |
238 | + when "japanese2" | |
239 | + analyzer = { | |
240 | + type: "custom", | |
241 | + tokenizer: "kuromoji_tokenizer", | |
242 | + filter: [ | |
243 | + "kuromoji_baseform", | |
244 | + "kuromoji_part_of_speech", | |
245 | + "cjk_width", | |
246 | + "ja_stop", | |
247 | + "searchkick_stemmer", | |
248 | + "lowercase" | |
249 | + ] | |
250 | + } | |
251 | + settings[:analysis][:analyzer].merge!( | |
252 | + default_analyzer => analyzer.deep_dup, | |
253 | + searchkick_search: analyzer.deep_dup, | |
254 | + searchkick_search2: analyzer.deep_dup | |
255 | + ) | |
256 | + settings[:analysis][:filter][:searchkick_stemmer] = { | |
257 | + type: "kuromoji_stemmer" | |
258 | + } | |
238 | 259 | when "korean" |
239 | 260 | settings[:analysis][:analyzer].merge!( |
240 | 261 | default_analyzer => { |
... | ... | @@ -512,8 +533,18 @@ module Searchkick |
512 | 533 | end |
513 | 534 | settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph |
514 | 535 | |
515 | - [:searchkick_search2, :searchkick_word_search].each do |analyzer| | |
516 | - settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") | |
536 | + if options[:language] == "japanese2" | |
537 | + [:searchkick_search, :searchkick_search2].each do |analyzer| | |
538 | + settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph") | |
539 | + end | |
540 | + else | |
541 | + [:searchkick_search2, :searchkick_word_search].each do |analyzer| | |
542 | + unless settings[:analysis][:analyzer][analyzer].key?(:filter) | |
543 | + raise Searchkick::Error, "Search synonyms are not supported yet for language" | |
544 | + end | |
545 | + | |
546 | + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") | |
547 | + end | |
517 | 548 | end |
518 | 549 | end |
519 | 550 | end | ... | ... |
lib/searchkick/query.rb
... | ... | @@ -353,8 +353,8 @@ module Searchkick |
353 | 353 | shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?) |
354 | 354 | qs << shared_options.merge(analyzer: "searchkick_search") |
355 | 355 | |
356 | - # searchkick_search and searchkick_search2 are the same for ukrainian | |
357 | - unless %w(japanese korean polish ukrainian vietnamese).include?(searchkick_options[:language]) | |
356 | + # searchkick_search and searchkick_search2 are the same for some languages | |
357 | + unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language]) | |
358 | 358 | qs << shared_options.merge(analyzer: "searchkick_search2") |
359 | 359 | end |
360 | 360 | exclude_analyzer = "searchkick_search2" | ... | ... |
test/language_test.rb
... | ... | @@ -40,6 +40,33 @@ class LanguageTest < Minitest::Test |
40 | 40 | end |
41 | 41 | end |
42 | 42 | |
43 | + def test_japanese_search_synonyms | |
44 | + error = assert_raises(Searchkick::Error) do | |
45 | + with_options({language: "japanese", search_synonyms: [["飲む", "喰らう"]]}) do | |
46 | + end | |
47 | + end | |
48 | + assert_equal "Search synonyms are not supported yet for language", error.message | |
49 | + end | |
50 | + | |
51 | + def test_japanese2 | |
52 | + # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html | |
53 | + with_options({language: "japanese2"}) do | |
54 | + store_names ["JR新宿駅の近くにビールを飲みに行こうか"] | |
55 | + assert_language_search "飲む", ["JR新宿駅の近くにビールを飲みに行こうか"] | |
56 | + assert_language_search "jr", ["JR新宿駅の近くにビールを飲みに行こうか"] | |
57 | + assert_language_search "新", [] | |
58 | + end | |
59 | + end | |
60 | + | |
61 | + def test_japanese2_search_synonyms | |
62 | + # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html | |
63 | + with_options({language: "japanese2", search_synonyms: [["飲む", "喰らう"]]}) do | |
64 | + store_names ["JR新宿駅の近くにビールを飲みに行こうか"] | |
65 | + assert_language_search "喰らう", ["JR新宿駅の近くにビールを飲みに行こうか"] | |
66 | + assert_language_search "新", [] | |
67 | + end | |
68 | + end | |
69 | + | |
43 | 70 | def test_korean |
44 | 71 | skip if ci? |
45 | 72 | ... | ... |