Commit 037eff05f61f7a6e8763404cc113e89878787ef0

Authored by Andrew Kane
1 parent 4e57986c

Added support for synonyms in Japanese - fixes #1489

CHANGELOG.md
1 1 ## 4.5.0 (unreleased)
2 2  
3 3 - Added experimental support for OpenSearch
  4 +- Added support for synonyms in Japanese
4 5  
5 6 ## 4.4.4 (2021-03-12)
6 7  
... ...
lib/searchkick/index_options.rb
... ... @@ -235,6 +235,27 @@ module Searchkick
235 235 type: "kuromoji"
236 236 }
237 237 )
  238 + when "japanese2"
  239 + analyzer = {
  240 + type: "custom",
  241 + tokenizer: "kuromoji_tokenizer",
  242 + filter: [
  243 + "kuromoji_baseform",
  244 + "kuromoji_part_of_speech",
  245 + "cjk_width",
  246 + "ja_stop",
  247 + "searchkick_stemmer",
  248 + "lowercase"
  249 + ]
  250 + }
  251 + settings[:analysis][:analyzer].merge!(
  252 + default_analyzer => analyzer.deep_dup,
  253 + searchkick_search: analyzer.deep_dup,
  254 + searchkick_search2: analyzer.deep_dup
  255 + )
  256 + settings[:analysis][:filter][:searchkick_stemmer] = {
  257 + type: "kuromoji_stemmer"
  258 + }
238 259 when "korean"
239 260 settings[:analysis][:analyzer].merge!(
240 261 default_analyzer => {
... ... @@ -512,8 +533,18 @@ module Searchkick
512 533 end
513 534 settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
514 535  
515   - [:searchkick_search2, :searchkick_word_search].each do |analyzer|
516   - settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  536 + if options[:language] == "japanese2"
  537 + [:searchkick_search, :searchkick_search2].each do |analyzer|
  538 + settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
  539 + end
  540 + else
  541 + [:searchkick_search2, :searchkick_word_search].each do |analyzer|
  542 + unless settings[:analysis][:analyzer][analyzer].key?(:filter)
  543 + raise Searchkick::Error, "Search synonyms are not supported yet for language"
  544 + end
  545 +
  546 + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  547 + end
517 548 end
518 549 end
519 550 end
... ...
lib/searchkick/query.rb
... ... @@ -353,8 +353,8 @@ module Searchkick
353 353 shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
354 354 qs << shared_options.merge(analyzer: "searchkick_search")
355 355  
356   - # searchkick_search and searchkick_search2 are the same for ukrainian
357   - unless %w(japanese korean polish ukrainian vietnamese).include?(searchkick_options[:language])
  356 + # searchkick_search and searchkick_search2 are the same for some languages
  357 + unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language])
358 358 qs << shared_options.merge(analyzer: "searchkick_search2")
359 359 end
360 360 exclude_analyzer = "searchkick_search2"
... ...
test/language_test.rb
... ... @@ -40,6 +40,33 @@ class LanguageTest < Minitest::Test
40 40 end
41 41 end
42 42  
  43 + def test_japanese_search_synonyms
  44 + error = assert_raises(Searchkick::Error) do
  45 + with_options({language: "japanese", search_synonyms: [["飲む", "喰らう"]]}) do
  46 + end
  47 + end
  48 + assert_equal "Search synonyms are not supported yet for language", error.message
  49 + end
  50 +
  51 + def test_japanese2
  52 + # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html
  53 + with_options({language: "japanese2"}) do
  54 + store_names ["JR新宿駅の近くにビールを飲みに行こうか"]
  55 + assert_language_search "飲む", ["JR新宿駅の近くにビールを飲みに行こうか"]
  56 + assert_language_search "jr", ["JR新宿駅の近くにビールを飲みに行こうか"]
  57 + assert_language_search "新", []
  58 + end
  59 + end
  60 +
  61 + def test_japanese2_search_synonyms
  62 + # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html
  63 + with_options({language: "japanese2", search_synonyms: [["飲む", "喰らう"]]}) do
  64 + store_names ["JR新宿駅の近くにビールを飲みに行こうか"]
  65 + assert_language_search "喰らう", ["JR新宿駅の近くにビールを飲みに行こうか"]
  66 + assert_language_search "新", []
  67 + end
  68 + end
  69 +
43 70 def test_korean
44 71 skip if ci?
45 72  
... ...