From 037eff05f61f7a6e8763404cc113e89878787ef0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 26 May 2021 03:04:52 -0700 Subject: [PATCH] Added support for synonyms in Japanese - fixes #1489 --- CHANGELOG.md | 1 + lib/searchkick/index_options.rb | 35 +++++++++++++++++++++++++++++++++-- lib/searchkick/query.rb | 4 ++-- test/language_test.rb | 27 +++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc265d4..ebac112 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 4.5.0 (unreleased) - Added experimental support for OpenSearch +- Added support for synonyms in Japanese ## 4.4.4 (2021-03-12) diff --git a/lib/searchkick/index_options.rb b/lib/searchkick/index_options.rb index 83bb5ec..6089f1f 100644 --- a/lib/searchkick/index_options.rb +++ b/lib/searchkick/index_options.rb @@ -235,6 +235,27 @@ module Searchkick type: "kuromoji" } ) + when "japanese2" + analyzer = { + type: "custom", + tokenizer: "kuromoji_tokenizer", + filter: [ + "kuromoji_baseform", + "kuromoji_part_of_speech", + "cjk_width", + "ja_stop", + "searchkick_stemmer", + "lowercase" + ] + } + settings[:analysis][:analyzer].merge!( + default_analyzer => analyzer.deep_dup, + searchkick_search: analyzer.deep_dup, + searchkick_search2: analyzer.deep_dup + ) + settings[:analysis][:filter][:searchkick_stemmer] = { + type: "kuromoji_stemmer" + } when "korean" settings[:analysis][:analyzer].merge!( default_analyzer => { @@ -512,8 +533,18 @@ module Searchkick end settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph - [:searchkick_search2, :searchkick_word_search].each do |analyzer| - settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") + if options[:language] == "japanese2" + [:searchkick_search, :searchkick_search2].each do |analyzer| + settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph") + end + else + [:searchkick_search2, 
:searchkick_word_search].each do |analyzer| + unless settings[:analysis][:analyzer][analyzer].key?(:filter) + raise Searchkick::Error, "Search synonyms are not supported yet for language" + end + + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") + end end end end diff --git a/lib/searchkick/query.rb b/lib/searchkick/query.rb index 96a41f5..1491b12 100644 --- a/lib/searchkick/query.rb +++ b/lib/searchkick/query.rb @@ -353,8 +353,8 @@ module Searchkick shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?) qs << shared_options.merge(analyzer: "searchkick_search") - # searchkick_search and searchkick_search2 are the same for ukrainian - unless %w(japanese korean polish ukrainian vietnamese).include?(searchkick_options[:language]) + # searchkick_search and searchkick_search2 are the same for some languages + unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language]) qs << shared_options.merge(analyzer: "searchkick_search2") end exclude_analyzer = "searchkick_search2" diff --git a/test/language_test.rb b/test/language_test.rb index 3e4d121..933d106 100644 --- a/test/language_test.rb +++ b/test/language_test.rb @@ -40,6 +40,33 @@ class LanguageTest < Minitest::Test end end + def test_japanese_search_synonyms + error = assert_raises(Searchkick::Error) do + with_options({language: "japanese", search_synonyms: [["飲む", "喰らう"]]}) do + end + end + assert_equal "Search synonyms are not supported yet for language", error.message + end + + def test_japanese2 + # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html + with_options({language: "japanese2"}) do + store_names ["JR新宿駅の近くにビールを飲みに行こうか"] + assert_language_search "飲む", ["JR新宿駅の近くにビールを飲みに行こうか"] + assert_language_search "jr", ["JR新宿駅の近くにビールを飲みに行こうか"] + assert_language_search "新", [] + end + end + + def 
test_japanese2_search_synonyms + # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html + with_options({language: "japanese2", search_synonyms: [["飲む", "喰らう"]]}) do + store_names ["JR新宿駅の近くにビールを飲みに行こうか"] + assert_language_search "喰らう", ["JR新宿駅の近くにビールを飲みに行こうか"] + assert_language_search "新", [] + end + end + def test_korean skip if ci? -- libgit2 0.21.0