Commit dbe04a0a11e8cbfc4d4e74abc2ae7ef528455eb4

Authored by Andrew Kane
1 parent 443f9d9c

Started cleaning up index options

lib/searchkick/index.rb
@@ -2,8 +2,6 @@ require "searchkick/index_options"
2 2
3 module Searchkick 3 module Searchkick
4 class Index 4 class Index
5 - include IndexOptions  
6 -  
7 attr_reader :name, :options 5 attr_reader :name, :options
8 6
9 def initialize(name, options = {}) 7 def initialize(name, options = {})
@@ -12,6 +10,10 @@ module Searchkick
12 @klass_document_type = {} # cache 10 @klass_document_type = {} # cache
13 end 11 end
14 12
  13 + def index_options
  14 + IndexOptions.new(self).index_options
  15 + end
  16 +
15 def create(body = {}) 17 def create(body = {})
16 client.indices.create index: name, body: body 18 client.indices.create index: name, body: body
17 end 19 end
lib/searchkick/index_options.rb
1 module Searchkick 1 module Searchkick
2 - module IndexOptions  
3 - def index_options  
4 - options = @options  
5 - language = options[:language]  
6 - language = language.call if language.respond_to?(:call) 2 + class IndexOptions
  3 + attr_reader :options
7 4
8 - below62 = Searchkick.server_below?("6.2.0")  
9 - below70 = Searchkick.server_below?("7.0.0")  
10 - below73 = Searchkick.server_below?("7.3.0") 5 + def initialize(index)
  6 + @options = index.options
  7 + end
11 8
  9 + def index_options
12 if below70 10 if below70
13 index_type = options[:_type] 11 index_type = options[:_type]
14 index_type = index_type.call if index_type.respond_to?(:call) 12 index_type = index_type.call if index_type.respond_to?(:call)
@@ -24,10 +22,10 @@ module Searchkick
24 settings = options[:settings] || {} 22 settings = options[:settings] || {}
25 mappings = custom_mapping 23 mappings = custom_mapping
26 else 24 else
27 - default_type = "text"  
28 - default_analyzer = :searchkick_index  
29 - keyword_mapping = {type: "keyword"} 25 + language = options[:language]
  26 + language = language.call if language.respond_to?(:call)
30 27
  28 + keyword_mapping = {type: "keyword"}
31 keyword_mapping[:ignore_above] = options[:ignore_above] || 30000 29 keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
32 30
33 settings = { 31 settings = {
@@ -284,68 +282,9 @@ module Searchkick
284 282
285 settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys) 283 settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
286 284
287 - # synonyms  
288 - synonyms = options[:synonyms] || []  
289 - synonyms = synonyms.call if synonyms.respond_to?(:call)  
290 - if synonyms.any?  
291 - settings[:analysis][:filter][:searchkick_synonym] = {  
292 - type: "synonym",  
293 - # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently  
294 - synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)  
295 - }  
296 - # choosing a place for the synonym filter when stemming is not easy  
297 - # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8  
298 - # TODO use a snowball stemmer on synonyms when creating the token filter  
299 -  
300 - # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html  
301 - # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):  
302 - # - Only apply the synonym expansion at index time  
303 - # - Don't have the synonym filter applied search  
304 - # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.  
305 - settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")  
306 -  
307 - %w(word_start word_middle word_end).each do |type|  
308 - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")  
309 - end  
310 - end  
311 -  
312 - search_synonyms = options[:search_synonyms] || []  
313 - search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)  
314 - if search_synonyms.is_a?(String) || search_synonyms.any?  
315 - if search_synonyms.is_a?(String)  
316 - synonym_graph = {  
317 - type: "synonym_graph",  
318 - synonyms_path: search_synonyms  
319 - }  
320 - synonym_graph[:updateable] = true unless below73  
321 - else  
322 - synonym_graph = {  
323 - type: "synonym_graph",  
324 - # TODO confirm this is correct  
325 - synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)  
326 - }  
327 - end  
328 - settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph  
329 -  
330 - [:searchkick_search2, :searchkick_word_search].each do |analyzer|  
331 - settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")  
332 - end  
333 - end  
334 -  
335 - if options[:wordnet]  
336 - settings[:analysis][:filter][:searchkick_wordnet] = {  
337 - type: "synonym",  
338 - format: "wordnet",  
339 - synonyms_path: Searchkick.wordnet_path  
340 - }  
341 -  
342 - settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")  
343 - settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"  
344 -  
345 - %w(word_start word_middle word_end).each do |type|  
346 - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")  
347 - end  
348 - end 285 + add_synonyms(settings)
  286 + add_search_synonyms(settings)
  287 + add_wordnet(settings) if options[:wordnet]
349 288
350 if options[:special_characters] == false 289 if options[:special_characters] == false
351 settings[:analysis][:analyzer].each_value do |analyzer_settings| 290 settings[:analysis][:analyzer].each_value do |analyzer_settings|
@@ -487,5 +426,91 @@ module Searchkick
487 mappings: mappings 426 mappings: mappings
488 } 427 }
489 end 428 end
  429 +
  430 + def add_synonyms(settings)
  431 + synonyms = options[:synonyms] || []
  432 + synonyms = synonyms.call if synonyms.respond_to?(:call)
  433 + if synonyms.any?
  434 + settings[:analysis][:filter][:searchkick_synonym] = {
  435 + type: "synonym",
  436 + # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
  437 + synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
  438 + }
  439 + # choosing a place for the synonym filter when stemming is not easy
  440 + # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
  441 + # TODO use a snowball stemmer on synonyms when creating the token filter
  442 +
  443 + # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
  444 + # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
  445 + # - Only apply the synonym expansion at index time
  446 + # - Don't have the synonym filter applied search
  447 + # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
  448 + settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
  449 +
  450 + %w(word_start word_middle word_end).each do |type|
  451 + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
  452 + end
  453 + end
  454 + end
  455 +
  456 + def add_search_synonyms(settings)
  457 + search_synonyms = options[:search_synonyms] || []
  458 + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  459 + if search_synonyms.is_a?(String) || search_synonyms.any?
  460 + if search_synonyms.is_a?(String)
  461 + synonym_graph = {
  462 + type: "synonym_graph",
  463 + synonyms_path: search_synonyms
  464 + }
  465 + synonym_graph[:updateable] = true unless below73
  466 + else
  467 + synonym_graph = {
  468 + type: "synonym_graph",
  469 + # TODO confirm this is correct
  470 + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
  471 + }
  472 + end
  473 + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
  474 +
  475 + [:searchkick_search2, :searchkick_word_search].each do |analyzer|
  476 + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  477 + end
  478 + end
  479 + end
  480 +
  481 + def add_wordnet(settings)
  482 + settings[:analysis][:filter][:searchkick_wordnet] = {
  483 + type: "synonym",
  484 + format: "wordnet",
  485 + synonyms_path: Searchkick.wordnet_path
  486 + }
  487 +
  488 + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
  489 + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
  490 +
  491 + %w(word_start word_middle word_end).each do |type|
  492 + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
  493 + end
  494 + end
  495 +
  496 + def default_type
  497 + "text"
  498 + end
  499 +
  500 + def default_analyzer
  501 + :searchkick_index
  502 + end
  503 +
  504 + def below62
  505 + Searchkick.server_below?("6.2.0")
  506 + end
  507 +
  508 + def below70
  509 + Searchkick.server_below?("7.0.0")
  510 + end
  511 +
  512 + def below73
  513 + Searchkick.server_below?("7.3.0")
  514 + end
490 end 515 end
491 end 516 end