Commit dbe04a0a11e8cbfc4d4e74abc2ae7ef528455eb4

Authored by Andrew Kane
1 parent 443f9d9c

Started cleaning up index options

lib/searchkick/index.rb
... ... @@ -2,8 +2,6 @@ require "searchkick/index_options"
2 2  
3 3 module Searchkick
4 4 class Index
5   - include IndexOptions
6   -
7 5 attr_reader :name, :options
8 6  
9 7 def initialize(name, options = {})
... ... @@ -12,6 +10,10 @@ module Searchkick
12 10 @klass_document_type = {} # cache
13 11 end
14 12  
  13 + def index_options
  14 + IndexOptions.new(self).index_options
  15 + end
  16 +
15 17 def create(body = {})
16 18 client.indices.create index: name, body: body
17 19 end
... ...
lib/searchkick/index_options.rb
1 1 module Searchkick
2   - module IndexOptions
3   - def index_options
4   - options = @options
5   - language = options[:language]
6   - language = language.call if language.respond_to?(:call)
  2 + class IndexOptions
  3 + attr_reader :options
7 4  
8   - below62 = Searchkick.server_below?("6.2.0")
9   - below70 = Searchkick.server_below?("7.0.0")
10   - below73 = Searchkick.server_below?("7.3.0")
  5 + def initialize(index)
  6 + @options = index.options
  7 + end
11 8  
  9 + def index_options
12 10 if below70
13 11 index_type = options[:_type]
14 12 index_type = index_type.call if index_type.respond_to?(:call)
... ... @@ -24,10 +22,10 @@ module Searchkick
24 22 settings = options[:settings] || {}
25 23 mappings = custom_mapping
26 24 else
27   - default_type = "text"
28   - default_analyzer = :searchkick_index
29   - keyword_mapping = {type: "keyword"}
  25 + language = options[:language]
  26 + language = language.call if language.respond_to?(:call)
30 27  
  28 + keyword_mapping = {type: "keyword"}
31 29 keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
32 30  
33 31 settings = {
... ... @@ -284,68 +282,9 @@ module Searchkick
284 282  
285 283 settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
286 284  
287   - # synonyms
288   - synonyms = options[:synonyms] || []
289   - synonyms = synonyms.call if synonyms.respond_to?(:call)
290   - if synonyms.any?
291   - settings[:analysis][:filter][:searchkick_synonym] = {
292   - type: "synonym",
293   - # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
294   - synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
295   - }
296   - # choosing a place for the synonym filter when stemming is not easy
297   - # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
298   - # TODO use a snowball stemmer on synonyms when creating the token filter
299   -
300   - # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
301   - # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
302   - # - Only apply the synonym expansion at index time
303   - # - Don't have the synonym filter applied search
304   - # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
305   - settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
306   -
307   - %w(word_start word_middle word_end).each do |type|
308   - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
309   - end
310   - end
311   -
312   - search_synonyms = options[:search_synonyms] || []
313   - search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
314   - if search_synonyms.is_a?(String) || search_synonyms.any?
315   - if search_synonyms.is_a?(String)
316   - synonym_graph = {
317   - type: "synonym_graph",
318   - synonyms_path: search_synonyms
319   - }
320   - synonym_graph[:updateable] = true unless below73
321   - else
322   - synonym_graph = {
323   - type: "synonym_graph",
324   - # TODO confirm this is correct
325   - synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
326   - }
327   - end
328   - settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
329   -
330   - [:searchkick_search2, :searchkick_word_search].each do |analyzer|
331   - settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
332   - end
333   - end
334   -
335   - if options[:wordnet]
336   - settings[:analysis][:filter][:searchkick_wordnet] = {
337   - type: "synonym",
338   - format: "wordnet",
339   - synonyms_path: Searchkick.wordnet_path
340   - }
341   -
342   - settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
343   - settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
344   -
345   - %w(word_start word_middle word_end).each do |type|
346   - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
347   - end
348   - end
  285 + add_synonyms(settings)
  286 + add_search_synonyms(settings)
  287 + add_wordnet(settings) if options[:wordnet]
349 288  
350 289 if options[:special_characters] == false
351 290 settings[:analysis][:analyzer].each_value do |analyzer_settings|
... ... @@ -487,5 +426,91 @@ module Searchkick
487 426 mappings: mappings
488 427 }
489 428 end
  429 +
  430 + def add_synonyms(settings)
  431 + synonyms = options[:synonyms] || []
  432 + synonyms = synonyms.call if synonyms.respond_to?(:call)
  433 + if synonyms.any?
  434 + settings[:analysis][:filter][:searchkick_synonym] = {
  435 + type: "synonym",
  436 + # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
  437 + synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
  438 + }
  439 + # choosing a place for the synonym filter when stemming is not easy
  440 + # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
  441 + # TODO use a snowball stemmer on synonyms when creating the token filter
  442 +
  443 + # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
  444 + # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
  445 + # - Only apply the synonym expansion at index time
  446 + # - Don't have the synonym filter applied search
  447 + # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
  448 + settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
  449 +
  450 + %w(word_start word_middle word_end).each do |type|
  451 + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
  452 + end
  453 + end
  454 + end
  455 +
  456 + def add_search_synonyms(settings)
  457 + search_synonyms = options[:search_synonyms] || []
  458 + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  459 + if search_synonyms.is_a?(String) || search_synonyms.any?
  460 + if search_synonyms.is_a?(String)
  461 + synonym_graph = {
  462 + type: "synonym_graph",
  463 + synonyms_path: search_synonyms
  464 + }
  465 + synonym_graph[:updateable] = true unless below73
  466 + else
  467 + synonym_graph = {
  468 + type: "synonym_graph",
  469 + # TODO confirm this is correct
  470 + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
  471 + }
  472 + end
  473 + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
  474 +
  475 + [:searchkick_search2, :searchkick_word_search].each do |analyzer|
  476 + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
  477 + end
  478 + end
  479 + end
  480 +
  481 + def add_wordnet(settings)
  482 + settings[:analysis][:filter][:searchkick_wordnet] = {
  483 + type: "synonym",
  484 + format: "wordnet",
  485 + synonyms_path: Searchkick.wordnet_path
  486 + }
  487 +
  488 + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
  489 + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
  490 +
  491 + %w(word_start word_middle word_end).each do |type|
  492 + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
  493 + end
  494 + end
  495 +
  496 + def default_type
  497 + "text"
  498 + end
  499 +
  500 + def default_analyzer
  501 + :searchkick_index
  502 + end
  503 +
  504 + def below62
  505 + Searchkick.server_below?("6.2.0")
  506 + end
  507 +
  508 + def below70
  509 + Searchkick.server_below?("7.0.0")
  510 + end
  511 +
  512 + def below73
  513 + Searchkick.server_below?("7.3.0")
  514 + end
490 515 end
491 516 end
... ...