Commit dbe04a0a11e8cbfc4d4e74abc2ae7ef528455eb4
1 parent: 443f9d9c
Exists in master and in 8 other branches
Started cleaning up index options
Showing 2 changed files with 102 additions and 75 deletions
Show diff stats
lib/searchkick/index.rb
... | ... | @@ -2,8 +2,6 @@ require "searchkick/index_options" |
2 | 2 | |
3 | 3 | module Searchkick |
4 | 4 | class Index |
5 | - include IndexOptions | |
6 | - | |
7 | 5 | attr_reader :name, :options |
8 | 6 | |
9 | 7 | def initialize(name, options = {}) |
... | ... | @@ -12,6 +10,10 @@ module Searchkick |
12 | 10 | @klass_document_type = {} # cache |
13 | 11 | end |
14 | 12 | |
13 | + def index_options | |
14 | + IndexOptions.new(self).index_options | |
15 | + end | |
16 | + | |
15 | 17 | def create(body = {}) |
16 | 18 | client.indices.create index: name, body: body |
17 | 19 | end | ... | ... |
lib/searchkick/index_options.rb
1 | 1 | module Searchkick |
2 | - module IndexOptions | |
3 | - def index_options | |
4 | - options = @options | |
5 | - language = options[:language] | |
6 | - language = language.call if language.respond_to?(:call) | |
2 | + class IndexOptions | |
3 | + attr_reader :options | |
7 | 4 | |
8 | - below62 = Searchkick.server_below?("6.2.0") | |
9 | - below70 = Searchkick.server_below?("7.0.0") | |
10 | - below73 = Searchkick.server_below?("7.3.0") | |
5 | + def initialize(index) | |
6 | + @options = index.options | |
7 | + end | |
11 | 8 | |
9 | + def index_options | |
12 | 10 | if below70 |
13 | 11 | index_type = options[:_type] |
14 | 12 | index_type = index_type.call if index_type.respond_to?(:call) |
... | ... | @@ -24,10 +22,10 @@ module Searchkick |
24 | 22 | settings = options[:settings] || {} |
25 | 23 | mappings = custom_mapping |
26 | 24 | else |
27 | - default_type = "text" | |
28 | - default_analyzer = :searchkick_index | |
29 | - keyword_mapping = {type: "keyword"} | |
25 | + language = options[:language] | |
26 | + language = language.call if language.respond_to?(:call) | |
30 | 27 | |
28 | + keyword_mapping = {type: "keyword"} | |
31 | 29 | keyword_mapping[:ignore_above] = options[:ignore_above] || 30000 |
32 | 30 | |
33 | 31 | settings = { |
... | ... | @@ -284,68 +282,9 @@ module Searchkick |
284 | 282 | |
285 | 283 | settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys) |
286 | 284 | |
287 | - # synonyms | |
288 | - synonyms = options[:synonyms] || [] | |
289 | - synonyms = synonyms.call if synonyms.respond_to?(:call) | |
290 | - if synonyms.any? | |
291 | - settings[:analysis][:filter][:searchkick_synonym] = { | |
292 | - type: "synonym", | |
293 | - # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently | |
294 | - synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase) | |
295 | - } | |
296 | - # choosing a place for the synonym filter when stemming is not easy | |
297 | - # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8 | |
298 | - # TODO use a snowball stemmer on synonyms when creating the token filter | |
299 | - | |
300 | - # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html | |
301 | - # I find the following approach effective if you are doing multi-word synonyms (synonym phrases): | |
302 | - # - Only apply the synonym expansion at index time | |
303 | - # - Don't have the synonym filter applied at search time | |
304 | - # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general. | |
305 | - settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym") | |
306 | - | |
307 | - %w(word_start word_middle word_end).each do |type| | |
308 | - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym") | |
309 | - end | |
310 | - end | |
311 | - | |
312 | - search_synonyms = options[:search_synonyms] || [] | |
313 | - search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call) | |
314 | - if search_synonyms.is_a?(String) || search_synonyms.any? | |
315 | - if search_synonyms.is_a?(String) | |
316 | - synonym_graph = { | |
317 | - type: "synonym_graph", | |
318 | - synonyms_path: search_synonyms | |
319 | - } | |
320 | - synonym_graph[:updateable] = true unless below73 | |
321 | - else | |
322 | - synonym_graph = { | |
323 | - type: "synonym_graph", | |
324 | - # TODO confirm this is correct | |
325 | - synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase) | |
326 | - } | |
327 | - end | |
328 | - settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph | |
329 | - | |
330 | - [:searchkick_search2, :searchkick_word_search].each do |analyzer| | |
331 | - settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") | |
332 | - end | |
333 | - end | |
334 | - | |
335 | - if options[:wordnet] | |
336 | - settings[:analysis][:filter][:searchkick_wordnet] = { | |
337 | - type: "synonym", | |
338 | - format: "wordnet", | |
339 | - synonyms_path: Searchkick.wordnet_path | |
340 | - } | |
341 | - | |
342 | - settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet") | |
343 | - settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet" | |
344 | - | |
345 | - %w(word_start word_middle word_end).each do |type| | |
346 | - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet") | |
347 | - end | |
348 | - end | |
285 | + add_synonyms(settings) | |
286 | + add_search_synonyms(settings) | |
287 | + add_wordnet(settings) if options[:wordnet] | |
349 | 288 | |
350 | 289 | if options[:special_characters] == false |
351 | 290 | settings[:analysis][:analyzer].each_value do |analyzer_settings| |
... | ... | @@ -487,5 +426,91 @@ module Searchkick |
487 | 426 | mappings: mappings |
488 | 427 | } |
489 | 428 | end |
429 | + | |
430 | + def add_synonyms(settings) | |
431 | + synonyms = options[:synonyms] || [] | |
432 | + synonyms = synonyms.call if synonyms.respond_to?(:call) | |
433 | + if synonyms.any? | |
434 | + settings[:analysis][:filter][:searchkick_synonym] = { | |
435 | + type: "synonym", | |
436 | + # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently | |
437 | + synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase) | |
438 | + } | |
439 | + # choosing a place for the synonym filter when stemming is not easy | |
440 | + # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8 | |
441 | + # TODO use a snowball stemmer on synonyms when creating the token filter | |
442 | + | |
443 | + # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html | |
444 | + # I find the following approach effective if you are doing multi-word synonyms (synonym phrases): | |
445 | + # - Only apply the synonym expansion at index time | |
446 | + # - Don't have the synonym filter applied at search time | |
447 | + # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general. | |
448 | + settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym") | |
449 | + | |
450 | + %w(word_start word_middle word_end).each do |type| | |
451 | + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym") | |
452 | + end | |
453 | + end | |
454 | + end | |
455 | + | |
456 | + def add_search_synonyms(settings) | |
457 | + search_synonyms = options[:search_synonyms] || [] | |
458 | + search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call) | |
459 | + if search_synonyms.is_a?(String) || search_synonyms.any? | |
460 | + if search_synonyms.is_a?(String) | |
461 | + synonym_graph = { | |
462 | + type: "synonym_graph", | |
463 | + synonyms_path: search_synonyms | |
464 | + } | |
465 | + synonym_graph[:updateable] = true unless below73 | |
466 | + else | |
467 | + synonym_graph = { | |
468 | + type: "synonym_graph", | |
469 | + # TODO confirm this is correct | |
470 | + synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase) | |
471 | + } | |
472 | + end | |
473 | + settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph | |
474 | + | |
475 | + [:searchkick_search2, :searchkick_word_search].each do |analyzer| | |
476 | + settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph") | |
477 | + end | |
478 | + end | |
479 | + end | |
480 | + | |
481 | + def add_wordnet(settings) | |
482 | + settings[:analysis][:filter][:searchkick_wordnet] = { | |
483 | + type: "synonym", | |
484 | + format: "wordnet", | |
485 | + synonyms_path: Searchkick.wordnet_path | |
486 | + } | |
487 | + | |
488 | + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet") | |
489 | + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet" | |
490 | + | |
491 | + %w(word_start word_middle word_end).each do |type| | |
492 | + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet") | |
493 | + end | |
494 | + end | |
495 | + | |
496 | + def default_type | |
497 | + "text" | |
498 | + end | |
499 | + | |
500 | + def default_analyzer | |
501 | + :searchkick_index | |
502 | + end | |
503 | + | |
504 | + def below62 | |
505 | + Searchkick.server_below?("6.2.0") | |
506 | + end | |
507 | + | |
508 | + def below70 | |
509 | + Searchkick.server_below?("7.0.0") | |
510 | + end | |
511 | + | |
512 | + def below73 | |
513 | + Searchkick.server_below?("7.3.0") | |
514 | + end | |
490 | 515 | end |
491 | 516 | end | ... | ... |