Commit bf98f61bc9618eef3aa57d557a0f5efb5f8e22fe
1 parent: ee67ffe1
Exists in master and in 21 other branches
Moved index options to new file
Showing 3 changed files with 362 additions and 356 deletions
Show diff stats
lib/searchkick.rb
lib/searchkick/index.rb
1 | 1 | module Searchkick |
2 | 2 | class Index |
3 | + include IndexOptions | |
4 | + | |
3 | 5 | attr_reader :name, :options |
4 | 6 | |
5 | 7 | def initialize(name, options = {}) |
... | ... | @@ -244,362 +246,6 @@ module Searchkick |
244 | 246 | end |
245 | 247 | end |
246 | 248 | |
247 | - def index_options | |
248 | - options = @options | |
249 | - language = options[:language] | |
250 | - language = language.call if language.respond_to?(:call) | |
251 | - | |
252 | - if options[:mappings] && !options[:merge_mappings] | |
253 | - settings = options[:settings] || {} | |
254 | - mappings = options[:mappings] | |
255 | - else | |
256 | - below22 = Searchkick.server_below?("2.2.0") | |
257 | - below50 = Searchkick.server_below?("5.0.0-alpha1") | |
258 | - default_type = below50 ? "string" : "text" | |
259 | - default_analyzer = below50 ? :default_index : :default | |
260 | - keyword_mapping = | |
261 | - if below50 | |
262 | - { | |
263 | - type: default_type, | |
264 | - index: "not_analyzed" | |
265 | - } | |
266 | - else | |
267 | - { | |
268 | - type: "keyword" | |
269 | - } | |
270 | - end | |
271 | - | |
272 | - keyword_mapping[:ignore_above] = 256 unless below22 | |
273 | - | |
274 | - settings = { | |
275 | - analysis: { | |
276 | - analyzer: { | |
277 | - searchkick_keyword: { | |
278 | - type: "custom", | |
279 | - tokenizer: "keyword", | |
280 | - filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"]) | |
281 | - }, | |
282 | - default_analyzer => { | |
283 | - type: "custom", | |
284 | - # character filters -> tokenizer -> token filters | |
285 | - # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html | |
286 | - char_filter: ["ampersand"], | |
287 | - tokenizer: "standard", | |
288 | - # synonym should come last, after stemming and shingle | |
289 | - # shingle must come before searchkick_stemmer | |
290 | - filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"] | |
291 | - }, | |
292 | - searchkick_search: { | |
293 | - type: "custom", | |
294 | - char_filter: ["ampersand"], | |
295 | - tokenizer: "standard", | |
296 | - filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"] | |
297 | - }, | |
298 | - searchkick_search2: { | |
299 | - type: "custom", | |
300 | - char_filter: ["ampersand"], | |
301 | - tokenizer: "standard", | |
302 | - filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"] | |
303 | - }, | |
304 | - # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb | |
305 | - searchkick_autocomplete_index: { | |
306 | - type: "custom", | |
307 | - tokenizer: "searchkick_autocomplete_ngram", | |
308 | - filter: ["lowercase", "asciifolding"] | |
309 | - }, | |
310 | - searchkick_autocomplete_search: { | |
311 | - type: "custom", | |
312 | - tokenizer: "keyword", | |
313 | - filter: ["lowercase", "asciifolding"] | |
314 | - }, | |
315 | - searchkick_word_search: { | |
316 | - type: "custom", | |
317 | - tokenizer: "standard", | |
318 | - filter: ["lowercase", "asciifolding"] | |
319 | - }, | |
320 | - searchkick_suggest_index: { | |
321 | - type: "custom", | |
322 | - tokenizer: "standard", | |
323 | - filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"] | |
324 | - }, | |
325 | - searchkick_text_start_index: { | |
326 | - type: "custom", | |
327 | - tokenizer: "keyword", | |
328 | - filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] | |
329 | - }, | |
330 | - searchkick_text_middle_index: { | |
331 | - type: "custom", | |
332 | - tokenizer: "keyword", | |
333 | - filter: ["lowercase", "asciifolding", "searchkick_ngram"] | |
334 | - }, | |
335 | - searchkick_text_end_index: { | |
336 | - type: "custom", | |
337 | - tokenizer: "keyword", | |
338 | - filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] | |
339 | - }, | |
340 | - searchkick_word_start_index: { | |
341 | - type: "custom", | |
342 | - tokenizer: "standard", | |
343 | - filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] | |
344 | - }, | |
345 | - searchkick_word_middle_index: { | |
346 | - type: "custom", | |
347 | - tokenizer: "standard", | |
348 | - filter: ["lowercase", "asciifolding", "searchkick_ngram"] | |
349 | - }, | |
350 | - searchkick_word_end_index: { | |
351 | - type: "custom", | |
352 | - tokenizer: "standard", | |
353 | - filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] | |
354 | - } | |
355 | - }, | |
356 | - filter: { | |
357 | - searchkick_index_shingle: { | |
358 | - type: "shingle", | |
359 | - token_separator: "" | |
360 | - }, | |
361 | - # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7 | |
362 | - searchkick_search_shingle: { | |
363 | - type: "shingle", | |
364 | - token_separator: "", | |
365 | - output_unigrams: false, | |
366 | - output_unigrams_if_no_shingles: true | |
367 | - }, | |
368 | - searchkick_suggest_shingle: { | |
369 | - type: "shingle", | |
370 | - max_shingle_size: 5 | |
371 | - }, | |
372 | - searchkick_edge_ngram: { | |
373 | - type: "edgeNGram", | |
374 | - min_gram: 1, | |
375 | - max_gram: 50 | |
376 | - }, | |
377 | - searchkick_ngram: { | |
378 | - type: "nGram", | |
379 | - min_gram: 1, | |
380 | - max_gram: 50 | |
381 | - }, | |
382 | - searchkick_stemmer: { | |
383 | - # use stemmer if language is lowercase, snowball otherwise | |
384 | - # TODO deprecate language option in favor of stemmer | |
385 | - type: language == language.to_s.downcase ? "stemmer" : "snowball", | |
386 | - language: language || "English" | |
387 | - } | |
388 | - }, | |
389 | - char_filter: { | |
390 | - # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html | |
391 | - # &_to_and | |
392 | - ampersand: { | |
393 | - type: "mapping", | |
394 | - mappings: ["&=> and "] | |
395 | - } | |
396 | - }, | |
397 | - tokenizer: { | |
398 | - searchkick_autocomplete_ngram: { | |
399 | - type: "edgeNGram", | |
400 | - min_gram: 1, | |
401 | - max_gram: 50 | |
402 | - } | |
403 | - } | |
404 | - } | |
405 | - } | |
406 | - | |
407 | - if Searchkick.env == "test" | |
408 | - settings[:number_of_shards] = 1 | |
409 | - settings[:number_of_replicas] = 0 | |
410 | - end | |
411 | - | |
412 | - if options[:similarity] | |
413 | - settings[:similarity] = {default: {type: options[:similarity]}} | |
414 | - end | |
415 | - | |
416 | - settings.deep_merge!(options[:settings] || {}) | |
417 | - | |
418 | - # synonyms | |
419 | - synonyms = options[:synonyms] || [] | |
420 | - | |
421 | - synonyms = synonyms.call if synonyms.respond_to?(:call) | |
422 | - | |
423 | - if synonyms.any? | |
424 | - settings[:analysis][:filter][:searchkick_synonym] = { | |
425 | - type: "synonym", | |
426 | - synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") } | |
427 | - } | |
428 | - # choosing a place for the synonym filter when stemming is not easy | |
429 | - # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8 | |
430 | - # TODO use a snowball stemmer on synonyms when creating the token filter | |
431 | - | |
432 | - # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html | |
433 | - # I find the following approach effective if you are doing multi-word synonyms (synonym phrases): | |
434 | - # - Only apply the synonym expansion at index time | |
435 | - # - Don't have the synonym filter applied at search time | |
436 | - # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general. | |
437 | - settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") | |
438 | - settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym" | |
439 | - | |
440 | - %w(word_start word_middle word_end).each do |type| | |
441 | - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym") | |
442 | - end | |
443 | - end | |
444 | - | |
445 | - if options[:wordnet] | |
446 | - settings[:analysis][:filter][:searchkick_wordnet] = { | |
447 | - type: "synonym", | |
448 | - format: "wordnet", | |
449 | - synonyms_path: Searchkick.wordnet_path | |
450 | - } | |
451 | - | |
452 | - settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet") | |
453 | - settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet" | |
454 | - | |
455 | - %w(word_start word_middle word_end).each do |type| | |
456 | - settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet") | |
457 | - end | |
458 | - end | |
459 | - | |
460 | - if options[:special_characters] == false | |
461 | - settings[:analysis][:analyzer].each do |_, analyzer_settings| | |
462 | - analyzer_settings[:filter].reject! { |f| f == "asciifolding" } | |
463 | - end | |
464 | - end | |
465 | - | |
466 | - mapping = {} | |
467 | - | |
468 | - # conversions | |
469 | - Array(options[:conversions]).each do |conversions_field| | |
470 | - mapping[conversions_field] = { | |
471 | - type: "nested", | |
472 | - properties: { | |
473 | - query: {type: default_type, analyzer: "searchkick_keyword"}, | |
474 | - count: {type: "integer"} | |
475 | - } | |
476 | - } | |
477 | - end | |
478 | - | |
479 | - mapping_options = Hash[ | |
480 | - [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable, :only_analyzed] | |
481 | - .map { |type| [type, (options[type] || []).map(&:to_s)] } | |
482 | - ] | |
483 | - | |
484 | - word = options[:word] != false && (!options[:match] || options[:match] == :word) | |
485 | - | |
486 | - mapping_options.values.flatten.uniq.each do |field| | |
487 | - fields = {} | |
488 | - | |
489 | - if mapping_options[:only_analyzed].include?(field) || (options.key?(:filterable) && !mapping_options[:filterable].include?(field)) | |
490 | - fields[field] = {type: default_type, index: "no"} | |
491 | - else | |
492 | - fields[field] = keyword_mapping | |
493 | - end | |
494 | - | |
495 | - if !options[:searchable] || mapping_options[:searchable].include?(field) | |
496 | - if word | |
497 | - fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer} | |
498 | - | |
499 | - if mapping_options[:highlight].include?(field) | |
500 | - fields["analyzed"][:term_vector] = "with_positions_offsets" | |
501 | - end | |
502 | - end | |
503 | - | |
504 | - mapping_options.except(:highlight, :searchable, :filterable, :only_analyzed, :word).each do |type, f| | |
505 | - if options[:match] == type || f.include?(field) | |
506 | - fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"} | |
507 | - end | |
508 | - end | |
509 | - end | |
510 | - | |
511 | - mapping[field] = | |
512 | - if below50 | |
513 | - { | |
514 | - type: "multi_field", | |
515 | - fields: fields | |
516 | - } | |
517 | - elsif fields[field] | |
518 | - fields[field].merge(fields: fields.except(field)) | |
519 | - end | |
520 | - end | |
521 | - | |
522 | - (options[:locations] || []).map(&:to_s).each do |field| | |
523 | - mapping[field] = { | |
524 | - type: "geo_point" | |
525 | - } | |
526 | - end | |
527 | - | |
528 | - (options[:unsearchable] || []).map(&:to_s).each do |field| | |
529 | - mapping[field] = { | |
530 | - type: default_type, | |
531 | - index: "no" | |
532 | - } | |
533 | - end | |
534 | - | |
535 | - routing = {} | |
536 | - if options[:routing] | |
537 | - routing = {required: true} | |
538 | - unless options[:routing] == true | |
539 | - routing[:path] = options[:routing].to_s | |
540 | - end | |
541 | - end | |
542 | - | |
543 | - dynamic_fields = { | |
544 | - # analyzed field must be the default field for include_in_all | |
545 | - # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ | |
546 | - # however, we can include the not_analyzed field in _all | |
547 | - # and the _all index analyzer will take care of it | |
548 | - "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable]) | |
549 | - } | |
550 | - | |
551 | - if options.key?(:filterable) | |
552 | - dynamic_fields["{name}"] = {type: default_type, index: "no"} | |
553 | - end | |
554 | - | |
555 | - dynamic_fields["{name}"][:ignore_above] = 256 unless below22 | |
556 | - | |
557 | - unless options[:searchable] | |
558 | - if options[:match] && options[:match] != :word | |
559 | - dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"} | |
560 | - end | |
561 | - | |
562 | - if word | |
563 | - dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"} | |
564 | - end | |
565 | - end | |
566 | - | |
567 | - # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ | |
568 | - multi_field = | |
569 | - if below50 | |
570 | - { | |
571 | - type: "multi_field", | |
572 | - fields: dynamic_fields | |
573 | - } | |
574 | - else | |
575 | - dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}")) | |
576 | - end | |
577 | - | |
578 | - mappings = { | |
579 | - _default_: { | |
580 | - _all: {type: default_type, index: "analyzed", analyzer: default_analyzer}, | |
581 | - properties: mapping, | |
582 | - _routing: routing, | |
583 | - # https://gist.github.com/kimchy/2898285 | |
584 | - dynamic_templates: [ | |
585 | - { | |
586 | - string_template: { | |
587 | - match: "*", | |
588 | - match_mapping_type: "string", | |
589 | - mapping: multi_field | |
590 | - } | |
591 | - } | |
592 | - ] | |
593 | - } | |
594 | - }.deep_merge(options[:mappings] || {}) | |
595 | - end | |
596 | - | |
597 | - { | |
598 | - settings: settings, | |
599 | - mappings: mappings | |
600 | - } | |
601 | - end | |
602 | - | |
603 | 249 | # other |
604 | 250 | |
605 | 251 | def tokens(text, options = {}) | ... | ... |
... | ... | @@ -0,0 +1,359 @@ |
1 | +module Searchkick | |
2 | + module IndexOptions | |
3 | + def index_options | |
4 | + options = @options | |
5 | + language = options[:language] | |
6 | + language = language.call if language.respond_to?(:call) | |
7 | + | |
8 | + if options[:mappings] && !options[:merge_mappings] | |
9 | + settings = options[:settings] || {} | |
10 | + mappings = options[:mappings] | |
11 | + else | |
12 | + below22 = Searchkick.server_below?("2.2.0") | |
13 | + below50 = Searchkick.server_below?("5.0.0-alpha1") | |
14 | + default_type = below50 ? "string" : "text" | |
15 | + default_analyzer = below50 ? :default_index : :default | |
16 | + keyword_mapping = | |
17 | + if below50 | |
18 | + { | |
19 | + type: default_type, | |
20 | + index: "not_analyzed" | |
21 | + } | |
22 | + else | |
23 | + { | |
24 | + type: "keyword" | |
25 | + } | |
26 | + end | |
27 | + | |
28 | + keyword_mapping[:ignore_above] = 256 unless below22 | |
29 | + | |
30 | + settings = { | |
31 | + analysis: { | |
32 | + analyzer: { | |
33 | + searchkick_keyword: { | |
34 | + type: "custom", | |
35 | + tokenizer: "keyword", | |
36 | + filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"]) | |
37 | + }, | |
38 | + default_analyzer => { | |
39 | + type: "custom", | |
40 | + # character filters -> tokenizer -> token filters | |
41 | + # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html | |
42 | + char_filter: ["ampersand"], | |
43 | + tokenizer: "standard", | |
44 | + # synonym should come last, after stemming and shingle | |
45 | + # shingle must come before searchkick_stemmer | |
46 | + filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"] | |
47 | + }, | |
48 | + searchkick_search: { | |
49 | + type: "custom", | |
50 | + char_filter: ["ampersand"], | |
51 | + tokenizer: "standard", | |
52 | + filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"] | |
53 | + }, | |
54 | + searchkick_search2: { | |
55 | + type: "custom", | |
56 | + char_filter: ["ampersand"], | |
57 | + tokenizer: "standard", | |
58 | + filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"] | |
59 | + }, | |
60 | + # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb | |
61 | + searchkick_autocomplete_index: { | |
62 | + type: "custom", | |
63 | + tokenizer: "searchkick_autocomplete_ngram", | |
64 | + filter: ["lowercase", "asciifolding"] | |
65 | + }, | |
66 | + searchkick_autocomplete_search: { | |
67 | + type: "custom", | |
68 | + tokenizer: "keyword", | |
69 | + filter: ["lowercase", "asciifolding"] | |
70 | + }, | |
71 | + searchkick_word_search: { | |
72 | + type: "custom", | |
73 | + tokenizer: "standard", | |
74 | + filter: ["lowercase", "asciifolding"] | |
75 | + }, | |
76 | + searchkick_suggest_index: { | |
77 | + type: "custom", | |
78 | + tokenizer: "standard", | |
79 | + filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"] | |
80 | + }, | |
81 | + searchkick_text_start_index: { | |
82 | + type: "custom", | |
83 | + tokenizer: "keyword", | |
84 | + filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] | |
85 | + }, | |
86 | + searchkick_text_middle_index: { | |
87 | + type: "custom", | |
88 | + tokenizer: "keyword", | |
89 | + filter: ["lowercase", "asciifolding", "searchkick_ngram"] | |
90 | + }, | |
91 | + searchkick_text_end_index: { | |
92 | + type: "custom", | |
93 | + tokenizer: "keyword", | |
94 | + filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] | |
95 | + }, | |
96 | + searchkick_word_start_index: { | |
97 | + type: "custom", | |
98 | + tokenizer: "standard", | |
99 | + filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] | |
100 | + }, | |
101 | + searchkick_word_middle_index: { | |
102 | + type: "custom", | |
103 | + tokenizer: "standard", | |
104 | + filter: ["lowercase", "asciifolding", "searchkick_ngram"] | |
105 | + }, | |
106 | + searchkick_word_end_index: { | |
107 | + type: "custom", | |
108 | + tokenizer: "standard", | |
109 | + filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] | |
110 | + } | |
111 | + }, | |
112 | + filter: { | |
113 | + searchkick_index_shingle: { | |
114 | + type: "shingle", | |
115 | + token_separator: "" | |
116 | + }, | |
117 | + # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7 | |
118 | + searchkick_search_shingle: { | |
119 | + type: "shingle", | |
120 | + token_separator: "", | |
121 | + output_unigrams: false, | |
122 | + output_unigrams_if_no_shingles: true | |
123 | + }, | |
124 | + searchkick_suggest_shingle: { | |
125 | + type: "shingle", | |
126 | + max_shingle_size: 5 | |
127 | + }, | |
128 | + searchkick_edge_ngram: { | |
129 | + type: "edgeNGram", | |
130 | + min_gram: 1, | |
131 | + max_gram: 50 | |
132 | + }, | |
133 | + searchkick_ngram: { | |
134 | + type: "nGram", | |
135 | + min_gram: 1, | |
136 | + max_gram: 50 | |
137 | + }, | |
138 | + searchkick_stemmer: { | |
139 | + # use stemmer if language is lowercase, snowball otherwise | |
140 | + # TODO deprecate language option in favor of stemmer | |
141 | + type: language == language.to_s.downcase ? "stemmer" : "snowball", | |
142 | + language: language || "English" | |
143 | + } | |
144 | + }, | |
145 | + char_filter: { | |
146 | + # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html | |
147 | + # &_to_and | |
148 | + ampersand: { | |
149 | + type: "mapping", | |
150 | + mappings: ["&=> and "] | |
151 | + } | |
152 | + }, | |
153 | + tokenizer: { | |
154 | + searchkick_autocomplete_ngram: { | |
155 | + type: "edgeNGram", | |
156 | + min_gram: 1, | |
157 | + max_gram: 50 | |
158 | + } | |
159 | + } | |
160 | + } | |
161 | + } | |
162 | + | |
163 | + if Searchkick.env == "test" | |
164 | + settings[:number_of_shards] = 1 | |
165 | + settings[:number_of_replicas] = 0 | |
166 | + end | |
167 | + | |
168 | + if options[:similarity] | |
169 | + settings[:similarity] = {default: {type: options[:similarity]}} | |
170 | + end | |
171 | + | |
172 | + settings.deep_merge!(options[:settings] || {}) | |
173 | + | |
174 | + # synonyms | |
175 | + synonyms = options[:synonyms] || [] | |
176 | + | |
177 | + synonyms = synonyms.call if synonyms.respond_to?(:call) | |
178 | + | |
179 | + if synonyms.any? | |
180 | + settings[:analysis][:filter][:searchkick_synonym] = { | |
181 | + type: "synonym", | |
182 | + synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") } | |
183 | + } | |
184 | + # choosing a place for the synonym filter when stemming is not easy | |
185 | + # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8 | |
186 | + # TODO use a snowball stemmer on synonyms when creating the token filter | |
187 | + | |
188 | + # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html | |
189 | + # I find the following approach effective if you are doing multi-word synonyms (synonym phrases): | |
190 | + # - Only apply the synonym expansion at index time | |
191 | + # - Don't have the synonym filter applied at search time | |
192 | + # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general. | |
193 | + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") | |
194 | + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym" | |
195 | + | |
196 | + %w(word_start word_middle word_end).each do |type| | |
197 | + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym") | |
198 | + end | |
199 | + end | |
200 | + | |
201 | + if options[:wordnet] | |
202 | + settings[:analysis][:filter][:searchkick_wordnet] = { | |
203 | + type: "synonym", | |
204 | + format: "wordnet", | |
205 | + synonyms_path: Searchkick.wordnet_path | |
206 | + } | |
207 | + | |
208 | + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet") | |
209 | + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet" | |
210 | + | |
211 | + %w(word_start word_middle word_end).each do |type| | |
212 | + settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet") | |
213 | + end | |
214 | + end | |
215 | + | |
216 | + if options[:special_characters] == false | |
217 | + settings[:analysis][:analyzer].each do |_, analyzer_settings| | |
218 | + analyzer_settings[:filter].reject! { |f| f == "asciifolding" } | |
219 | + end | |
220 | + end | |
221 | + | |
222 | + mapping = {} | |
223 | + | |
224 | + # conversions | |
225 | + Array(options[:conversions]).each do |conversions_field| | |
226 | + mapping[conversions_field] = { | |
227 | + type: "nested", | |
228 | + properties: { | |
229 | + query: {type: default_type, analyzer: "searchkick_keyword"}, | |
230 | + count: {type: "integer"} | |
231 | + } | |
232 | + } | |
233 | + end | |
234 | + | |
235 | + mapping_options = Hash[ | |
236 | + [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable, :only_analyzed] | |
237 | + .map { |type| [type, (options[type] || []).map(&:to_s)] } | |
238 | + ] | |
239 | + | |
240 | + word = options[:word] != false && (!options[:match] || options[:match] == :word) | |
241 | + | |
242 | + mapping_options.values.flatten.uniq.each do |field| | |
243 | + fields = {} | |
244 | + | |
245 | + if mapping_options[:only_analyzed].include?(field) || (options.key?(:filterable) && !mapping_options[:filterable].include?(field)) | |
246 | + fields[field] = {type: default_type, index: "no"} | |
247 | + else | |
248 | + fields[field] = keyword_mapping | |
249 | + end | |
250 | + | |
251 | + if !options[:searchable] || mapping_options[:searchable].include?(field) | |
252 | + if word | |
253 | + fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer} | |
254 | + | |
255 | + if mapping_options[:highlight].include?(field) | |
256 | + fields["analyzed"][:term_vector] = "with_positions_offsets" | |
257 | + end | |
258 | + end | |
259 | + | |
260 | + mapping_options.except(:highlight, :searchable, :filterable, :only_analyzed, :word).each do |type, f| | |
261 | + if options[:match] == type || f.include?(field) | |
262 | + fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"} | |
263 | + end | |
264 | + end | |
265 | + end | |
266 | + | |
267 | + mapping[field] = | |
268 | + if below50 | |
269 | + { | |
270 | + type: "multi_field", | |
271 | + fields: fields | |
272 | + } | |
273 | + elsif fields[field] | |
274 | + fields[field].merge(fields: fields.except(field)) | |
275 | + end | |
276 | + end | |
277 | + | |
278 | + (options[:locations] || []).map(&:to_s).each do |field| | |
279 | + mapping[field] = { | |
280 | + type: "geo_point" | |
281 | + } | |
282 | + end | |
283 | + | |
284 | + (options[:unsearchable] || []).map(&:to_s).each do |field| | |
285 | + mapping[field] = { | |
286 | + type: default_type, | |
287 | + index: "no" | |
288 | + } | |
289 | + end | |
290 | + | |
291 | + routing = {} | |
292 | + if options[:routing] | |
293 | + routing = {required: true} | |
294 | + unless options[:routing] == true | |
295 | + routing[:path] = options[:routing].to_s | |
296 | + end | |
297 | + end | |
298 | + | |
299 | + dynamic_fields = { | |
300 | + # analyzed field must be the default field for include_in_all | |
301 | + # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ | |
302 | + # however, we can include the not_analyzed field in _all | |
303 | + # and the _all index analyzer will take care of it | |
304 | + "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable]) | |
305 | + } | |
306 | + | |
307 | + if options.key?(:filterable) | |
308 | + dynamic_fields["{name}"] = {type: default_type, index: "no"} | |
309 | + end | |
310 | + | |
311 | + dynamic_fields["{name}"][:ignore_above] = 256 unless below22 | |
312 | + | |
313 | + unless options[:searchable] | |
314 | + if options[:match] && options[:match] != :word | |
315 | + dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"} | |
316 | + end | |
317 | + | |
318 | + if word | |
319 | + dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"} | |
320 | + end | |
321 | + end | |
322 | + | |
323 | + # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ | |
324 | + multi_field = | |
325 | + if below50 | |
326 | + { | |
327 | + type: "multi_field", | |
328 | + fields: dynamic_fields | |
329 | + } | |
330 | + else | |
331 | + dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}")) | |
332 | + end | |
333 | + | |
334 | + mappings = { | |
335 | + _default_: { | |
336 | + _all: {type: default_type, index: "analyzed", analyzer: default_analyzer}, | |
337 | + properties: mapping, | |
338 | + _routing: routing, | |
339 | + # https://gist.github.com/kimchy/2898285 | |
340 | + dynamic_templates: [ | |
341 | + { | |
342 | + string_template: { | |
343 | + match: "*", | |
344 | + match_mapping_type: "string", | |
345 | + mapping: multi_field | |
346 | + } | |
347 | + } | |
348 | + ] | |
349 | + } | |
350 | + }.deep_merge(options[:mappings] || {}) | |
351 | + end | |
352 | + | |
353 | + { | |
354 | + settings: settings, | |
355 | + mappings: mappings | |
356 | + } | |
357 | + end | |
358 | + end | |
359 | +end | ... | ... |