From 668b7948df1d84548690633968de0174c539fc9b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 4 May 2016 02:35:49 -0700 Subject: [PATCH] Added support for Elasticsearch 5.0 alpha --- CHANGELOG.md | 3 ++- lib/searchkick.rb | 2 +- lib/searchkick/index.rb | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- lib/searchkick/query.rb | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- test/match_test.rb | 6 +++--- test/order_test.rb | 6 ++++++ test/sql_test.rb | 5 +++++ test/test_helper.rb | 10 +++++++--- 8 files changed, 156 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e0812d..d2c9ea6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ -## 1.2.2 [unreleased] +## 1.3.0 [unreleased] +- Added support for Elasticsearch 5.0 alpha - Added support for phrase matches - Added support for procs for `index_prefix` option diff --git a/lib/searchkick.rb b/lib/searchkick.rb index 5f70647..ec38d14 100644 --- a/lib/searchkick.rb +++ b/lib/searchkick.rb @@ -58,7 +58,7 @@ module Searchkick @server_version ||= client.info["version"]["number"] end - def self.below_version?(version) + def self.server_below?(version) Gem::Version.new(server_version) < Gem::Version.new(version) end diff --git a/lib/searchkick/index.rb b/lib/searchkick/index.rb index 1a3ca62..aee861c 100644 --- a/lib/searchkick/index.rb +++ b/lib/searchkick/index.rb @@ -218,6 +218,22 @@ module Searchkick settings = options[:settings] || {} mappings = options[:mappings] else + below22 = Searchkick.server_below?("2.2.0") + below50 = Searchkick.server_below?("5.0.0-alpha1") + default_type = below50 ? "string" : "text" + default_analyzer = below50 ? :default_index : :default + keyword_mapping = + if below50 + { + type: default_type, + index: "not_analyzed" + } + else + { + type: "keyword" + } + end + settings = { analysis: { analyzer: { @@ -226,7 +242,7 @@ module Searchkick tokenizer: "keyword", filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"]) }, - default_index: { + default_analyzer => { type: "custom", # character filters -> tokenizer -> token filters # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html @@ -380,8 +396,8 @@ module Searchkick # - Only apply the synonym expansion at index time # - Don't have the synonym filter applied search # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general. - settings[:analysis][:analyzer][:default_index][:filter].insert(4, "searchkick_synonym") - settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_synonym" + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym" %w(word_start word_middle word_end).each do |type| settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym") @@ -395,8 +411,8 @@ module Searchkick synonyms_path: Searchkick.wordnet_path } - settings[:analysis][:analyzer][:default_index][:filter].insert(4, "searchkick_wordnet") - settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_wordnet" + settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet") + settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet" %w(word_start word_middle word_end).each do |type| settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet") @@ -416,7 +432,7 @@ module Searchkick mapping[conversions_field] = { type: "nested", properties: { - query: {type: "string", analyzer: "searchkick_keyword"}, + query: {type: default_type, analyzer: "searchkick_keyword"}, count: {type: "integer"} } } @@ -430,32 +446,39 @@ module Searchkick word = options[:word] != false && (!options[:match] || options[:match] == :word) mapping_options.values.flatten.uniq.each do |field| - field_mapping = { - type: "multi_field", - fields: {} - } + fields = {} - unless mapping_options[:only_analyzed].include?(field) - field_mapping[:fields][field] = {type: "string", index: "not_analyzed"} + if mapping_options[:only_analyzed].include?(field) + fields[field] = {type: default_type, index: "no"} + else + fields[field] = keyword_mapping end if !options[:searchable] || mapping_options[:searchable].include?(field) if word - field_mapping[:fields]["analyzed"] = {type: "string", index: "analyzed"} + fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer} if mapping_options[:highlight].include?(field) - field_mapping[:fields]["analyzed"][:term_vector] = "with_positions_offsets" + fields["analyzed"][:term_vector] = "with_positions_offsets" end end - mapping_options.except(:highlight, :searchable, :only_analyzed).each do |type, fields| - if options[:match] == type || fields.include?(field) - field_mapping[:fields][type] = {type: "string", index: "analyzed", analyzer: "searchkick_#{type}_index"} + mapping_options.except(:highlight, :searchable, :only_analyzed).each do |type, f| + if options[:match] == type || f.include?(field) + fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"} end end end - mapping[field] = field_mapping + mapping[field] = + if below50 + { + type: "multi_field", + fields: fields + } + elsif fields[field] + fields[field].merge(fields: fields.except(field)) + end end (options[:locations] || []).map(&:to_s).each do |field| @@ -466,7 +489,7 @@ module Searchkick (options[:unsearchable] || []).map(&:to_s).each do |field| mapping[field] = { - type: "string", + type: default_type, index: "no" } end @@ -484,21 +507,35 @@ module Searchkick # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ # however, we can include the not_analyzed field in _all # and the _all index analyzer will take care of it - "{name}" => {type: "string", index: "not_analyzed", include_in_all: !options[:searchable]} + "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable]) } + dynamic_fields["{name}"][:ignore_above] = 256 unless below22 + unless options[:searchable] if options[:match] && options[:match] != :word - dynamic_fields[options[:match]] = {type: "string", index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"} + dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"} end if word - dynamic_fields["analyzed"] = {type: "string", index: "analyzed"} + dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"} end end + # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ + multi_field = + if below50 + { + type: "multi_field", + fields: dynamic_fields + } + else + dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}")) + end + mappings = { _default_: { + _all: {type: default_type, index: "analyzed", analyzer: default_analyzer}, properties: mapping, _routing: routing, # https://gist.github.com/kimchy/2898285 @@ -507,11 +544,7 @@ module Searchkick string_template: { match: "*", match_mapping_type: "string", - mapping: { - # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/ - type: "multi_field", - fields: dynamic_fields - } + mapping: multi_field } } ] diff --git a/lib/searchkick/query.rb b/lib/searchkick/query.rb index 6556cc1..e9d493e 100644 --- a/lib/searchkick/query.rb +++ b/lib/searchkick/query.rb @@ -234,7 +234,6 @@ module Searchkick factor = boost_fields[field] || 1 shared_options = { query: term, - operator: operator, boost: 10 * factor } @@ -246,6 +245,8 @@ module Searchkick :match end + shared_options[:operator] = operator if match_type == :match || below50? + if field == "_all" || field.end_with?(".analyzed") shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false qs.concat [ @@ -260,7 +261,7 @@ module Searchkick qs << shared_options.merge(analyzer: analyzer) end - if misspellings != false + if misspellings != false && (match_type == :match || below50?) qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) } end @@ -625,26 +626,44 @@ module Searchkick def set_filters(payload, filters) if options[:facets] || options[:aggs] - payload[:filter] = { - and: filters - } + if below20? + payload[:filter] = { + and: filters + } + else + payload[:post_filter] = { + bool: { + filter: filters + } + } + end else # more efficient query if no facets - payload[:query] = { - filtered: { - query: payload[:query], - filter: { - and: filters + if below20? + payload[:query] = { + filtered: { + query: payload[:query], + filter: { + and: filters + } } } - } + else + payload[:query] = { + bool: { + must: payload[:query], + filter: filters + } + } + end end end + # TODO id transformation for arrays def set_order(payload) order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc} - # TODO id transformation for arrays - payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? :_id : k, v] }] + id_field = below50? ? :_id : :_uid + payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? id_field : k, v] }] end def where_filters(where) @@ -654,7 +673,11 @@ module Searchkick if field == :or value.each do |or_clause| - filters << {or: or_clause.map { |or_statement| {and: where_filters(or_statement)} }} + if below50? + filters << {or: or_clause.map { |or_statement| {and: where_filters(or_statement)} }} + else + filters << {bool: {should: or_clause.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}} + end end else # expand ranges @@ -688,7 +711,11 @@ module Searchkick when :regexp # support for regexp queries without using a regexp ruby object filters << {regexp: {field => {value: op_value}}} when :not # not equal - filters << {not: {filter: term_filters(field, op_value)}} + if below50? + filters << {not: {filter: term_filters(field, op_value)}} + else + filters << {bool: {must_not: term_filters(field, op_value)}} + end when :all op_value.each do |value| filters << term_filters(field, value) @@ -728,12 +755,20 @@ module Searchkick def term_filters(field, value) if value.is_a?(Array) # in query if value.any?(&:nil?) - {or: [term_filters(field, nil), term_filters(field, value.compact)]} + if below50? + {or: [term_filters(field, nil), term_filters(field, value.compact)]} + else + {bool: {should: [term_filters(field, nil), term_filters(field, value.compact)]}} + end else {in: {field => value}} end elsif value.nil? - {missing: {"field" => field, existence: true, null_value: true}} + if below50? + {missing: {field: field, existence: true, null_value: true}} + else + {bool: {must_not: {exists: {field: field}}}} + end elsif value.is_a?(Regexp) {regexp: {field => {value: value.source}}} else @@ -742,12 +777,19 @@ module Searchkick end def custom_filter(field, value, factor) - { - filter: { - and: where_filters(field => value) - }, - boost_factor: factor - } + if below50? + { + filter: { + and: where_filters(field => value) + }, + boost_factor: factor + } + else + { + filter: where_filters(field => value), + weight: factor + } + end end def boost_filters(boost_by, options = {}) @@ -781,15 +823,19 @@ module Searchkick end def below12? - Searchkick.below_version?("1.2.0") + Searchkick.server_below?("1.2.0") end def below14? - Searchkick.below_version?("1.4.0") + Searchkick.server_below?("1.4.0") end def below20? - Searchkick.below_version?("2.0.0") + Searchkick.server_below?("2.0.0") + end + + def below50? + Searchkick.server_below?("5.0.0-alpha1") end end end diff --git a/test/match_test.rb b/test/match_test.rb index e52861d..2594275 100644 --- a/test/match_test.rb +++ b/test/match_test.rb @@ -110,14 +110,14 @@ class MatchTest < Minitest::Test end def test_misspelling_zucchini_transposition - skip unless elasticsearch_below14? + skip if elasticsearch_below14? store_names ["zucchini"] assert_search "zuccihni", ["zucchini"] assert_search "zuccihni", [], misspellings: {transpositions: false} end def test_misspelling_lasagna - skip unless elasticsearch_below14? + skip if elasticsearch_below14? store_names ["lasagna"] assert_search "lasanga", ["lasagna"], misspellings: {transpositions: true} assert_search "lasgana", ["lasagna"], misspellings: {transpositions: true} @@ -126,7 +126,7 @@ class MatchTest < Minitest::Test end def test_misspelling_lasagna_pasta - skip unless elasticsearch_below14? + skip if elasticsearch_below14? store_names ["lasagna pasta"] assert_search "lasanga", ["lasagna pasta"], misspellings: {transpositions: true} assert_search "lasanga pasta", ["lasagna pasta"], misspellings: {transpositions: true} diff --git a/test/order_test.rb b/test/order_test.rb index 1a8694e..d8e0fd3 100644 --- a/test/order_test.rb +++ b/test/order_test.rb @@ -28,9 +28,15 @@ class OrderTest < Minitest::Test end def test_order_ignore_unmapped + skip unless elasticsearch_below50? assert_order "product", [], order: {not_mapped: {ignore_unmapped: true}} end + def test_order_unmapped_type + skip if elasticsearch_below50? + assert_order "product", [], order: {not_mapped: {unmapped_type: "long"}} + end + def test_order_array store [{name: "San Francisco", latitude: 37.7833, longitude: -122.4167}] assert_order "francisco", ["San Francisco"], order: [{_geo_distance: {location: "0,0"}}] diff --git a/test/sql_test.rb b/test/sql_test.rb index ac0b556..0c4bb51 100644 --- a/test/sql_test.rb +++ b/test/sql_test.rb @@ -77,6 +77,7 @@ class SqlTest < Minitest::Test # select def test_select + skip unless elasticsearch_below50? store [{name: "Product A", store_id: 1}] result = Product.search("product", load: false, select: [:name, :store_id]).first assert_equal %w(id name store_id), result.keys.reject { |k| k.start_with?("_") }.sort @@ -85,12 +86,14 @@ class SqlTest < Minitest::Test end def test_select_array + skip unless elasticsearch_below50? store [{name: "Product A", user_ids: [1, 2]}] result = Product.search("product", load: false, select: [:user_ids]).first assert_equal [1, 2], result.user_ids end def test_select_single_field + skip unless elasticsearch_below50? store [{name: "Product A", store_id: 1}] result = Product.search("product", load: false, select: :name).first assert_equal %w(id name), result.keys.reject { |k| k.start_with?("_") }.sort @@ -99,6 +102,7 @@ class SqlTest < Minitest::Test end def test_select_all + skip unless elasticsearch_below50? store [{name: "Product A", user_ids: [1, 2]}] hit = Product.search("product", select: true).hits.first assert_equal hit["_source"]["name"], "Product A" @@ -106,6 +110,7 @@ class SqlTest < Minitest::Test end def test_select_none + skip unless elasticsearch_below50? store [{name: "Product A", user_ids: [1, 2]}] hit = Product.search("product", select: []).hits.first assert_nil hit["_source"] diff --git a/test/test_helper.rb b/test/test_helper.rb index 0662177..c67a89a 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -21,12 +21,16 @@ I18n.config.enforce_available_locales = true ActiveJob::Base.logger = nil if defined?(ActiveJob) ActiveSupport::LogSubscriber.logger = Logger.new(STDOUT) if ENV["NOTIFICATIONS"] +def elasticsearch_below50? + Searchkick.server_below?("5.0.0-alpha1") +end + def elasticsearch_below20? - Searchkick.below_version?("2.0.0") + Searchkick.server_below?("2.0.0") end def elasticsearch_below14? - Searchkick.server_version.starts_with?("1.4.0") + Searchkick.server_below?("1.4.0") end def mongoid2? @@ -292,7 +296,7 @@ class Store mappings: { store: { properties: { - name: {type: "string", analyzer: "keyword"} + name: elasticsearch_below50? ? {type: "string", analyzer: "keyword"} : {type: "keyword"} } } } -- libgit2 0.21.0