From cd9aa612d66ace44be9253cd69d5e8e54c6dcbe7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 5 Aug 2013 00:22:20 -0700 Subject: [PATCH] First version of typeahead - closes #5 --- README.md | 20 ++++++++++++++++++-- lib/searchkick/reindex.rb | 34 ++++++++++++++++++++++++++++++++++ lib/searchkick/search.rb | 43 ++++++++++++++++++++++++++++++------------- test/match_test.rb | 17 +++++++++++++++++ test/test_helper.rb | 3 ++- 5 files changed, 101 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 920bf7e..d901886 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Plus: - query like SQL - no need to learn a new query language - reindex without downtime - easily personalize results for each user [master branch] +- typeahead / autocomplete [master branch] :tangerine: Battle-tested at [Instacart](https://www.instacart.com) @@ -130,6 +131,22 @@ To change this, use: Product.search "fresh honey", partial: true # fresh OR honey ``` +### Typeahead / Autocomplete [master branch] + +You must specify which fields use this feature since this can increase the index size significantly. Don’t worry - this gives you blazing fast queries. + +```ruby +class Product < ActiveRecord::Base + searchkick typeahead: [:name] +end +``` + +Reindex and search with: + +```ruby +Product.search "puddi", typeahead: true +``` + ### Synonyms ```ruby @@ -349,11 +366,10 @@ end ## Thanks -Thanks to Karel Minarik for [Tire](https://github.com/karmi/tire) and Jaroslav Kalistsuk for [zero downtime reindexing](https://gist.github.com/jarosan/3124884). +Thanks to Karel Minarik for [Tire](https://github.com/karmi/tire), Jaroslav Kalistsuk for [zero downtime reindexing](https://gist.github.com/jarosan/3124884), and Alex Leschenko for [Elasticsearch autocomplete](https://github.com/leschenko/elasticsearch_autocomplete). 
## TODO -- Custom results for each user - Make Searchkick work with any language - Built-in synonyms from WordNet - [Did you mean?](http://www.elasticsearch.org/guide/reference/api/search/suggest/) diff --git a/lib/searchkick/reindex.rb b/lib/searchkick/reindex.rb index a08b552..66a667d 100644 --- a/lib/searchkick/reindex.rb +++ b/lib/searchkick/reindex.rb @@ -68,6 +68,17 @@ module Searchkick type: "custom", tokenizer: "standard", filter: ["standard", "lowercase", "asciifolding", "stop", "snowball"] + }, + # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb + searchkick_typeahead_index: { + type: "custom", + tokenizer: "searchkick_typeahead_ngram", + filter: ["lowercase", "asciifolding"] + }, + searchkick_typeahead_search: { + type: "custom", + tokenizer: "keyword", + filter: ["lowercase", "asciifolding"] } }, filter: { @@ -82,9 +93,18 @@ module Searchkick output_unigrams: false, output_unigrams_if_no_shingles: true } + }, + tokenizer: { + searchkick_typeahead_ngram: { + type: "edgeNGram", + min_gram: 1, + max_gram: 50 + } } } }.merge(options[:settings] || {}) + + # synonyms synonyms = options[:synonyms] || [] if synonyms.any? 
settings[:analysis][:filter][:searchkick_synonym] = { @@ -103,6 +123,8 @@ module Searchkick end mapping = {} + + # conversions if options[:conversions] mapping[:conversions] = { type: "nested", @@ -113,6 +135,18 @@ module Searchkick } end + # typeahead + (options[:typeahead] || []).each do |field| + mapping[field] = { + type: "multi_field", + fields: { + field => {type: "string", index: "not_analyzed"}, + "analyzed" => {type: "string", index: "analyzed"}, + "typeahead" => {type: "string", index: "analyzed", analyzer: "searchkick_typeahead_index"} + } + } + end + mappings = { document_type.to_sym => { properties: mapping, diff --git a/lib/searchkick/search.rb b/lib/searchkick/search.rb index eeb5397..9cbf46d 100644 --- a/lib/searchkick/search.rb +++ b/lib/searchkick/search.rb @@ -3,7 +3,20 @@ module Searchkick def search(term, options = {}) term = term.to_s - fields = options[:fields] ? options[:fields].map{|f| "#{f}.analyzed" } : ["_all"] + fields = + if options[:fields] + if options[:typeahead] + options[:fields].map{|f| "#{f}.typeahead" } + else + options[:fields].map{|f| "#{f}.analyzed" } + end + else + if options[:typeahead] + (@searchkick_options[:typeahead] || []).map{|f| "#{f}.typeahead" } + else + ["_all"] + end + end operator = options[:partial] ? "or" : "and" load = options[:load].nil? ? true : options[:load] load = (options[:include] ? 
{include: options[:include]} : true) if load @@ -22,18 +35,22 @@ module Searchkick query do boolean do must do - dis_max do - query do - match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search" - end - query do - match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search2" - end - query do - match fields, term, use_dis_max: false, fuzziness: 1, max_expansions: 1, operator: operator, analyzer: "searchkick_search" - end - query do - match fields, term, use_dis_max: false, fuzziness: 1, max_expansions: 1, operator: operator, analyzer: "searchkick_search2" + if options[:typeahead] + match fields, term, analyzer: "searchkick_typeahead_search" + else + dis_max do + query do + match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search" + end + query do + match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search2" + end + query do + match fields, term, use_dis_max: false, fuzziness: 1, max_expansions: 1, operator: operator, analyzer: "searchkick_search" + end + query do + match fields, term, use_dis_max: false, fuzziness: 1, max_expansions: 1, operator: operator, analyzer: "searchkick_search2" + end end end end diff --git a/test/match_test.rb b/test/match_test.rb index de3d0a3..7c3c526 100644 --- a/test/match_test.rb +++ b/test/match_test.rb @@ -110,4 +110,21 @@ class TestMatch < Minitest::Unit::TestCase assert_search "almondmilks", ["Almond Milk"] end + # typeahead + + def test_typeahead + store_names ["Hummus"] + assert_search "hum", ["Hummus"], typeahead: true + end + + def test_typeahead_two_words + store_names ["Organic Hummus"] + assert_search "hum", [], typeahead: true + end + + def test_typeahead_fields + store_names ["Hummus"] + assert_search "hum", ["Hummus"], typeahead: true, fields: [:name] + end + end diff --git a/test/test_helper.rb b/test/test_helper.rb index e324966..2b0cff8 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -47,7 +47,8 @@ class Product < 
ActiveRecord::Base ["qtip", "cotton swab"], ["burger", "hamburger"], ["bandaid", "bandag"] - ] + ], + typeahead: [:name] attr_accessor :conversions, :user_ids -- libgit2 0.21.0