Commit 1fd43efcd4d189fcb1943e8dc17cfedf0df54f54
Exists in
master
and in
21 other branches
Merged master
Showing
19 changed files
with
851 additions
and
402 deletions
Show diff stats
CHANGELOG.md
1 | +## 0.5.3 | |
2 | + | |
3 | +- Fixed bug w/ word_* queries | |
4 | + | |
5 | +## 0.5.2 | |
6 | + | |
7 | +- Use after_commit hook for ActiveRecord to prevent data inconsistencies | |
8 | + | |
9 | +## 0.5.1 | |
10 | + | |
11 | +- Replaced stop words with common terms query | |
12 | +- Added language option | |
13 | +- Fixed bug with empty array in where clause | |
14 | +- Fixed bug with MongoDB integer _id | |
15 | +- Fixed reindex bug when callbacks disabled | |
16 | + | |
17 | +## 0.5.0 | |
18 | + | |
19 | +- Better control over partial matches | |
20 | +- Added merge_mappings option | |
21 | +- Added batch_size option | |
22 | +- Fixed bug with nil where clauses | |
23 | + | |
1 | 24 | ## 0.4.2 |
2 | 25 | |
3 | 26 | - Added `should_index?` method to control which records are indexed | ... | ... |
Gemfile
README.md
... | ... | @@ -21,12 +21,14 @@ Plus: |
21 | 21 | - “Did you mean” suggestions |
22 | 22 | - works with ActiveRecord and Mongoid |
23 | 23 | |
24 | -:zap: Even better with [Searchjoy](http://ankane.github.io/searchjoy/) | |
25 | - | |
26 | 24 | :tangerine: Battle-tested at [Instacart](https://www.instacart.com) |
27 | 25 | |
28 | 26 | [Build Status](https://travis-ci.org/ankane/searchkick)
29 | 27 | |
28 | +We highly recommend tracking queries and conversions | |
29 | + | |
30 | +:zap: [Searchjoy](http://ankane.github.io/searchjoy/) makes it easy | |
31 | + | |
30 | 32 | ## Get Started |
31 | 33 | |
32 | 34 | [Install Elasticsearch](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/setup.html). For Homebrew, use: |
... | ... | @@ -148,6 +150,44 @@ To change this, use: |
148 | 150 | Product.search "fresh honey", partial: true # fresh OR honey |
149 | 151 | ``` |
150 | 152 | |
153 | +By default, results must match the entire word - `back` will not match `backpack`. You can change this behavior with: | |
154 | + | |
155 | +```ruby | |
156 | +class Product < ActiveRecord::Base | |
157 | + searchkick word_start: [:name] | |
158 | +end | |
159 | +``` | |
160 | + | |
161 | +And to search: | |
162 | + | |
163 | +```ruby | |
164 | +Product.search "back", fields: [{name: :word_start}] | |
165 | +``` | |
166 | + | |
167 | +Available options are: | |
168 | + | |
169 | +```ruby | |
170 | +:word # default | |
171 | +:word_start | |
172 | +:word_middle | |
173 | +:word_end | |
174 | +:text_start | |
175 | +:text_middle | |
176 | +:text_end | |
177 | +``` | |
178 | + | |
179 | +### Language | |
180 | + | |
181 | +Searchkick defaults to English for stemming. To change this, use: | |
182 | + | |
183 | +```ruby | |
184 | +class Product < ActiveRecord::Base | |
185 | + searchkick language: "German" | |
186 | +end | |
187 | +``` | |
188 | + | |
189 | +[See the list of languages](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/analysis-snowball-tokenfilter.html) | |
190 | + | |
151 | 191 | ### Synonyms |
152 | 192 | |
153 | 193 | ```ruby |
... | ... | @@ -193,7 +233,7 @@ Searchkick uses `find_in_batches` to import documents. To eager load associatio |
193 | 233 | |
194 | 234 | ```ruby |
195 | 235 | class Product < ActiveRecord::Base |
196 | - scope :search_import, includes(:searches) | |
236 | + scope :search_import, -> { includes(:searches) } | |
197 | 237 | end |
198 | 238 | ``` |
199 | 239 | |
... | ... | @@ -293,14 +333,14 @@ First, specify which fields use this feature. This is necessary since autocompl |
293 | 333 | |
294 | 334 | ```ruby |
295 | 335 | class City < ActiveRecord::Base |
296 | - searchkick autocomplete: ["name"] | |
336 | + searchkick text_start: [:name] | |
297 | 337 | end |
298 | 338 | ``` |
299 | 339 | |
300 | 340 | Reindex and search with: |
301 | 341 | |
302 | 342 | ```ruby |
303 | -City.search "san fr", autocomplete: true | |
343 | +City.search "san fr", fields: [{name: :text_start}] | |
304 | 344 | ``` |
305 | 345 | |
306 | 346 | Typically, you want to use a JavaScript library like [typeahead.js](http://twitter.github.io/typeahead.js/) or [jQuery UI](http://jqueryui.com/autocomplete/).
... | ... | @@ -314,7 +354,7 @@ First, add a controller action. |
314 | 354 | class CitiesController < ApplicationController |
315 | 355 | |
316 | 356 | def autocomplete |
317 | - render json: City.search(params[:query], autocomplete: true, limit: 10).map(&:name) | |
357 | + render json: City.search(params[:query], fields: [{name: :text_start}], limit: 10).map(&:name) | |
318 | 358 | end |
319 | 359 | |
320 | 360 | end |
... | ... | @@ -454,8 +494,9 @@ Dog.reindex # equivalent |
454 | 494 | And to search, use: |
455 | 495 | |
456 | 496 | ```ruby |
457 | -Animal.search "*" # all animals | |
458 | -Dog.search "*" # just dogs | |
497 | +Animal.search "*" # all animals | |
498 | +Dog.search "*" # just dogs | |
499 | +Animal.search "*", type: [Dog, Cat] # just cats and dogs [master] | |
459 | 500 | ``` |
460 | 501 | |
461 | 502 | **Note:** The `suggest` option retrieves suggestions from the parent at the moment. |
... | ... | @@ -530,6 +571,24 @@ And use the `query` option to search: |
530 | 571 | Product.search query: {match: {name: "milk"}} |
531 | 572 | ``` |
532 | 573 | |
574 | +To keep the mappings and settings generated by Searchkick, use: | |
575 | + | |
576 | +```ruby | |
577 | +class Product < ActiveRecord::Base | |
578 | + searchkick merge_mappings: true, mappings: {...} | |
579 | +end | |
580 | +``` | |
581 | + | |
582 | +## Experimental [master] | |
583 | + | |
584 | +Modify the query generated by Searchkick. | |
585 | + | |
586 | +```ruby | |
587 | +query = Product.search "2% Milk", execute: false | |
588 | +query.body[:query] = {match_all: {}} | |
589 | +products = query.execute | |
590 | +``` | |
591 | + | |
533 | 592 | ## Reference |
534 | 593 | |
535 | 594 | Searchkick requires Elasticsearch `0.90.0` or higher. |
... | ... | @@ -574,9 +633,9 @@ end |
574 | 633 | or temporarily |
575 | 634 | |
576 | 635 | ```ruby |
577 | -Product.disable_search_callbacks # use Searchkick.disable_callbacks for all models | |
636 | +Product.disable_search_callbacks # or use Searchkick.disable_callbacks for all models | |
578 | 637 | ExpensiveProductsTask.execute |
579 | -Product.enable_search_callbacks | |
638 | +Product.enable_search_callbacks # or use Searchkick.enable_callbacks for all models | |
580 | 639 | Product.reindex |
581 | 640 | ``` |
582 | 641 | |
... | ... | @@ -601,6 +660,50 @@ class Product < ActiveRecord::Base |
601 | 660 | end |
602 | 661 | ``` |
603 | 662 | |
663 | +Change import batch size | |
664 | + | |
665 | +```ruby | |
666 | +class Product < ActiveRecord::Base | |
667 | + searchkick batch_size: 200 # defaults to 1000 | |
668 | +end | |
669 | +``` | |
670 | + | |
671 | +Asynchronous reindexing | |
672 | + | |
673 | +```ruby | |
674 | +class Product < ActiveRecord::Base | |
675 | + searchkick callbacks: false | |
676 | + | |
677 | + # add the callbacks manually | |
678 | + | |
679 | + # ActiveRecord - one callback | |
680 | + after_commit :reindex_async | |
681 | + | |
682 | + # Mongoid - two callbacks | |
683 | + after_save :reindex_async | |
684 | + after_destroy :reindex_async | |
685 | + | |
686 | + def reindex_async | |
687 | + # delayed job | |
688 | + delay.reindex | |
689 | + end | |
690 | +end | |
691 | +``` | |
692 | + | |
693 | +Reindex conditionally | |
694 | + | |
695 | +**Note:** With ActiveRecord, use this feature with caution - [transaction rollbacks can cause data inconsistencies](https://github.com/elasticsearch/elasticsearch-rails/blob/master/elasticsearch-model/README.md#custom-callbacks) | |
696 | + | |
697 | +```ruby | |
698 | +class Product < ActiveRecord::Base | |
699 | + searchkick callbacks: false | |
700 | + | |
701 | + # add the callbacks manually | |
702 | + after_save :reindex, if: proc{|model| model.name_changed? } # use your own condition | |
703 | + after_destroy :reindex | |
704 | +end | |
705 | +``` | |
706 | + | |
604 | 707 | Reindex all models (Rails only) |
605 | 708 | |
606 | 709 | ```sh |
... | ... | @@ -655,11 +758,14 @@ For convenience, this is set by default in the test environment. |
655 | 758 | |
656 | 759 | Thanks to Karel Minarik for [Tire](https://github.com/karmi/tire), Jaroslav Kalistsuk for [zero downtime reindexing](https://gist.github.com/jarosan/3124884), and Alex Leschenko for [Elasticsearch autocomplete](https://github.com/leschenko/elasticsearch_autocomplete). |
657 | 760 | |
658 | -## TODO | |
761 | +## Roadmap | |
659 | 762 | |
660 | -- Generate autocomplete predictions from past search queries | |
661 | -- Automatic failover | |
662 | -- Make Searchkick work with any language | |
763 | +- Search multiple fields for different terms | |
764 | +- Search across models | |
765 | +- Search nested objects | |
766 | +- Add section on testing | |
767 | +- Much finer customization | |
768 | +- More transparency into generated queries (for advanced use) | |
663 | 769 | |
664 | 770 | ## History |
665 | 771 | |
... | ... | @@ -673,3 +779,11 @@ Everyone is encouraged to help improve this project. Here are a few ways you can |
673 | 779 | - Fix bugs and [submit pull requests](https://github.com/ankane/searchkick/pulls) |
674 | 780 | - Write, clarify, or fix documentation |
675 | 781 | - Suggest or add new features |
782 | + | |
783 | +To get started with development and testing: | |
784 | + | |
785 | +1. Clone the repo | |
786 | +2. Install PostgreSQL and create a database called `searchkick_test` (`psql -d postgres -c "create database searchkick_test"`) | |
787 | +3. Install Elasticsearch | |
788 | +4. `bundle` | |
789 | +5. `rake test` | ... | ... |
lib/searchkick.rb
lib/searchkick/model.rb
... | ... | @@ -19,8 +19,12 @@ module Searchkick |
19 | 19 | extend Searchkick::Reindex |
20 | 20 | include Searchkick::Similar |
21 | 21 | |
22 | - after_save :reindex | |
23 | - after_destroy :reindex | |
22 | + if respond_to?(:after_commit) | |
23 | + after_commit :reindex, if: proc{ self.class.search_callbacks? } | |
24 | + else | |
25 | + after_save :reindex, if: proc{ self.class.search_callbacks? } | |
26 | + after_destroy :reindex, if: proc{ self.class.search_callbacks? } | |
27 | + end | |
24 | 28 | |
25 | 29 | def self.enable_search_callbacks |
26 | 30 | class_variable_set :@@searchkick_callbacks, true |
... | ... | @@ -39,13 +43,11 @@ module Searchkick |
39 | 43 | end |
40 | 44 | |
41 | 45 | def reindex |
42 | - if self.class.search_callbacks? | |
43 | - index = self.class.searchkick_index | |
44 | - if destroyed? or !should_index? | |
45 | - index.remove self | |
46 | - else | |
47 | - index.store self | |
48 | - end | |
46 | + index = self.class.searchkick_index | |
47 | + if destroyed? or !should_index? | |
48 | + index.remove self | |
49 | + else | |
50 | + index.store self | |
49 | 51 | end |
50 | 52 | end |
51 | 53 | |
... | ... | @@ -60,7 +62,9 @@ module Searchkick |
60 | 62 | source = source.inject({}){|memo,(k,v)| memo[k.to_s] = v; memo} |
61 | 63 | |
62 | 64 | # Mongoid 4 hack |
63 | - source["_id"] = source["_id"].to_s if source["_id"] | |
65 | + if defined?(BSON::ObjectId) and source["_id"].is_a?(BSON::ObjectId) | |
66 | + source["_id"] = source["_id"].to_s | |
67 | + end | |
64 | 68 | |
65 | 69 | options = self.class.searchkick_options |
66 | 70 | ... | ... |
... | ... | @@ -0,0 +1,429 @@ |
1 | +module Searchkick | |
2 | + class Query | |
3 | + attr_reader :klass, :term, :options | |
4 | + attr_accessor :body | |
5 | + | |
6 | + def initialize(klass, term, options = {}) | |
7 | + if term.is_a?(Hash) | |
8 | + options = term | |
9 | + term = nil | |
10 | + else | |
11 | + term = term.to_s | |
12 | + end | |
13 | + | |
14 | + @klass = klass | |
15 | + @term = term | |
16 | + @options = options | |
17 | + | |
18 | + fields = | |
19 | + if options[:fields] | |
20 | + if options[:autocomplete] | |
21 | + options[:fields].map{|f| "#{f}.autocomplete" } | |
22 | + else | |
23 | + options[:fields].map do |value| | |
24 | + k, v = value.is_a?(Hash) ? value.to_a.first : [value, :word] | |
25 | + "#{k}.#{v == :word ? "analyzed" : v}" | |
26 | + end | |
27 | + end | |
28 | + else | |
29 | + if options[:autocomplete] | |
30 | + (searchkick_options[:autocomplete] || []).map{|f| "#{f}.autocomplete" } | |
31 | + else | |
32 | + ["_all"] | |
33 | + end | |
34 | + end | |
35 | + | |
36 | + operator = options[:operator] || (options[:partial] ? "or" : "and") | |
37 | + | |
38 | + # model and eager loading | |
39 | + load = options[:load].nil? ? true : options[:load] | |
40 | + load = (options[:include] ? {include: options[:include]} : true) if load | |
41 | + | |
42 | + # pagination | |
43 | + page = [options[:page].to_i, 1].max | |
44 | + per_page = (options[:limit] || options[:per_page] || 100000).to_i | |
45 | + offset = options[:offset] || (page - 1) * per_page | |
46 | + index_name = options[:index_name] || searchkick_index.name | |
47 | + | |
48 | + conversions_field = searchkick_options[:conversions] | |
49 | + personalize_field = searchkick_options[:personalize] | |
50 | + | |
51 | + all = term == "*" | |
52 | + | |
53 | + if options[:query] | |
54 | + payload = options[:query] | |
55 | + elsif options[:similar] | |
56 | + payload = { | |
57 | + more_like_this: { | |
58 | + fields: fields, | |
59 | + like_text: term, | |
60 | + min_doc_freq: 1, | |
61 | + min_term_freq: 1, | |
62 | + analyzer: "searchkick_search2" | |
63 | + } | |
64 | + } | |
65 | + elsif all | |
66 | + payload = { | |
67 | + match_all: {} | |
68 | + } | |
69 | + else | |
70 | + if options[:autocomplete] | |
71 | + payload = { | |
72 | + multi_match: { | |
73 | + fields: fields, | |
74 | + query: term, | |
75 | + analyzer: "searchkick_autocomplete_search" | |
76 | + } | |
77 | + } | |
78 | + else | |
79 | + queries = [] | |
80 | + fields.each do |field| | |
81 | + if field == "_all" or field.end_with?(".analyzed") | |
82 | + shared_options = { | |
83 | + fields: [field], | |
84 | + query: term, | |
85 | + use_dis_max: false, | |
86 | + operator: operator, | |
87 | + cutoff_frequency: 0.001 | |
88 | + } | |
89 | + queries.concat [ | |
90 | + {multi_match: shared_options.merge(boost: 10, analyzer: "searchkick_search")}, | |
91 | + {multi_match: shared_options.merge(boost: 10, analyzer: "searchkick_search2")} | |
92 | + ] | |
93 | + if options[:misspellings] != false | |
94 | + distance = (options[:misspellings].is_a?(Hash) && options[:misspellings][:distance]) || 1 | |
95 | + queries.concat [ | |
96 | + {multi_match: shared_options.merge(fuzziness: distance, max_expansions: 3, analyzer: "searchkick_search")}, | |
97 | + {multi_match: shared_options.merge(fuzziness: distance, max_expansions: 3, analyzer: "searchkick_search2")} | |
98 | + ] | |
99 | + end | |
100 | + else | |
101 | + analyzer = field.match(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search" | |
102 | + queries << { | |
103 | + multi_match: { | |
104 | + fields: [field], | |
105 | + query: term, | |
106 | + analyzer: analyzer | |
107 | + } | |
108 | + } | |
109 | + end | |
110 | + end | |
111 | + | |
112 | + payload = { | |
113 | + dis_max: { | |
114 | + queries: queries | |
115 | + } | |
116 | + } | |
117 | + end | |
118 | + | |
119 | + if conversions_field and options[:conversions] != false | |
120 | + # wrap payload in a bool query | |
121 | + payload = { | |
122 | + bool: { | |
123 | + must: payload, | |
124 | + should: { | |
125 | + nested: { | |
126 | + path: conversions_field, | |
127 | + score_mode: "total", | |
128 | + query: { | |
129 | + custom_score: { | |
130 | + query: { | |
131 | + match: { | |
132 | + query: term | |
133 | + } | |
134 | + }, | |
135 | + script: "doc['count'].value" | |
136 | + } | |
137 | + } | |
138 | + } | |
139 | + } | |
140 | + } | |
141 | + } | |
142 | + end | |
143 | + end | |
144 | + | |
145 | + custom_filters = [] | |
146 | + | |
147 | + if options[:boost] | |
148 | + custom_filters << { | |
149 | + filter: { | |
150 | + exists: { | |
151 | + field: options[:boost] | |
152 | + } | |
153 | + }, | |
154 | + script: "log(doc['#{options[:boost]}'].value + 2.718281828)" | |
155 | + } | |
156 | + end | |
157 | + | |
158 | + if options[:user_id] and personalize_field | |
159 | + custom_filters << { | |
160 | + filter: { | |
161 | + term: { | |
162 | + personalize_field => options[:user_id] | |
163 | + } | |
164 | + }, | |
165 | + boost: 100 | |
166 | + } | |
167 | + end | |
168 | + | |
169 | + if options[:personalize] | |
170 | + custom_filters << { | |
171 | + filter: { | |
172 | + term: options[:personalize] | |
173 | + }, | |
174 | + boost: 100 | |
175 | + } | |
176 | + end | |
177 | + | |
178 | + if custom_filters.any? | |
179 | + payload = { | |
180 | + custom_filters_score: { | |
181 | + query: payload, | |
182 | + filters: custom_filters, | |
183 | + score_mode: "total" | |
184 | + } | |
185 | + } | |
186 | + end | |
187 | + | |
188 | + payload = { | |
189 | + query: payload, | |
190 | + size: per_page, | |
191 | + from: offset | |
192 | + } | |
193 | + payload[:explain] = options[:explain] if options[:explain] | |
194 | + | |
195 | + # order | |
196 | + if options[:order] | |
197 | + order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc} | |
198 | + payload[:sort] = Hash[ order.map{|k, v| [k.to_s == "id" ? :_id : k, v] } ] | |
199 | + end | |
200 | + | |
201 | + # filters | |
202 | + filters = where_filters(options[:where]) | |
203 | + if filters.any? | |
204 | + payload[:filter] = { | |
205 | + and: filters | |
206 | + } | |
207 | + end | |
208 | + | |
209 | + # facets | |
210 | + facet_limits = {} | |
211 | + if options[:facets] | |
212 | + facets = options[:facets] || {} | |
213 | + if facets.is_a?(Array) # convert to more advanced syntax | |
214 | + facets = Hash[ facets.map{|f| [f, {}] } ] | |
215 | + end | |
216 | + | |
217 | + payload[:facets] = {} | |
218 | + facets.each do |field, facet_options| | |
219 | + # ask for extra facets due to | |
220 | + # https://github.com/elasticsearch/elasticsearch/issues/1305 | |
221 | + | |
222 | + if facet_options[:ranges] | |
223 | + payload[:facets][field] = { | |
224 | + range: { | |
225 | + field.to_sym => facet_options[:ranges] | |
226 | + } | |
227 | + } | |
228 | + else | |
229 | + payload[:facets][field] = { | |
230 | + terms: { | |
231 | + field: field, | |
232 | + size: facet_options[:limit] ? facet_options[:limit] + 150 : 100000 | |
233 | + } | |
234 | + } | |
235 | + end | |
236 | + | |
237 | + facet_limits[field] = facet_options[:limit] if facet_options[:limit] | |
238 | + | |
239 | + # offset is not possible | |
240 | + # http://elasticsearch-users.115913.n3.nabble.com/Is-pagination-possible-in-termsStatsFacet-td3422943.html | |
241 | + | |
242 | + facet_filters = where_filters(facet_options[:where]) | |
243 | + if facet_filters.any? | |
244 | + payload[:facets][field][:facet_filter] = { | |
245 | + and: { | |
246 | + filters: facet_filters | |
247 | + } | |
248 | + } | |
249 | + end | |
250 | + end | |
251 | + end | |
252 | + | |
253 | + # suggestions | |
254 | + if options[:suggest] | |
255 | + suggest_fields = (searchkick_options[:suggest] || []).map(&:to_s) | |
256 | + # intersection | |
257 | + suggest_fields = suggest_fields & options[:fields].map(&:to_s) if options[:fields] | |
258 | + if suggest_fields.any? | |
259 | + payload[:suggest] = {text: term} | |
260 | + suggest_fields.each do |field| | |
261 | + payload[:suggest][field] = { | |
262 | + phrase: { | |
263 | + field: "#{field}.suggest" | |
264 | + } | |
265 | + } | |
266 | + end | |
267 | + end | |
268 | + end | |
269 | + | |
270 | + # highlight | |
271 | + if options[:highlight] | |
272 | + payload[:highlight] = { | |
273 | + fields: Hash[ fields.map{|f| [f, {}] } ] | |
274 | + } | |
275 | + if options[:highlight].is_a?(Hash) and tag = options[:highlight][:tag] | |
276 | + payload[:highlight][:pre_tags] = [tag] | |
277 | + payload[:highlight][:post_tags] = [tag.to_s.gsub(/\A</, "</")] | |
278 | + end | |
279 | + end | |
280 | + | |
281 | + # An empty array will cause only the _id and _type for each hit to be returned | |
282 | + # http://www.elasticsearch.org/guide/reference/api/search/fields/ | |
283 | + payload[:fields] = [] if load | |
284 | + | |
285 | + tire_options = {load: load, size: per_page, from: offset} | |
286 | + if options[:type] or klass != searchkick_klass | |
287 | + tire_options[:type] = [options[:type] || klass].flatten.map(&:document_type) | |
288 | + end | |
289 | + | |
290 | + @body = payload | |
291 | + @facet_limits = facet_limits | |
292 | + end | |
293 | + | |
294 | + def searchkick_index | |
295 | + klass.searchkick_index | |
296 | + end | |
297 | + | |
298 | + def searchkick_options | |
299 | + klass.searchkick_options | |
300 | + end | |
301 | + | |
302 | + def searchkick_klass | |
303 | + klass.searchkick_klass | |
304 | + end | |
305 | + | |
306 | + def document_type | |
307 | + klass.document_type | |
308 | + end | |
309 | + | |
310 | + def execute | |
311 | + begin | |
312 | + response = Searchkick.client.search(index: searchkick_index.name, body: body) | |
313 | + rescue => e # TODO rescue type | |
314 | + status_code = e.message[0..3].to_i | |
315 | + if status_code == 404 | |
316 | + raise "Index missing - run #{searchkick_klass.name}.reindex" | |
317 | + elsif status_code == 500 and (e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") or e.message.include?("No query registered for [multi_match]")) | |
318 | + raise "Upgrade Elasticsearch to 0.90.0 or greater" | |
319 | + else | |
320 | + raise e | |
321 | + end | |
322 | + end | |
323 | + | |
324 | + # apply facet limit in client due to | |
325 | + # https://github.com/elasticsearch/elasticsearch/issues/1305 | |
326 | + @facet_limits.each do |field, limit| | |
327 | + field = field.to_s | |
328 | + facet = response["facets"][field] | |
329 | + response["facets"][field]["terms"] = facet["terms"].first(limit) | |
330 | + response["facets"][field]["other"] = facet["total"] - facet["terms"].sum{|term| term["count"] } | |
331 | + end | |
332 | + | |
333 | + Searchkick::Results.new(response, @search.options.merge(term: term, model_name: searchkick_klass.model_name)) | |
334 | + end | |
335 | + | |
336 | + private | |
337 | + | |
338 | + def where_filters(where) | |
339 | + filters = [] | |
340 | + (where || {}).each do |field, value| | |
341 | + field = :_id if field.to_s == "id" | |
342 | + | |
343 | + if field == :or | |
344 | + value.each do |or_clause| | |
345 | + filters << {or: or_clause.map{|or_statement| {and: where_filters(or_statement)} }} | |
346 | + end | |
347 | + else | |
348 | + # expand ranges | |
349 | + if value.is_a?(Range) | |
350 | + value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last} | |
351 | + end | |
352 | + | |
353 | + if value.is_a?(Array) | |
354 | + value = {in: value} | |
355 | + end | |
356 | + | |
357 | + if value.is_a?(Hash) | |
358 | + value.each do |op, op_value| | |
359 | + case op | |
360 | + when :within, :bottom_right | |
361 | + # do nothing | |
362 | + when :near | |
363 | + filters << { | |
364 | + geo_distance: { | |
365 | + field => op_value.map(&:to_f).reverse, | |
366 | + distance: value[:within] || "50mi" | |
367 | + } | |
368 | + } | |
369 | + when :top_left | |
370 | + filters << { | |
371 | + geo_bounding_box: { | |
372 | + field => { | |
373 | + top_left: op_value.map(&:to_f).reverse, | |
374 | + bottom_right: value[:bottom_right].map(&:to_f).reverse | |
375 | + } | |
376 | + } | |
377 | + } | |
378 | + when :not # not equal | |
379 | + filters << {not: term_filters(field, op_value)} | |
380 | + when :all | |
381 | + filters << {terms: {field => op_value, execution: "and"}} | |
382 | + when :in | |
383 | + filters << term_filters(field, op_value) | |
384 | + else | |
385 | + range_query = | |
386 | + case op | |
387 | + when :gt | |
388 | + {from: op_value, include_lower: false} | |
389 | + when :gte | |
390 | + {from: op_value, include_lower: true} | |
391 | + when :lt | |
392 | + {to: op_value, include_upper: false} | |
393 | + when :lte | |
394 | + {to: op_value, include_upper: true} | |
395 | + else | |
396 | + raise "Unknown where operator" | |
397 | + end | |
398 | + # issue 132 | |
399 | + if existing = filters.find{ |f| f[:range] && f[:range][field] } | |
400 | + existing[:range][field].merge!(range_query) | |
401 | + else | |
402 | + filters << {range: {field => range_query}} | |
403 | + end | |
404 | + end | |
405 | + end | |
406 | + else | |
407 | + filters << term_filters(field, value) | |
408 | + end | |
409 | + end | |
410 | + end | |
411 | + filters | |
412 | + end | |
413 | + | |
414 | + def term_filters(field, value) | |
415 | + if value.is_a?(Array) # in query | |
416 | + if value.any? | |
417 | + {or: value.map{|v| term_filters(field, v) }} | |
418 | + else | |
419 | + {terms: {field => value}} # match nothing | |
420 | + end | |
421 | + elsif value.nil? | |
422 | + {missing: {"field" => field, existence: true, null_value: true}} | |
423 | + else | |
424 | + {term: {field => value}} | |
425 | + end | |
426 | + end | |
427 | + | |
428 | + end | |
429 | +end | |
0 | 430 | \ No newline at end of file | ... | ... |
lib/searchkick/reindex.rb
... | ... | @@ -44,17 +44,20 @@ module Searchkick |
44 | 44 | end |
45 | 45 | |
46 | 46 | def self.extended(klass) |
47 | - (@descendents ||= []) << klass | |
47 | + @descendents ||= [] | |
48 | + @descendents << klass unless @descendents.include?(klass) | |
48 | 49 | end |
49 | 50 | |
50 | 51 | private |
51 | 52 | |
52 | 53 | def searchkick_import(index) |
54 | + batch_size = searchkick_options[:batch_size] || 1000 | |
55 | + | |
53 | 56 | # use scope for import |
54 | 57 | scope = searchkick_klass |
55 | 58 | scope = scope.search_import if scope.respond_to?(:search_import) |
56 | 59 | if scope.respond_to?(:find_in_batches) |
57 | - scope.find_in_batches do |batch| | |
60 | + scope.find_in_batches batch_size: batch_size do |batch| | |
58 | 61 | index.import batch.select{|item| item.should_index? } |
59 | 62 | end |
60 | 63 | else |
... | ... | @@ -63,7 +66,7 @@ module Searchkick |
63 | 66 | items = [] |
64 | 67 | scope.all.each do |item| |
65 | 68 | items << item if item.should_index? |
66 | - if items.length % 1000 == 0 | |
69 | + if items.length % batch_size == 0 | |
67 | 70 | index.import items |
68 | 71 | items = [] |
69 | 72 | end |
... | ... | @@ -75,7 +78,7 @@ module Searchkick |
75 | 78 | def searchkick_index_options |
76 | 79 | options = searchkick_options |
77 | 80 | |
78 | - if options[:mappings] | |
81 | + if options[:mappings] and !options[:merge_mappings] | |
79 | 82 | settings = options[:settings] || {} |
80 | 83 | mappings = options[:mappings] |
81 | 84 | else |
... | ... | @@ -85,24 +88,24 @@ module Searchkick |
85 | 88 | searchkick_keyword: { |
86 | 89 | type: "custom", |
87 | 90 | tokenizer: "keyword", |
88 | - filter: ["lowercase", "snowball"] | |
91 | + filter: ["lowercase", "searchkick_stemmer"] | |
89 | 92 | }, |
90 | 93 | default_index: { |
91 | 94 | type: "custom", |
92 | 95 | tokenizer: "standard", |
93 | 96 | # synonym should come last, after stemming and shingle |
94 | - # shingle must come before snowball | |
95 | - filter: ["standard", "lowercase", "asciifolding", "stop", "searchkick_index_shingle", "snowball"] | |
97 | + # shingle must come before searchkick_stemmer | |
98 | + filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"] | |
96 | 99 | }, |
97 | 100 | searchkick_search: { |
98 | 101 | type: "custom", |
99 | 102 | tokenizer: "standard", |
100 | - filter: ["standard", "lowercase", "asciifolding", "stop", "searchkick_search_shingle", "snowball"] | |
103 | + filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"] | |
101 | 104 | }, |
102 | 105 | searchkick_search2: { |
103 | 106 | type: "custom", |
104 | 107 | tokenizer: "standard", |
105 | - filter: ["standard", "lowercase", "asciifolding", "stop", "snowball"] | |
108 | + filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"] | |
106 | 109 | }, |
107 | 110 | # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb |
108 | 111 | searchkick_autocomplete_index: { |
... | ... | @@ -115,10 +118,50 @@ module Searchkick |
115 | 118 | tokenizer: "keyword", |
116 | 119 | filter: ["lowercase", "asciifolding"] |
117 | 120 | }, |
121 | + searchkick_word_search: { | |
122 | + type: "custom", | |
123 | + tokenizer: "standard", | |
124 | + filter: ["lowercase", "asciifolding"] | |
125 | + }, | |
118 | 126 | searchkick_suggest_index: { |
119 | 127 | type: "custom", |
120 | 128 | tokenizer: "standard", |
121 | 129 | filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"] |
130 | + }, | |
131 | + searchkick_suggest_index: { | |
132 | + type: "custom", | |
133 | + tokenizer: "standard", | |
134 | + filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"] | |
135 | + }, | |
136 | + searchkick_text_start_index: { | |
137 | + type: "custom", | |
138 | + tokenizer: "keyword", | |
139 | + filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] | |
140 | + }, | |
141 | + searchkick_text_middle_index: { | |
142 | + type: "custom", | |
143 | + tokenizer: "keyword", | |
144 | + filter: ["lowercase", "asciifolding", "searchkick_ngram"] | |
145 | + }, | |
146 | + searchkick_text_end_index: { | |
147 | + type: "custom", | |
148 | + tokenizer: "keyword", | |
149 | + filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] | |
150 | + }, | |
151 | + searchkick_word_start_index: { | |
152 | + type: "custom", | |
153 | + tokenizer: "standard", | |
154 | + filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] | |
155 | + }, | |
156 | + searchkick_word_middle_index: { | |
157 | + type: "custom", | |
158 | + tokenizer: "standard", | |
159 | + filter: ["lowercase", "asciifolding", "searchkick_ngram"] | |
160 | + }, | |
161 | + searchkick_word_end_index: { | |
162 | + type: "custom", | |
163 | + tokenizer: "standard", | |
164 | + filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] | |
122 | 165 | } |
123 | 166 | }, |
124 | 167 | filter: { |
... | ... | @@ -136,6 +179,20 @@ module Searchkick |
136 | 179 | searchkick_suggest_shingle: { |
137 | 180 | type: "shingle", |
138 | 181 | max_shingle_size: 5 |
182 | + }, | |
183 | + searchkick_edge_ngram: { | |
184 | + type: "edgeNGram", | |
185 | + min_gram: 1, | |
186 | + max_gram: 50 | |
187 | + }, | |
188 | + searchkick_ngram: { | |
189 | + type: "nGram", | |
190 | + min_gram: 1, | |
191 | + max_gram: 50 | |
192 | + }, | |
193 | + searchkick_stemmer: { | |
194 | + type: "snowball", | |
195 | + language: options[:language] || "English" | |
139 | 196 | } |
140 | 197 | }, |
141 | 198 | tokenizer: { |
... | ... | @@ -152,7 +209,7 @@ module Searchkick |
152 | 209 | settings.merge!(number_of_shards: 1, number_of_replicas: 0) |
153 | 210 | end |
154 | 211 | |
155 | - settings.merge!(options[:settings] || {}) | |
212 | + settings.deep_merge!(options[:settings] || {}) | |
156 | 213 | |
157 | 214 | # synonyms |
158 | 215 | synonyms = options[:synonyms] || [] |
... | ... | @@ -193,10 +250,12 @@ module Searchkick |
193 | 250 | } |
194 | 251 | end |
195 | 252 | |
196 | - # autocomplete and suggest | |
197 | - autocomplete = (options[:autocomplete] || []).map(&:to_s) | |
198 | - suggest = (options[:suggest] || []).map(&:to_s) | |
199 | - (autocomplete + suggest).uniq.each do |field| | |
253 | + mapping_options = Hash[ | |
254 | + [:autocomplete, :suggest, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end] | |
255 | + .map{|type| [type, (options[type] || []).map(&:to_s)] } | |
256 | + ] | |
257 | + | |
258 | + mapping_options.values.flatten.uniq.each do |field| | |
200 | 259 | field_mapping = { |
201 | 260 | type: "multi_field", |
202 | 261 | fields: { |
... | ... | @@ -206,12 +265,13 @@ module Searchkick |
206 | 265 | # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-highlighting.html#_fast_vector_highlighter |
207 | 266 | } |
208 | 267 | } |
209 | - if autocomplete.include?(field) | |
210 | - field_mapping[:fields]["autocomplete"] = {type: "string", index: "analyzed", analyzer: "searchkick_autocomplete_index"} | |
211 | - end | |
212 | - if suggest.include?(field) | |
213 | - field_mapping[:fields]["suggest"] = {type: "string", index: "analyzed", analyzer: "searchkick_suggest_index"} | |
268 | + | |
269 | + mapping_options.each do |type, fields| | |
270 | + if fields.include?(field) | |
271 | + field_mapping[:fields][type] = {type: "string", index: "analyzed", analyzer: "searchkick_#{type}_index"} | |
272 | + end | |
214 | 273 | end |
274 | + | |
215 | 275 | mapping[field] = field_mapping |
216 | 276 | end |
217 | 277 | |
... | ... | @@ -246,7 +306,7 @@ module Searchkick |
246 | 306 | } |
247 | 307 | ] |
248 | 308 | } |
249 | - } | |
309 | + }.deep_merge(options[:mappings] || {}) | |
250 | 310 | end |
251 | 311 | |
252 | 312 | { | ... | ... |
lib/searchkick/results.rb
lib/searchkick/search.rb
... | ... | @@ -2,355 +2,12 @@ module Searchkick |
2 | 2 | module Search |
3 | 3 | |
4 | 4 | def search(term, options = {}) |
5 | - if term.is_a?(Hash) | |
6 | - options = term | |
7 | - term = nil | |
5 | + query = Searchkick::Query.new(self, term, options) | |
6 | + if options[:execute] == false | |
7 | + query | |
8 | 8 | else |
9 | - term = term.to_s | |
9 | + query.execute | |
10 | 10 | end |
11 | - | |
12 | - fields = | |
13 | - if options[:fields] | |
14 | - if options[:autocomplete] | |
15 | - options[:fields].map{|f| "#{f}.autocomplete" } | |
16 | - else | |
17 | - options[:fields].map{|f| "#{f}.analyzed" } | |
18 | - end | |
19 | - else | |
20 | - if options[:autocomplete] | |
21 | - (searchkick_options[:autocomplete] || []).map{|f| "#{f}.autocomplete" } | |
22 | - else | |
23 | - ["_all"] | |
24 | - end | |
25 | - end | |
26 | - | |
27 | - operator = options[:partial] ? "or" : "and" | |
28 | - | |
29 | - # model and eagar loading | |
30 | - load = options[:load].nil? ? true : options[:load] | |
31 | - load = (options[:include] ? {include: options[:include]} : true) if load | |
32 | - | |
33 | - # pagination | |
34 | - page = [options[:page].to_i, 1].max | |
35 | - per_page = (options[:limit] || options[:per_page] || 100000).to_i | |
36 | - offset = options[:offset] || (page - 1) * per_page | |
37 | - index_name = options[:index_name] || searchkick_index.name | |
38 | - | |
39 | - conversions_field = searchkick_options[:conversions] | |
40 | - personalize_field = searchkick_options[:personalize] | |
41 | - | |
42 | - all = term == "*" | |
43 | - | |
44 | - if options[:query] | |
45 | - payload = options[:query] | |
46 | - elsif options[:similar] | |
47 | - payload = { | |
48 | - more_like_this: { | |
49 | - fields: fields, | |
50 | - like_text: term, | |
51 | - min_doc_freq: 1, | |
52 | - min_term_freq: 1, | |
53 | - analyzer: "searchkick_search2" | |
54 | - } | |
55 | - } | |
56 | - elsif all | |
57 | - payload = { | |
58 | - match_all: {} | |
59 | - } | |
60 | - else | |
61 | - if options[:autocomplete] | |
62 | - payload = { | |
63 | - multi_match: { | |
64 | - fields: fields, | |
65 | - query: term, | |
66 | - analyzer: "searchkick_autocomplete_search" | |
67 | - } | |
68 | - } | |
69 | - else | |
70 | - shared_options = { | |
71 | - fields: fields, | |
72 | - query: term, | |
73 | - use_dis_max: false, | |
74 | - operator: operator | |
75 | - } | |
76 | - queries = [ | |
77 | - {multi_match: shared_options.merge(boost: 10, analyzer: "searchkick_search")}, | |
78 | - {multi_match: shared_options.merge(boost: 10, analyzer: "searchkick_search2")} | |
79 | - ] | |
80 | - if options[:misspellings] != false | |
81 | - distance = (options[:misspellings].is_a?(Hash) && options[:misspellings][:distance]) || 1 | |
82 | - queries.concat [ | |
83 | - {multi_match: shared_options.merge(fuzziness: distance, max_expansions: 3, analyzer: "searchkick_search")}, | |
84 | - {multi_match: shared_options.merge(fuzziness: distance, max_expansions: 3, analyzer: "searchkick_search2")} | |
85 | - ] | |
86 | - end | |
87 | - payload = { | |
88 | - dis_max: { | |
89 | - queries: queries | |
90 | - } | |
91 | - } | |
92 | - end | |
93 | - | |
94 | - if conversions_field and options[:conversions] != false | |
95 | - # wrap payload in a bool query | |
96 | - payload = { | |
97 | - bool: { | |
98 | - must: payload, | |
99 | - should: { | |
100 | - nested: { | |
101 | - path: conversions_field, | |
102 | - score_mode: "total", | |
103 | - query: { | |
104 | - custom_score: { | |
105 | - query: { | |
106 | - match: { | |
107 | - query: term | |
108 | - } | |
109 | - }, | |
110 | - script: "doc['count'].value" | |
111 | - } | |
112 | - } | |
113 | - } | |
114 | - } | |
115 | - } | |
116 | - } | |
117 | - end | |
118 | - end | |
119 | - | |
120 | - custom_filters = [] | |
121 | - | |
122 | - if options[:boost] | |
123 | - custom_filters << { | |
124 | - filter: { | |
125 | - exists: { | |
126 | - field: options[:boost] | |
127 | - } | |
128 | - }, | |
129 | - script: "log(doc['#{options[:boost]}'].value + 2.718281828)" | |
130 | - } | |
131 | - end | |
132 | - | |
133 | - if options[:user_id] and personalize_field | |
134 | - custom_filters << { | |
135 | - filter: { | |
136 | - term: { | |
137 | - personalize_field => options[:user_id] | |
138 | - } | |
139 | - }, | |
140 | - boost: 100 | |
141 | - } | |
142 | - end | |
143 | - | |
144 | - if custom_filters.any? | |
145 | - payload = { | |
146 | - custom_filters_score: { | |
147 | - query: payload, | |
148 | - filters: custom_filters, | |
149 | - score_mode: "total" | |
150 | - } | |
151 | - } | |
152 | - end | |
153 | - | |
154 | - payload = { | |
155 | - query: payload, | |
156 | - size: per_page, | |
157 | - from: offset | |
158 | - } | |
159 | - payload[:explain] = options[:explain] if options[:explain] | |
160 | - | |
161 | - # order | |
162 | - if options[:order] | |
163 | - order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc} | |
164 | - payload[:sort] = order | |
165 | - end | |
166 | - | |
167 | - # where | |
168 | - where_filters = | |
169 | - proc do |where| | |
170 | - filters = [] | |
171 | - (where || {}).each do |field, value| | |
172 | - field = :_id if field.to_s == "id" | |
173 | - | |
174 | - if field == :or | |
175 | - value.each do |or_clause| | |
176 | - filters << {or: or_clause.map{|or_statement| {and: where_filters.call(or_statement)} }} | |
177 | - end | |
178 | - else | |
179 | - # expand ranges | |
180 | - if value.is_a?(Range) | |
181 | - value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last} | |
182 | - end | |
183 | - | |
184 | - if value.is_a?(Array) # in query | |
185 | - filters << {terms: {field => value}} | |
186 | - elsif value.is_a?(Hash) | |
187 | - if value[:near] | |
188 | - filters << { | |
189 | - geo_distance: { | |
190 | - field => value.delete(:near).map(&:to_f).reverse, | |
191 | - distance: value.delete(:within) || "50mi" | |
192 | - } | |
193 | - } | |
194 | - end | |
195 | - | |
196 | - if value[:top_left] | |
197 | - filters << { | |
198 | - geo_bounding_box: { | |
199 | - field => { | |
200 | - top_left: value.delete(:top_left).map(&:to_f).reverse, | |
201 | - bottom_right: value.delete(:bottom_right).map(&:to_f).reverse | |
202 | - } | |
203 | - } | |
204 | - } | |
205 | - end | |
206 | - | |
207 | - value.each do |op, op_value| | |
208 | - if op == :not # not equal | |
209 | - if op_value.is_a?(Array) | |
210 | - filters << {not: {terms: {field => op_value}}} | |
211 | - else | |
212 | - filters << {not: {term: {field => op_value}}} | |
213 | - end | |
214 | - elsif op == :all | |
215 | - filters << {terms: {field => op_value, execution: "and"}} | |
216 | - else | |
217 | - range_query = | |
218 | - case op | |
219 | - when :gt | |
220 | - {from: op_value, include_lower: false} | |
221 | - when :gte | |
222 | - {from: op_value, include_lower: true} | |
223 | - when :lt | |
224 | - {to: op_value, include_upper: false} | |
225 | - when :lte | |
226 | - {to: op_value, include_upper: true} | |
227 | - else | |
228 | - raise "Unknown where operator" | |
229 | - end | |
230 | - filters << {range: {field => range_query}} | |
231 | - end | |
232 | - end | |
233 | - else | |
234 | - if value.nil? | |
235 | - filters << {missing: {"field" => field, existence: true, null_value: true}} | |
236 | - else | |
237 | - filters << {term: {field => value}} | |
238 | - end | |
239 | - end | |
240 | - end | |
241 | - end | |
242 | - filters | |
243 | - end | |
244 | - | |
245 | - # filters | |
246 | - filters = where_filters.call(options[:where]) | |
247 | - if filters.any? | |
248 | - payload[:filter] = { | |
249 | - and: filters | |
250 | - } | |
251 | - end | |
252 | - | |
253 | - # facets | |
254 | - facet_limits = {} | |
255 | - if options[:facets] | |
256 | - facets = options[:facets] || {} | |
257 | - if facets.is_a?(Array) # convert to more advanced syntax | |
258 | - facets = Hash[ facets.map{|f| [f, {}] } ] | |
259 | - end | |
260 | - | |
261 | - payload[:facets] = {} | |
262 | - facets.each do |field, facet_options| | |
263 | - # ask for extra facets due to | |
264 | - # https://github.com/elasticsearch/elasticsearch/issues/1305 | |
265 | - | |
266 | - if facet_options[:ranges] | |
267 | - payload[:facets][field] = { | |
268 | - range: { | |
269 | - field.to_sym => facet_options[:ranges] | |
270 | - } | |
271 | - } | |
272 | - else | |
273 | - payload[:facets][field] = { | |
274 | - terms: { | |
275 | - field: field, | |
276 | - size: facet_options[:limit] ? facet_options[:limit] + 150 : 100000 | |
277 | - } | |
278 | - } | |
279 | - end | |
280 | - | |
281 | - facet_limits[field] = facet_options[:limit] if facet_options[:limit] | |
282 | - | |
283 | - # offset is not possible | |
284 | - # http://elasticsearch-users.115913.n3.nabble.com/Is-pagination-possible-in-termsStatsFacet-td3422943.html | |
285 | - | |
286 | - facet_filters = where_filters.call(facet_options[:where]) | |
287 | - if facet_filters.any? | |
288 | - payload[:facets][field][:facet_filter] = { | |
289 | - and: { | |
290 | - filters: facet_filters | |
291 | - } | |
292 | - } | |
293 | - end | |
294 | - end | |
295 | - end | |
296 | - | |
297 | - # suggestions | |
298 | - if options[:suggest] | |
299 | - suggest_fields = (searchkick_options[:suggest] || []).map(&:to_s) | |
300 | - # intersection | |
301 | - suggest_fields = suggest_fields & options[:fields].map(&:to_s) if options[:fields] | |
302 | - if suggest_fields.any? | |
303 | - payload[:suggest] = {text: term} | |
304 | - suggest_fields.each do |field| | |
305 | - payload[:suggest][field] = { | |
306 | - phrase: { | |
307 | - field: "#{field}.suggest" | |
308 | - } | |
309 | - } | |
310 | - end | |
311 | - end | |
312 | - end | |
313 | - | |
314 | - # highlight | |
315 | - if options[:highlight] | |
316 | - payload[:highlight] = { | |
317 | - fields: Hash[ fields.map{|f| [f, {}] } ] | |
318 | - } | |
319 | - if options[:highlight].is_a?(Hash) and tag = options[:highlight][:tag] | |
320 | - payload[:highlight][:pre_tags] = [tag] | |
321 | - payload[:highlight][:post_tags] = [tag.to_s.gsub(/\A</, "</")] | |
322 | - end | |
323 | - end | |
324 | - | |
325 | - # An empty array will cause only the _id and _type for each hit to be returned | |
326 | - # http://www.elasticsearch.org/guide/reference/api/search/fields/ | |
327 | - payload[:fields] = [] if load | |
328 | - | |
329 | - tire_options = {load: load, payload: payload, size: per_page, from: offset} | |
330 | - tire_options[:type] = document_type if self != searchkick_klass | |
331 | - begin | |
332 | - response = Searchkick.client.search index: index_name, body: payload | |
333 | - rescue => e | |
334 | - status_code = e.message[0..3].to_i | |
335 | - if status_code == 404 | |
336 | - raise "Index missing - run #{searchkick_klass.name}.reindex" | |
337 | - elsif status_code == 500 and (e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") or e.message.include?("No query registered for [multi_match]")) | |
338 | - raise "Upgrade Elasticsearch to 0.90.0 or greater" | |
339 | - else | |
340 | - raise e | |
341 | - end | |
342 | - end | |
343 | - | |
344 | - # apply facet limit in client due to | |
345 | - # https://github.com/elasticsearch/elasticsearch/issues/1305 | |
346 | - facet_limits.each do |field, limit| | |
347 | - field = field.to_s | |
348 | - facet = response["facets"][field] | |
349 | - response["facets"][field]["terms"] = facet["terms"].first(limit) | |
350 | - response["facets"][field]["other"] = facet["total"] - facet["terms"].sum{|term| term["count"] } | |
351 | - end | |
352 | - | |
353 | - Searchkick::Results.new(response, search.options.merge(term: term)) | |
354 | 11 | end |
355 | 12 | |
356 | 13 | end | ... | ... |
lib/searchkick/version.rb
searchkick.gemspec
... | ... | @@ -23,6 +23,4 @@ Gem::Specification.new do |spec| |
23 | 23 | spec.add_development_dependency "bundler", "~> 1.3" |
24 | 24 | spec.add_development_dependency "rake" |
25 | 25 | spec.add_development_dependency "minitest", "~> 4.7" |
26 | - spec.add_development_dependency "activerecord" | |
27 | - spec.add_development_dependency "pg" | |
28 | 26 | end | ... | ... |
test/autocomplete_test.rb
... | ... | @@ -17,4 +17,39 @@ class TestAutocomplete < Minitest::Unit::TestCase |
17 | 17 | assert_search "hum", ["Hummus"], autocomplete: true, fields: [:name] |
18 | 18 | end |
19 | 19 | |
20 | + def test_text_start | |
21 | + store_names ["Where in the World is Carmen San Diego"] | |
22 | + assert_search "where in the world is", ["Where in the World is Carmen San Diego"], fields: [{name: :text_start}] | |
23 | + assert_search "in the world", [], fields: [{name: :text_start}] | |
24 | + end | |
25 | + | |
26 | + def test_text_middle | |
27 | + store_names ["Where in the World is Carmen San Diego"] | |
28 | + assert_search "where in the world is", ["Where in the World is Carmen San Diego"], fields: [{name: :text_middle}] | |
29 | + assert_search "n the wor", ["Where in the World is Carmen San Diego"], fields: [{name: :text_middle}] | |
30 | + assert_search "men san diego", ["Where in the World is Carmen San Diego"], fields: [{name: :text_middle}] | |
31 | + assert_search "world carmen", [], fields: [{name: :text_middle}] | |
32 | + end | |
33 | + | |
34 | + def test_text_end | |
35 | + store_names ["Where in the World is Carmen San Diego"] | |
36 | + assert_search "men san diego", ["Where in the World is Carmen San Diego"], fields: [{name: :text_end}] | |
37 | + assert_search "carmen san", [], fields: [{name: :text_end}] | |
38 | + end | |
39 | + | |
40 | + def test_word_start | |
41 | + store_names ["Where in the World is Carmen San Diego"] | |
42 | + assert_search "car san wor", ["Where in the World is Carmen San Diego"], fields: [{name: :word_start}] | |
43 | + end | |
44 | + | |
45 | + def test_word_middle | |
46 | + store_names ["Where in the World is Carmen San Diego"] | |
47 | + assert_search "orl", ["Where in the World is Carmen San Diego"], fields: [{name: :word_middle}] | |
48 | + end | |
49 | + | |
50 | + def test_word_end | |
51 | + store_names ["Where in the World is Carmen San Diego"] | |
52 | + assert_search "rld men ego", ["Where in the World is Carmen San Diego"], fields: [{name: :word_end}] | |
53 | + end | |
54 | + | |
20 | 55 | end | ... | ... |
test/boost_test.rb
... | ... | @@ -57,4 +57,14 @@ class TestBoost < Minitest::Unit::TestCase |
57 | 57 | assert_first "tomato", "Tomato B", user_id: 2 |
58 | 58 | end |
59 | 59 | |
60 | + def test_personalize | |
61 | + store [ | |
62 | + {name: "Tomato A"}, | |
63 | + {name: "Tomato B", user_ids: [1, 2, 3]}, | |
64 | + {name: "Tomato C"}, | |
65 | + {name: "Tomato D"} | |
66 | + ] | |
67 | + assert_first "tomato", "Tomato B", personalize: {user_ids: 2} | |
68 | + end | |
69 | + | |
60 | 70 | end | ... | ... |
test/index_test.rb
... | ... | @@ -32,4 +32,17 @@ class TestIndex < Minitest::Unit::TestCase |
32 | 32 | assert_equal ["Dollar Tree"], Store.search(query: {match: {name: "Dollar Tree"}}).map(&:name) |
33 | 33 | end |
34 | 34 | |
35 | + if defined?(ActiveRecord) | |
36 | + | |
37 | + def test_transaction | |
38 | + Product.transaction do | |
39 | + store_names ["Product A"] | |
40 | + raise ActiveRecord::Rollback | |
41 | + end | |
42 | + | |
43 | + assert_search "product", [] | |
44 | + end | |
45 | + | |
46 | + end | |
47 | + | |
35 | 48 | end | ... | ... |
test/inheritance_test.rb
... | ... | @@ -25,6 +25,19 @@ class TestInheritance < Minitest::Unit::TestCase |
25 | 25 | assert_equal 2, Animal.search("bear").size |
26 | 26 | end |
27 | 27 | |
28 | + def test_force_one_type | |
29 | + store_names ["Green Bear"], Dog | |
30 | + store_names ["Blue Bear"], Cat | |
31 | + assert_equal ["Blue Bear"], Animal.search("bear", type: [Cat]).map(&:name) | |
32 | + end | |
33 | + | |
34 | + def test_force_multiple_types | |
35 | + store_names ["Green Bear"], Dog | |
36 | + store_names ["Blue Bear"], Cat | |
37 | + store_names ["Red Bear"], Animal | |
38 | + assert_equal ["Green Bear", "Blue Bear"], Animal.search("bear", type: [Dog, Cat]).map(&:name) | |
39 | + end | |
40 | + | |
28 | 41 | def test_child_autocomplete |
29 | 42 | store_names ["Max"], Cat |
30 | 43 | store_names ["Mark"], Dog | ... | ... |
test/match_test.rb
... | ... | @@ -139,4 +139,9 @@ class TestMatch < Minitest::Unit::TestCase |
139 | 139 | assert_search "*", ["Product A", "Product B"] |
140 | 140 | end |
141 | 141 | |
142 | + def test_to_be_or_not_to_be | |
143 | + store_names ["to be or not to be"] | |
144 | + assert_search "to be", ["to be or not to be"] | |
145 | + end | |
146 | + | |
142 | 147 | end | ... | ... |
... | ... | @@ -0,0 +1,14 @@ |
1 | +require_relative "test_helper" | |
2 | + | |
3 | +class TestQuery < Minitest::Unit::TestCase | |
4 | + | |
5 | + def test_basic | |
6 | + store_names ["Milk", "Apple"] | |
7 | + query = Product.search("milk", execute: false) | |
8 | + # query.body = {query: {match_all: {}}} | |
9 | + # query.body = {query: {match: {name: "Apple"}}} | |
10 | + query.body[:query] = {match_all: {}} | |
11 | + assert_equal ["Apple", "Milk"], query.execute.map(&:name).sort | |
12 | + end | |
13 | + | |
14 | +end | ... | ... |
test/sql_test.rb
... | ... | @@ -40,7 +40,7 @@ class TestSql < Minitest::Unit::TestCase |
40 | 40 | store [ |
41 | 41 | {name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: now, orders_count: 4, user_ids: [1, 2, 3]}, |
42 | 42 | {name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: now - 1, orders_count: 3, user_ids: [1]}, |
43 | - {name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: now - 2, orders_count: 2}, | |
43 | + {name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: now - 2, orders_count: 2, user_ids: [1, 3]}, | |
44 | 44 | {name: "Product D", store_id: 4, in_stock: false, backordered: false, created_at: now - 3, orders_count: 1}, |
45 | 45 | ] |
46 | 46 | assert_search "product", ["Product A", "Product B"], where: {in_stock: true} |
... | ... | @@ -60,13 +60,21 @@ class TestSql < Minitest::Unit::TestCase |
60 | 60 | assert_search "product", ["Product A", "Product B"], where: {store_id: [1, 2]} |
61 | 61 | assert_search "product", ["Product B", "Product C", "Product D"], where: {store_id: {not: 1}} |
62 | 62 | assert_search "product", ["Product C", "Product D"], where: {store_id: {not: [1, 2]}} |
63 | + assert_search "product", ["Product A"], where: {user_ids: {lte: 2, gte: 2}} | |
63 | 64 | # or |
64 | 65 | assert_search "product", ["Product A", "Product B", "Product C"], where: {or: [[{in_stock: true}, {store_id: 3}]]} |
65 | 66 | assert_search "product", ["Product A", "Product B", "Product C"], where: {or: [[{orders_count: [2, 4]}, {store_id: [1, 2]}]]} |
66 | 67 | assert_search "product", ["Product A", "Product D"], where: {or: [[{orders_count: 1}, {created_at: {gte: now - 1}, backordered: true}]]} |
67 | 68 | # all |
68 | - assert_search "product", ["Product A"], where: {user_ids: {all: [1, 3]}} | |
69 | + assert_search "product", ["Product A", "Product C"], where: {user_ids: {all: [1, 3]}} | |
69 | 70 | assert_search "product", [], where: {user_ids: {all: [1, 2, 3, 4]}} |
71 | + # any / nested terms | |
72 | + assert_search "product", ["Product B", "Product C"], where: {user_ids: {not: [2], in: [1,3]}} | |
73 | + # not / exists | |
74 | + assert_search "product", ["Product D"], where: {user_ids: nil} | |
75 | + assert_search "product", ["Product A", "Product B", "Product C"], where: {user_ids: {not: nil}} | |
76 | + assert_search "product", ["Product A", "Product C", "Product D"], where: {user_ids: [3, nil]} | |
77 | + assert_search "product", ["Product B"], where: {user_ids: {not: [3, nil]}} | |
70 | 78 | end |
71 | 79 | |
72 | 80 | def test_where_string |
... | ... | @@ -90,6 +98,35 @@ class TestSql < Minitest::Unit::TestCase |
90 | 98 | assert_search "product", ["Product A"], where: {id: product.id.to_s} |
91 | 99 | end |
92 | 100 | |
101 | + def test_where_empty | |
102 | + store_names ["Product A"] | |
103 | + assert_search "product", ["Product A"], where: {} | |
104 | + end | |
105 | + | |
106 | + def test_where_empty_array | |
107 | + store_names ["Product A"] | |
108 | + assert_search "product", [], where: {store_id: []} | |
109 | + end | |
110 | + | |
111 | + # http://elasticsearch-users.115913.n3.nabble.com/Numeric-range-quey-or-filter-in-an-array-field-possible-or-not-td4042967.html | |
112 | + # https://gist.github.com/jprante/7099463 | |
113 | + def test_where_range_array | |
114 | + store [ | |
115 | + {name: "Product A", user_ids: [11, 23, 13, 16, 17, 23.6]}, | |
116 | + {name: "Product B", user_ids: [1, 2, 3, 4, 5, 6, 7, 8, 8.9, 9.1, 9.4]}, | |
117 | + {name: "Product C", user_ids: [101, 230, 150, 200]} | |
118 | + ] | |
119 | + assert_search "product", ["Product A"], where: {user_ids: {gt: 10, lt: 23.9}} | |
120 | + end | |
121 | + | |
122 | + def test_where_range_array_again | |
123 | + store [ | |
124 | + {name: "Product A", user_ids: [19, 32, 42]}, | |
125 | + {name: "Product B", user_ids: [13, 40, 52]} | |
126 | + ] | |
127 | + assert_search "product", ["Product A"], where: {user_ids: {gt: 26, lt: 36}} | |
128 | + end | |
129 | + | |
93 | 130 | def test_near |
94 | 131 | store [ |
95 | 132 | {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, |
... | ... | @@ -133,12 +170,34 @@ class TestSql < Minitest::Unit::TestCase |
133 | 170 | assert_order "product", ["Product A", "Product B", "Product C", "Product D"], order: "name" |
134 | 171 | end |
135 | 172 | |
173 | + def test_order_id | |
174 | + store_names ["Product A", "Product B"] | |
175 | + product_a = Product.where(name: "Product A").first | |
176 | + product_b = Product.where(name: "Product B").first | |
177 | + assert_order "product", [product_a, product_b].sort_by(&:id).map(&:name), order: {id: :asc} | |
178 | + end | |
179 | + | |
180 | + def test_order_multiple | |
181 | + store [ | |
182 | + {name: "Product A", color: "blue", store_id: 1}, | |
183 | + {name: "Product B", color: "red", store_id: 3}, | |
184 | + {name: "Product C", color: "red", store_id: 2} | |
185 | + ] | |
186 | + assert_order "product", ["Product A", "Product B", "Product C"], order: {color: :asc, store_id: :desc} | |
187 | + end | |
188 | + | |
136 | 189 | def test_partial |
137 | 190 | store_names ["Honey"] |
138 | 191 | assert_search "fresh honey", [] |
139 | 192 | assert_search "fresh honey", ["Honey"], partial: true |
140 | 193 | end |
141 | 194 | |
195 | + def test_operator | |
196 | + store_names ["Honey"] | |
197 | + assert_search "fresh honey", [] | |
198 | + assert_search "fresh honey", ["Honey"], operator: "or" | |
199 | + end | |
200 | + | |
142 | 201 | def test_misspellings |
143 | 202 | store_names ["abc", "abd", "aee"] |
144 | 203 | assert_search "abc", ["abc"], misspellings: false | ... | ... |
test/test_helper.rb
... | ... | @@ -56,9 +56,9 @@ else |
56 | 56 | ActiveRecord::Base.time_zone_aware_attributes = true |
57 | 57 | |
58 | 58 | # migrations |
59 | - ActiveRecord::Base.establish_connection :adapter => "postgresql", :database => "searchkick_test" | |
59 | + ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:" | |
60 | 60 | |
61 | - ActiveRecord::Migration.create_table :products, :force => true do |t| | |
61 | + ActiveRecord::Migration.create_table :products do |t| | |
62 | 62 | t.string :name |
63 | 63 | t.integer :store_id |
64 | 64 | t.boolean :in_stock |
... | ... | @@ -71,11 +71,11 @@ else |
71 | 71 | t.timestamps |
72 | 72 | end |
73 | 73 | |
74 | - ActiveRecord::Migration.create_table :stores, :force => true do |t| | |
74 | + ActiveRecord::Migration.create_table :stores do |t| | |
75 | 75 | t.string :name |
76 | 76 | end |
77 | 77 | |
78 | - ActiveRecord::Migration.create_table :animals, :force => true do |t| | |
78 | + ActiveRecord::Migration.create_table :animals do |t| | |
79 | 79 | t.string :name |
80 | 80 | t.string :type |
81 | 81 | end |
... | ... | @@ -112,12 +112,18 @@ class Product |
112 | 112 | suggest: [:name, :color], |
113 | 113 | conversions: "conversions", |
114 | 114 | personalize: "user_ids", |
115 | - locations: ["location", "multiple_locations"] | |
115 | + locations: ["location", "multiple_locations"], | |
116 | + text_start: [:name], | |
117 | + text_middle: [:name], | |
118 | + text_end: [:name], | |
119 | + word_start: [:name], | |
120 | + word_middle: [:name], | |
121 | + word_end: [:name] | |
116 | 122 | |
117 | 123 | attr_accessor :conversions, :user_ids |
118 | 124 | |
119 | 125 | def search_data |
120 | - serializable_hash.merge conversions: conversions, user_ids: user_ids, location: [latitude, longitude], multiple_locations: [[latitude, longitude], [0, 0]] | |
126 | + serializable_hash.except("id").merge conversions: conversions, user_ids: user_ids, location: [latitude, longitude], multiple_locations: [[latitude, longitude], [0, 0]] | |
121 | 127 | end |
122 | 128 | |
123 | 129 | def should_index? |
... | ... | @@ -150,6 +156,7 @@ class Minitest::Unit::TestCase |
150 | 156 | |
151 | 157 | def setup |
152 | 158 | Product.destroy_all |
159 | + Store.destroy_all | |
153 | 160 | Animal.destroy_all |
154 | 161 | end |
155 | 162 | ... | ... |