Commit 929c4ce7ec8186bd92e6b0d1791e0b1dcb85506e
1 parent
8dc87dc6
Exists in
master
and in
19 other branches
Added async reindex
Showing
6 changed files
with
152 additions
and
32 deletions
Show diff stats
README.md
@@ -1156,6 +1156,37 @@ Reindex and search with: | @@ -1156,6 +1156,37 @@ Reindex and search with: | ||
1156 | Business.search "ice cream", routing: params[:city_id] | 1156 | Business.search "ice cream", routing: params[:city_id] |
1157 | ``` | 1157 | ``` |
1158 | 1158 | ||
1159 | +## Large Data Sets | ||
1160 | + | ||
1161 | +### Background Reindexing [experimental, ActiveRecord only] | ||
1162 | + | ||
1163 | +For large data sets, you can use background jobs to parallelize reindexing. | ||
1164 | + | ||
1165 | +```ruby | ||
1166 | +Product.reindex(async: true) | ||
1167 | +# {index_name: "products_production_20170111210018065"} | ||
1168 | +``` | ||
1169 | + | ||
1170 | +Once the jobs complete, promote the new index with: | ||
1171 | + | ||
1172 | +```ruby | ||
1173 | +Product.searchkick_index.promote(index_name) | ||
1174 | +``` | ||
1175 | + | ||
1176 | +You can optionally track the status with Redis: | ||
1177 | + | ||
1178 | +```ruby | ||
1179 | +Searchkick.redis = Redis.new | ||
1180 | +``` | ||
1181 | + | ||
1182 | +And use: | ||
1183 | + | ||
1184 | +```ruby | ||
1185 | +Searchkick.reindex_status(index_name) | ||
1186 | +``` | ||
1187 | + | ||
1188 | +For more tips, check out [Keeping Elasticsearch in Sync](https://www.elastic.co/blog/found-keeping-elasticsearch-in-sync). | ||
1189 | + | ||
1159 | ## Advanced | 1190 | ## Advanced |
1160 | 1191 | ||
1161 | Prefer to use the [Elasticsearch DSL](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-queries.html) but still want awesome features like zero-downtime reindexing? | 1192 | Prefer to use the [Elasticsearch DSL](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-queries.html) but still want awesome features like zero-downtime reindexing? |
@@ -1457,10 +1488,6 @@ Product.search "api", misspellings: {prefix_length: 2} # api, apt, no ahi | @@ -1457,10 +1488,6 @@ Product.search "api", misspellings: {prefix_length: 2} # api, apt, no ahi | ||
1457 | Product.search "ah", misspellings: {prefix_length: 2} # ah, no aha | 1488 | Product.search "ah", misspellings: {prefix_length: 2} # ah, no aha |
1458 | ``` | 1489 | ``` |
1459 | 1490 | ||
1460 | -## Large Data Sets | ||
1461 | - | ||
1462 | -For large data sets, check out [Keeping Elasticsearch in Sync](https://www.elastic.co/blog/found-keeping-elasticsearch-in-sync). Searchkick will make this easy in the future. | ||
1463 | - | ||
1464 | ## Testing | 1491 | ## Testing |
1465 | 1492 | ||
1466 | This section could use some love. | 1493 | This section could use some love. |
lib/searchkick.rb
@@ -19,7 +19,10 @@ begin | @@ -19,7 +19,10 @@ begin | ||
19 | rescue LoadError | 19 | rescue LoadError |
20 | # do nothing | 20 | # do nothing |
21 | end | 21 | end |
22 | -require "searchkick/reindex_v2_job" if defined?(ActiveJob) | 22 | +if defined?(ActiveJob) |
23 | + require "searchkick/bulk_reindex_job" | ||
24 | + require "searchkick/reindex_v2_job" | ||
25 | +end | ||
23 | 26 | ||
24 | module Searchkick | 27 | module Searchkick |
25 | class Error < StandardError; end | 28 | class Error < StandardError; end |
@@ -30,7 +33,7 @@ module Searchkick | @@ -30,7 +33,7 @@ module Searchkick | ||
30 | class ImportError < Error; end | 33 | class ImportError < Error; end |
31 | 34 | ||
32 | class << self | 35 | class << self |
33 | - attr_accessor :search_method_name, :wordnet_path, :timeout, :models, :client_options | 36 | + attr_accessor :search_method_name, :wordnet_path, :timeout, :models, :client_options, :redis |
34 | attr_writer :client, :env, :search_timeout | 37 | attr_writer :client, :env, :search_timeout |
35 | attr_reader :aws_credentials | 38 | attr_reader :aws_credentials |
36 | end | 39 | end |
@@ -129,6 +132,16 @@ module Searchkick | @@ -129,6 +132,16 @@ module Searchkick | ||
129 | @client = nil # reset client | 132 | @client = nil # reset client |
130 | end | 133 | end |
131 | 134 | ||
135 | + def self.reindex_status(index_name) | ||
136 | + if redis | ||
137 | + batches_left = Searchkick::Index.new(index_name).batches_left | ||
138 | + { | ||
139 | + completed: batches_left == 0, | ||
140 | + batches_left: batches_left | ||
141 | + } | ||
142 | + end | ||
143 | + end | ||
144 | + | ||
132 | # private | 145 | # private |
133 | def self.indexer | 146 | def self.indexer |
134 | Thread.current[:searchkick_indexer] ||= Searchkick::Indexer.new | 147 | Thread.current[:searchkick_indexer] ||= Searchkick::Indexer.new |
@@ -0,0 +1,12 @@ | @@ -0,0 +1,12 @@ | ||
1 | +module Searchkick | ||
2 | + class BulkReindexJob < ActiveJob::Base | ||
3 | + queue_as :searchkick | ||
4 | + | ||
5 | + def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil) | ||
6 | + klass = class_name.constantize | ||
7 | + index = index_name ? Searchkick::Index.new(index_name) : klass.searchkick_index | ||
8 | + record_ids ||= min_id..max_id | ||
9 | + index.import_scope(klass.where(klass.primary_key => record_ids), method_name: method_name, batch: true, batch_id: batch_id) | ||
10 | + end | ||
11 | + end | ||
12 | +end |
lib/searchkick/index.rb
@@ -191,7 +191,7 @@ module Searchkick | @@ -191,7 +191,7 @@ module Searchkick | ||
191 | 191 | ||
192 | # https://gist.github.com/jarosan/3124884 | 192 | # https://gist.github.com/jarosan/3124884 |
193 | # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/ | 193 | # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/ |
194 | - def reindex_scope(scope, import: true, resume: false, retain: false) | 194 | + def reindex_scope(scope, import: true, resume: false, retain: false, async: false) |
195 | if resume | 195 | if resume |
196 | index_name = all_indices.sort.last | 196 | index_name = all_indices.sort.last |
197 | raise Searchkick::Error, "No index to resume" unless index_name | 197 | raise Searchkick::Error, "No index to resume" unless index_name |
@@ -205,30 +205,58 @@ module Searchkick | @@ -205,30 +205,58 @@ module Searchkick | ||
205 | # check if alias exists | 205 | # check if alias exists |
206 | if alias_exists? | 206 | if alias_exists? |
207 | # import before promotion | 207 | # import before promotion |
208 | - index.import_scope(scope, resume: resume) if import | 208 | + index.import_scope(scope, resume: resume, async: async, full: true) if import |
209 | 209 | ||
210 | # get existing indices to remove | 210 | # get existing indices to remove |
211 | - promote(index.name) | ||
212 | - clean_indices unless retain | 211 | + unless async |
212 | + promote(index.name) | ||
213 | + clean_indices unless retain | ||
214 | + end | ||
213 | else | 215 | else |
214 | delete if exists? | 216 | delete if exists? |
215 | promote(index.name) | 217 | promote(index.name) |
216 | 218 | ||
217 | # import after promotion | 219 | # import after promotion |
218 | - index.import_scope(scope, resume: resume) if import | 220 | + index.import_scope(scope, resume: resume, async: async, full: true) if import |
219 | end | 221 | end |
220 | 222 | ||
221 | - index.refresh | ||
222 | - | ||
223 | - true | 223 | + if async |
224 | + {index_name: index.name} | ||
225 | + else | ||
226 | + index.refresh | ||
227 | + true | ||
228 | + end | ||
224 | end | 229 | end |
225 | 230 | ||
226 | - def import_scope(scope, resume: false, method_name: nil) | 231 | + def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false) |
227 | batch_size = @options[:batch_size] || 1000 | 232 | batch_size = @options[:batch_size] || 1000 |
228 | 233 | ||
229 | # use scope for import | 234 | # use scope for import |
230 | scope = scope.search_import if scope.respond_to?(:search_import) | 235 | scope = scope.search_import if scope.respond_to?(:search_import) |
231 | - if scope.respond_to?(:find_in_batches) | 236 | + |
237 | + if batch | ||
238 | + import_or_update scope.to_a, method_name, async | ||
239 | + Searchkick.redis.srem(batches_key, batch_id) if batch_id && Searchkick.redis | ||
240 | + elsif full && async | ||
241 | + # TODO expire Redis key | ||
242 | + primary_key = scope.primary_key | ||
243 | + starting_id = scope.minimum(primary_key) | ||
244 | + max_id = scope.maximum(primary_key) | ||
245 | + batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil | ||
246 | + | ||
247 | + batches_count.times do |i| | ||
248 | + batch_id = i + 1 | ||
249 | + min_id = starting_id + (i * batch_size) | ||
250 | + Searchkick::BulkReindexJob.perform_later( | ||
251 | + class_name: scope.model_name.name, | ||
252 | + min_id: min_id, | ||
253 | + max_id: min_id + batch_size - 1, | ||
254 | + index_name: name, | ||
255 | + batch_id: batch_id | ||
256 | + ) | ||
257 | + Searchkick.redis.sadd(batches_key, batch_id) if Searchkick.redis | ||
258 | + end | ||
259 | + elsif scope.respond_to?(:find_in_batches) | ||
232 | if resume | 260 | if resume |
233 | # use total docs instead of max id since there's not a great way | 261 | # use total docs instead of max id since there's not a great way |
234 | # to get the max _id without scripting since it's a string | 262 | # to get the max _id without scripting since it's a string |
@@ -237,8 +265,10 @@ module Searchkick | @@ -237,8 +265,10 @@ module Searchkick | ||
237 | scope = scope.where("id > ?", total_docs) | 265 | scope = scope.where("id > ?", total_docs) |
238 | end | 266 | end |
239 | 267 | ||
268 | + scope = scope.select("id").except(:includes, :preload) if async | ||
269 | + | ||
240 | scope.find_in_batches batch_size: batch_size do |batch| | 270 | scope.find_in_batches batch_size: batch_size do |batch| |
241 | - import_or_update batch.select(&:should_index?), method_name | 271 | + import_or_update batch, method_name, async |
242 | end | 272 | end |
243 | else | 273 | else |
244 | # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb | 274 | # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb |
@@ -246,27 +276,18 @@ module Searchkick | @@ -246,27 +276,18 @@ module Searchkick | ||
246 | items = [] | 276 | items = [] |
247 | # TODO add resume | 277 | # TODO add resume |
248 | scope.all.each do |item| | 278 | scope.all.each do |item| |
249 | - items << item if item.should_index? | 279 | + items << item |
250 | if items.length == batch_size | 280 | if items.length == batch_size |
251 | - import_or_update items, method_name | 281 | + import_or_update items, method_name, async |
252 | items = [] | 282 | items = [] |
253 | end | 283 | end |
254 | end | 284 | end |
255 | - import_or_update items, method_name | 285 | + import_or_update items, method_name, async |
256 | end | 286 | end |
257 | end | 287 | end |
258 | 288 | ||
259 | - def import_or_update(records, method_name) | ||
260 | - retries = 0 | ||
261 | - begin | ||
262 | - method_name ? bulk_update(records, method_name) : import(records) | ||
263 | - rescue Faraday::ClientError => e | ||
264 | - if retries < 1 | ||
265 | - retries += 1 | ||
266 | - retry | ||
267 | - end | ||
268 | - raise e | ||
269 | - end | 289 | + def batches_left |
290 | + Searchkick.redis.scard(batches_key) if Searchkick.redis | ||
270 | end | 291 | end |
271 | 292 | ||
272 | # other | 293 | # other |
@@ -377,5 +398,34 @@ module Searchkick | @@ -377,5 +398,34 @@ module Searchkick | ||
377 | obj | 398 | obj |
378 | end | 399 | end |
379 | end | 400 | end |
401 | + | ||
402 | + def import_or_update(records, method_name, async) | ||
403 | + if records.any? | ||
404 | + if async | ||
405 | + Searchkick::BulkReindexJob.perform_later( | ||
406 | + class_name: records.first.class.name, | ||
407 | + record_ids: records.map(&:id), | ||
408 | + index_name: name, | ||
409 | + method_name: method_name ? method_name.to_s : nil | ||
410 | + ) | ||
411 | + else | ||
412 | + retries = 0 | ||
413 | + records = records.select(&:should_index?) | ||
414 | + begin | ||
415 | + method_name ? bulk_update(records, method_name) : import(records) | ||
416 | + rescue Faraday::ClientError => e | ||
417 | + if retries < 1 | ||
418 | + retries += 1 | ||
419 | + retry | ||
420 | + end | ||
421 | + raise e | ||
422 | + end | ||
423 | + end | ||
424 | + end | ||
425 | + end | ||
426 | + | ||
427 | + def batches_key | ||
428 | + "searchkick:reindex:#{name}:batches" | ||
429 | + end | ||
380 | end | 430 | end |
381 | end | 431 | end |
lib/searchkick/model.rb
@@ -58,15 +58,16 @@ module Searchkick | @@ -58,15 +58,16 @@ module Searchkick | ||
58 | # update | 58 | # update |
59 | searchkick_index.import_scope(searchkick_klass, method_name: method_name) | 59 | searchkick_index.import_scope(searchkick_klass, method_name: method_name) |
60 | searchkick_index.refresh if refresh | 60 | searchkick_index.refresh if refresh |
61 | + true | ||
61 | elsif scoped && !full | 62 | elsif scoped && !full |
62 | # reindex association | 63 | # reindex association |
63 | searchkick_index.import_scope(searchkick_klass) | 64 | searchkick_index.import_scope(searchkick_klass) |
64 | searchkick_index.refresh if refresh | 65 | searchkick_index.refresh if refresh |
66 | + true | ||
65 | else | 67 | else |
66 | # full reindex | 68 | # full reindex |
67 | searchkick_index.reindex_scope(searchkick_klass, options) | 69 | searchkick_index.reindex_scope(searchkick_klass, options) |
68 | end | 70 | end |
69 | - true | ||
70 | end | 71 | end |
71 | alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex) | 72 | alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex) |
72 | 73 |
test/reindex_test.rb
@@ -22,4 +22,21 @@ class ReindexTest < Minitest::Test | @@ -22,4 +22,21 @@ class ReindexTest < Minitest::Test | ||
22 | store.products.reindex(refresh: true) | 22 | store.products.reindex(refresh: true) |
23 | assert_search "product", ["Product A", "Product B"] | 23 | assert_search "product", ["Product A", "Product B"] |
24 | end | 24 | end |
25 | + | ||
26 | + def test_async | ||
27 | + skip unless defined?(ActiveJob) && defined?(ActiveRecord) | ||
28 | + | ||
29 | + Searchkick.callbacks(false) do | ||
30 | + store_names ["Product A"] | ||
31 | + end | ||
32 | + reindex = Product.reindex(async: true) | ||
33 | + assert_search "product", [] | ||
34 | + | ||
35 | + index = Searchkick::Index.new(reindex[:index_name]) | ||
36 | + index.refresh | ||
37 | + assert_equal 1, index.total_docs | ||
38 | + | ||
39 | + Product.searchkick_index.promote(reindex[:index_name]) | ||
40 | + assert_search "product", ["Product A"] | ||
41 | + end | ||
25 | end | 42 | end |