From 06ee49b9d7dbd5368ac77ed7c8f34b7251ec1692 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Thu, 30 May 2019 22:27:12 -0700
Subject: [PATCH] Added block form of scroll

---
Review notes (text between "---" and the diffstat is not part of the commit message):
- The block form clears the scroll using the id of the most recent batch
  instead of the id returned with the first response.
- Clearing happens in an `ensure`, so it also runs when the caller's block
  raises or breaks out of the loop early.
- The commit id in the `From` line and the blob ids on the results.rb `index`
  line predate these changes.

 CHANGELOG.md              |  4 ++++
 README.md                 | 10 +++++++++-
 lib/searchkick/results.rb | 44 ++++++++++++++++++++++++++++++++------------
 test/scroll_test.rb       | 10 ++++++++++
 4 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cec276..ed3ac80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 4.0.2 [unreleased]
+
+- Added block form of `scroll`
+
 ## 4.0.1
 
 - Added support for scroll API
diff --git a/README.md b/README.md
index 19f27ca..96ca1e5 100644
--- a/README.md
+++ b/README.md
@@ -1484,12 +1484,20 @@ To retrieve a very large number of results, use the [scroll API](https://www.ela
 ```ruby
 products = Product.search "*", scroll: "1m"
 while products.any?
-  # do something ...
+  # process batch ...
 
   products = products.scroll
 end
 ```
 
+On the master branch, you can also do:
+
+```ruby
+Product.search("*", scroll: "1m").scroll do |batch|
+  # process batch ...
+end
+```
+
 You should call `scroll` on each new set of results, not the original result.
 
 ## Nested Data
diff --git a/lib/searchkick/results.rb b/lib/searchkick/results.rb
index 57b051e..576995e 100644
--- a/lib/searchkick/results.rb
+++ b/lib/searchkick/results.rb
@@ -228,18 +228,38 @@ module Searchkick
     def scroll
       raise Searchkick::Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id
 
-      params = {
-        scroll: options[:scroll],
-        scroll_id: scroll_id
-      }
-
-      begin
-        Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options)
-      rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
-        if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i
-          raise Searchkick::Error, "Scroll id has expired"
-        else
-          raise e
+      if block_given?
+        records = self
+        begin
+          while records.any?
+            yield records
+            records = records.scroll
+          end
+        ensure
+          begin
+            # try to clear scroll
+            # not required as scroll will expire
+            # but there is a cost to open scrolls
+            # use the latest scroll id, since it can change between requests
+            Searchkick.client.clear_scroll(scroll_id: records.scroll_id)
+          rescue Elasticsearch::Transport::Transport::Error
+            # okay if it fails
+          end
+        end
+      else
+        params = {
+          scroll: options[:scroll],
+          scroll_id: scroll_id
+        }
+
+        begin
+          Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options)
+        rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
+          if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i
+            raise Searchkick::Error, "Scroll id has expired"
+          else
+            raise e
+          end
         end
       end
     end
diff --git a/test/scroll_test.rb b/test/scroll_test.rb
index 0fd76da..4d43091 100644
--- a/test/scroll_test.rb
+++ b/test/scroll_test.rb
@@ -61,4 +61,14 @@ class ScrollTest < Minitest::Test
     end
     assert_match /Pass .+ option/, error.message
   end
+
+  def test_scroll_block
+    store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"]
+    batches_count = 0
+    Product.search("*", scroll: "1m", per_page: 2).scroll do |batch|
+      assert_equal 2, batch.size
+      batches_count += 1
+    end
+    assert_equal 3, batches_count
+  end
 end
-- 
libgit2 0.21.0