From cf25191c8154375d95198b1f91eced2b3bda5acb Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 30 May 2019 15:30:48 -0700 Subject: [PATCH] Scroll API cleanup - consistent style, additional tests --- CHANGELOG.md | 1 + README.md | 30 +++++++++++++++--------------- lib/searchkick/logging.rb | 4 ++-- lib/searchkick/query.rb | 6 +++--- lib/searchkick/results.rb | 33 +++++++++++++++------------------ test/scroll_test.rb | 27 +++++++++++++++++---------- 6 files changed, 53 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39f622a..9359942 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 4.0.1 [unreleased] +- Added support for scroll API - Made type optional for custom mapping for Elasticsearch 6 - Fixed error when suggestions empty - Fixed `models` option with inheritance diff --git a/README.md b/README.md index fa25fea..9450817 100644 --- a/README.md +++ b/README.md @@ -229,21 +229,6 @@ View with will_paginate <%= will_paginate @products %> ``` -### Scroll API - -For large datasets, a scroll context is more efficient than deep pagination. Create a scroll context by passing in the desired keep-alive time and the number of records per page. - -Request the next set of records in the scroll context. You can either use the original search context or any subsequent record set to fetch the next batch of records. - -```ruby -products = Product.search "milk", per_page: 10, scroll: '1m' -while products.any? - # code ... - - products = products.scroll -end -``` - ### Partial Matches By default, results must match all words in the query. @@ -1492,6 +1477,21 @@ Boost specific models with: indices_boost: {Category => 2, Product => 1} ``` +### Scroll API [master] + +To retrieve a large number of results, use the [scroll API](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html). + +```ruby +products = Product.search "*", scroll: "1m" +while products.any? + # do something ... + + products = products.scroll +end +``` + +You should call `scroll` on each new set of results, not the original result. + ## Nested Data To query nested data, use dot notation. diff --git a/lib/searchkick/logging.rb b/lib/searchkick/logging.rb index 1275806..50d6801 100644 --- a/lib/searchkick/logging.rb +++ b/lib/searchkick/logging.rb @@ -167,8 +167,8 @@ module Searchkick # no easy way to tell which host the client will use host = Searchkick.client.transport.hosts.first - params = ['pretty'] - params << 'scroll=' + payload[:query][:scroll] if payload[:query][:scroll] + params = ["pretty"] + params << "scroll=#{payload[:query][:scroll]}" if payload[:query][:scroll] debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{payload[:query][:body].to_json}'" end diff --git a/lib/searchkick/query.rb b/lib/searchkick/query.rb index 4f17590..0aca7a2 100644 --- a/lib/searchkick/query.rb +++ b/lib/searchkick/query.rb @@ -19,7 +19,7 @@ module Searchkick :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain, :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load, :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile, - :request_params, :routing, :scope_results, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where, :scroll] + :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where] raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? term = term.to_s @@ -110,8 +110,8 @@ module Searchkick # no easy way to tell which host the client will use host = Searchkick.client.transport.hosts.first credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil - params = ['pretty'] - params << 'scroll=' + options[:scroll] if options[:scroll] + params = ["pretty"] + params << "scroll=#{options[:scroll]}" if options[:scroll] "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'" end diff --git a/lib/searchkick/results.rb b/lib/searchkick/results.rb index cc18f0d..57b051e 100644 --- a/lib/searchkick/results.rb +++ b/lib/searchkick/results.rb @@ -222,27 +222,24 @@ module Searchkick end def scroll_id - @response['_scroll_id'] + @response["_scroll_id"] end def scroll - if scroll_id.present? && options[:scroll].nil? - raise Searchkick::Error, "Scroll error - scroll keepalive must be defined" - elsif scroll_id.nil? - raise Searchkick::Error, "Scroll error - a scroll id has not been provided" - else - begin - params = { - scroll: options[:scroll], - scroll_id: scroll_id - } - Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options) - rescue Elasticsearch::Transport::Transport::Errors::NotFound => e - if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i - raise Searchkick::Error, "Scroll error - a scroll id does not exist or has expired" - else - raise e - end + raise Searchkick::Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id + + params = { + scroll: options[:scroll], + scroll_id: scroll_id + } + + begin + Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options) + rescue Elasticsearch::Transport::Transport::Errors::NotFound => e + if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i + raise Searchkick::Error, "Scroll id has expired" + else + raise e end end end diff --git a/test/scroll_test.rb b/test/scroll_test.rb index 46cc561..0fd76da 100644 --- a/test/scroll_test.rb +++ b/test/scroll_test.rb @@ -6,8 +6,6 @@ class ScrollTest < Minitest::Test products = Product.search("product", order: {name: :asc}, scroll: '1m', per_page: 2) assert_equal ["Product A", "Product B"], products.map(&:name) assert_equal "product", products.entry_name - assert_equal "1m", products.options[:scroll] - assert_equal products.response["_scroll_id"], products.scroll_id assert_equal 2, products.size assert_equal 2, products.length assert_equal 6, products.total_count @@ -17,15 +15,14 @@ class ScrollTest < Minitest::Test # scroll for next 2 products = products.scroll assert_equal ["Product C", "Product D"], products.map(&:name) - assert_equal "product", products.entry_name + # scroll for next 2 products = products.scroll assert_equal ["Product E", "Product F"], products.map(&:name) - assert_equal "product", products.entry_name + # scroll exhausted products = products.scroll assert_equal [], products.map(&:name) - assert_equal "product", products.entry_name end def test_scroll_body @@ -33,8 +30,6 @@ class ScrollTest < Minitest::Test products = Product.search("product", body: {query: {match_all: {}}, sort: [{name: "asc"}]}, scroll: '1m', per_page: 2) assert_equal ["Product A", "Product B"], products.map(&:name) assert_equal "product", products.entry_name - assert_equal "1m", products.options[:scroll] - assert_equal products.response["_scroll_id"], products.scroll_id assert_equal 2, products.size assert_equal 2, products.length assert_equal 6, products.total_count @@ -44,14 +39,26 @@ class ScrollTest < Minitest::Test # scroll for next 2 products = products.scroll assert_equal ["Product C", "Product D"], products.map(&:name) - assert_equal "product", products.entry_name + # scroll for next 2 products = products.scroll assert_equal ["Product E", "Product F"], products.map(&:name) - assert_equal "product", products.entry_name + # scroll exhausted products = products.scroll assert_equal [], products.map(&:name) - assert_equal "product", products.entry_name + end + + def test_scroll_all + store_names ["Product A"] + assert_equal ["Product A"], Product.search("*", scroll: "1m").map(&:name) + end + + def test_scroll_no_option + products = Product.search("*") + error = assert_raises Searchkick::Error do + products.scroll + end + assert_match /Pass .+ option/, error.message end end -- libgit2 0.21.0