Commit cf25191c8154375d95198b1f91eced2b3bda5acb
1 parent
64c60145
Exists in
master
and in
17 other branches
Scroll API cleanup - consistent style, additional tests
Showing
6 changed files
with
53 additions
and
48 deletions
Show diff stats
CHANGELOG.md
1 | ## 4.0.1 [unreleased] | 1 | ## 4.0.1 [unreleased] |
2 | 2 | ||
3 | +- Added support for scroll API | ||
3 | - Made type optional for custom mapping for Elasticsearch 6 | 4 | - Made type optional for custom mapping for Elasticsearch 6 |
4 | - Fixed error when suggestions empty | 5 | - Fixed error when suggestions empty |
5 | - Fixed `models` option with inheritance | 6 | - Fixed `models` option with inheritance |
README.md
@@ -229,21 +229,6 @@ View with will_paginate | @@ -229,21 +229,6 @@ View with will_paginate | ||
229 | <%= will_paginate @products %> | 229 | <%= will_paginate @products %> |
230 | ``` | 230 | ``` |
231 | 231 | ||
232 | -### Scroll API | ||
233 | - | ||
234 | -For large datasets, a scroll context is more efficient than deep pagination. Create a scroll context by passing in the desired keep-alive time and the number of records per page. | ||
235 | - | ||
236 | -Request the next set of records in the scroll context. You can either use the original search context or any subsequent record set to fetch the next batch of records. | ||
237 | - | ||
238 | -```ruby | ||
239 | -products = Product.search "milk", per_page: 10, scroll: '1m' | ||
240 | -while products.any? | ||
241 | - # code ... | ||
242 | - | ||
243 | - products = products.scroll | ||
244 | -end | ||
245 | -``` | ||
246 | - | ||
247 | ### Partial Matches | 232 | ### Partial Matches |
248 | 233 | ||
249 | By default, results must match all words in the query. | 234 | By default, results must match all words in the query. |
@@ -1492,6 +1477,21 @@ Boost specific models with: | @@ -1492,6 +1477,21 @@ Boost specific models with: | ||
1492 | indices_boost: {Category => 2, Product => 1} | 1477 | indices_boost: {Category => 2, Product => 1} |
1493 | ``` | 1478 | ``` |
1494 | 1479 | ||
1480 | +### Scroll API [master] | ||
1481 | + | ||
1482 | +To retrieve a large number of results, use the [scroll API](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html). | ||
1483 | + | ||
1484 | +```ruby | ||
1485 | +products = Product.search "*", scroll: "1m" | ||
1486 | +while products.any? | ||
1487 | + # do something ... | ||
1488 | + | ||
1489 | + products = products.scroll | ||
1490 | +end | ||
1491 | +``` | ||
1492 | + | ||
1493 | +You should call `scroll` on each new set of results, not the original result. | ||
1494 | + | ||
1495 | ## Nested Data | 1495 | ## Nested Data |
1496 | 1496 | ||
1497 | To query nested data, use dot notation. | 1497 | To query nested data, use dot notation. |
lib/searchkick/logging.rb
@@ -167,8 +167,8 @@ module Searchkick | @@ -167,8 +167,8 @@ module Searchkick | ||
167 | 167 | ||
168 | # no easy way to tell which host the client will use | 168 | # no easy way to tell which host the client will use |
169 | host = Searchkick.client.transport.hosts.first | 169 | host = Searchkick.client.transport.hosts.first |
170 | - params = ['pretty'] | ||
171 | - params << 'scroll=' + payload[:query][:scroll] if payload[:query][:scroll] | 170 | + params = ["pretty"] |
171 | + params << "scroll=#{payload[:query][:scroll]}" if payload[:query][:scroll] | ||
172 | debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{payload[:query][:body].to_json}'" | 172 | debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{payload[:query][:body].to_json}'" |
173 | end | 173 | end |
174 | 174 |
lib/searchkick/query.rb
@@ -19,7 +19,7 @@ module Searchkick | @@ -19,7 +19,7 @@ module Searchkick | ||
19 | :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain, | 19 | :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain, |
20 | :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load, | 20 | :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load, |
21 | :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile, | 21 | :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile, |
22 | - :request_params, :routing, :scope_results, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where, :scroll] | 22 | + :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where] |
23 | raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? | 23 | raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any? |
24 | 24 | ||
25 | term = term.to_s | 25 | term = term.to_s |
@@ -110,8 +110,8 @@ module Searchkick | @@ -110,8 +110,8 @@ module Searchkick | ||
110 | # no easy way to tell which host the client will use | 110 | # no easy way to tell which host the client will use |
111 | host = Searchkick.client.transport.hosts.first | 111 | host = Searchkick.client.transport.hosts.first |
112 | credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil | 112 | credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil |
113 | - params = ['pretty'] | ||
114 | - params << 'scroll=' + options[:scroll] if options[:scroll] | 113 | + params = ["pretty"] |
114 | + params << "scroll=#{options[:scroll]}" if options[:scroll] | ||
115 | "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'" | 115 | "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'" |
116 | end | 116 | end |
117 | 117 |
lib/searchkick/results.rb
@@ -222,27 +222,24 @@ module Searchkick | @@ -222,27 +222,24 @@ module Searchkick | ||
222 | end | 222 | end |
223 | 223 | ||
224 | def scroll_id | 224 | def scroll_id |
225 | - @response['_scroll_id'] | 225 | + @response["_scroll_id"] |
226 | end | 226 | end |
227 | 227 | ||
228 | def scroll | 228 | def scroll |
229 | - if scroll_id.present? && options[:scroll].nil? | ||
230 | - raise Searchkick::Error, "Scroll error - scroll keepalive must be defined" | ||
231 | - elsif scroll_id.nil? | ||
232 | - raise Searchkick::Error, "Scroll error - a scroll id has not been provided" | ||
233 | - else | ||
234 | - begin | ||
235 | - params = { | ||
236 | - scroll: options[:scroll], | ||
237 | - scroll_id: scroll_id | ||
238 | - } | ||
239 | - Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options) | ||
240 | - rescue Elasticsearch::Transport::Transport::Errors::NotFound => e | ||
241 | - if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i | ||
242 | - raise Searchkick::Error, "Scroll error - a scroll id does not exist or has expired" | ||
243 | - else | ||
244 | - raise e | ||
245 | - end | 229 | + raise Searchkick::Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id |
230 | + | ||
231 | + params = { | ||
232 | + scroll: options[:scroll], | ||
233 | + scroll_id: scroll_id | ||
234 | + } | ||
235 | + | ||
236 | + begin | ||
237 | + Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options) | ||
238 | + rescue Elasticsearch::Transport::Transport::Errors::NotFound => e | ||
239 | + if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i | ||
240 | + raise Searchkick::Error, "Scroll id has expired" | ||
241 | + else | ||
242 | + raise e | ||
246 | end | 243 | end |
247 | end | 244 | end |
248 | end | 245 | end |
test/scroll_test.rb
@@ -6,8 +6,6 @@ class ScrollTest < Minitest::Test | @@ -6,8 +6,6 @@ class ScrollTest < Minitest::Test | ||
6 | products = Product.search("product", order: {name: :asc}, scroll: '1m', per_page: 2) | 6 | products = Product.search("product", order: {name: :asc}, scroll: '1m', per_page: 2) |
7 | assert_equal ["Product A", "Product B"], products.map(&:name) | 7 | assert_equal ["Product A", "Product B"], products.map(&:name) |
8 | assert_equal "product", products.entry_name | 8 | assert_equal "product", products.entry_name |
9 | - assert_equal "1m", products.options[:scroll] | ||
10 | - assert_equal products.response["_scroll_id"], products.scroll_id | ||
11 | assert_equal 2, products.size | 9 | assert_equal 2, products.size |
12 | assert_equal 2, products.length | 10 | assert_equal 2, products.length |
13 | assert_equal 6, products.total_count | 11 | assert_equal 6, products.total_count |
@@ -17,15 +15,14 @@ class ScrollTest < Minitest::Test | @@ -17,15 +15,14 @@ class ScrollTest < Minitest::Test | ||
17 | # scroll for next 2 | 15 | # scroll for next 2 |
18 | products = products.scroll | 16 | products = products.scroll |
19 | assert_equal ["Product C", "Product D"], products.map(&:name) | 17 | assert_equal ["Product C", "Product D"], products.map(&:name) |
20 | - assert_equal "product", products.entry_name | 18 | + |
21 | # scroll for next 2 | 19 | # scroll for next 2 |
22 | products = products.scroll | 20 | products = products.scroll |
23 | assert_equal ["Product E", "Product F"], products.map(&:name) | 21 | assert_equal ["Product E", "Product F"], products.map(&:name) |
24 | - assert_equal "product", products.entry_name | 22 | + |
25 | # scroll exhausted | 23 | # scroll exhausted |
26 | products = products.scroll | 24 | products = products.scroll |
27 | assert_equal [], products.map(&:name) | 25 | assert_equal [], products.map(&:name) |
28 | - assert_equal "product", products.entry_name | ||
29 | end | 26 | end |
30 | 27 | ||
31 | def test_scroll_body | 28 | def test_scroll_body |
@@ -33,8 +30,6 @@ class ScrollTest < Minitest::Test | @@ -33,8 +30,6 @@ class ScrollTest < Minitest::Test | ||
33 | products = Product.search("product", body: {query: {match_all: {}}, sort: [{name: "asc"}]}, scroll: '1m', per_page: 2) | 30 | products = Product.search("product", body: {query: {match_all: {}}, sort: [{name: "asc"}]}, scroll: '1m', per_page: 2) |
34 | assert_equal ["Product A", "Product B"], products.map(&:name) | 31 | assert_equal ["Product A", "Product B"], products.map(&:name) |
35 | assert_equal "product", products.entry_name | 32 | assert_equal "product", products.entry_name |
36 | - assert_equal "1m", products.options[:scroll] | ||
37 | - assert_equal products.response["_scroll_id"], products.scroll_id | ||
38 | assert_equal 2, products.size | 33 | assert_equal 2, products.size |
39 | assert_equal 2, products.length | 34 | assert_equal 2, products.length |
40 | assert_equal 6, products.total_count | 35 | assert_equal 6, products.total_count |
@@ -44,14 +39,26 @@ class ScrollTest < Minitest::Test | @@ -44,14 +39,26 @@ class ScrollTest < Minitest::Test | ||
44 | # scroll for next 2 | 39 | # scroll for next 2 |
45 | products = products.scroll | 40 | products = products.scroll |
46 | assert_equal ["Product C", "Product D"], products.map(&:name) | 41 | assert_equal ["Product C", "Product D"], products.map(&:name) |
47 | - assert_equal "product", products.entry_name | 42 | + |
48 | # scroll for next 2 | 43 | # scroll for next 2 |
49 | products = products.scroll | 44 | products = products.scroll |
50 | assert_equal ["Product E", "Product F"], products.map(&:name) | 45 | assert_equal ["Product E", "Product F"], products.map(&:name) |
51 | - assert_equal "product", products.entry_name | 46 | + |
52 | # scroll exhausted | 47 | # scroll exhausted |
53 | products = products.scroll | 48 | products = products.scroll |
54 | assert_equal [], products.map(&:name) | 49 | assert_equal [], products.map(&:name) |
55 | - assert_equal "product", products.entry_name | 50 | + end |
51 | + | ||
52 | + def test_scroll_all | ||
53 | + store_names ["Product A"] | ||
54 | + assert_equal ["Product A"], Product.search("*", scroll: "1m").map(&:name) | ||
55 | + end | ||
56 | + | ||
57 | + def test_scroll_no_option | ||
58 | + products = Product.search("*") | ||
59 | + error = assert_raises Searchkick::Error do | ||
60 | + products.scroll | ||
61 | + end | ||
62 | + assert_match /Pass .+ option/, error.message | ||
56 | end | 63 | end |
57 | end | 64 | end |