Skip to content

Commit e81fc5b

Browse files
committed Feb 12, 2017
[STORE] Removed the search_type=scan in the find_in_batches method
Since the `scan` value of the `search_type` parameter was deprecated in Elasticsearch 2.1 and removed in Elasticsearch 5, the `find_in_batches` method no longer passes it, and instead sets up the search context for scrolling with the `sort: ['_doc']` option. As a consequence, the initial search request now returns the first batch of documents together with the scroll_id, so the extra initial scroll call has been removed. See: https://www.elastic.co/guide/en/elasticsearch/reference/2.1/breaking_21_search_changes.html#_literal_search_type_scan_literal_deprecated
1 parent ffc1aa4 commit e81fc5b

File tree

3 files changed

+16
-17
lines changed

3 files changed

+16
-17
lines changed
 

‎elasticsearch-persistence/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ retrieve big collections of model instances, using the Elasticsearch's _Scan API
581581

582582
```ruby
583583
Article.find_each(_source_include: 'title') { |a| puts "===> #{a.title.upcase}" }
584-
# GET http://localhost:9200/articles/article/_search?scroll=5m&search_type=scan&size=20
584+
# GET http://localhost:9200/articles/article/_search?scroll=5m&size=20
585585
# GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhb...
586586
# ===> TEST
587587
# GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhb...

‎elasticsearch-persistence/lib/elasticsearch/persistence/model/find.rb

+8-11
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def count(query_or_definition=nil, options={})
5555
gateway.count( query_or_definition, options )
5656
end
5757

58-
# Returns all models efficiently via the Elasticsearch's scan/scroll API
58+
# Returns all models efficiently via the Elasticsearch's scroll API
5959
#
6060
# You can restrict the models being returned with a query.
6161
#
@@ -117,20 +117,17 @@ def find_in_batches(options={}, &block)
117117

118118
body = options
119119

120-
# Get the initial scroll_id
120+
# Get the initial batch of documents and the scroll_id
121121
#
122122
response = gateway.client.search( { index: gateway.index_name,
123123
type: gateway.document_type,
124-
search_type: 'scan',
125-
scroll: scroll,
126-
size: 20,
127-
body: body }.merge(search_params) )
124+
scroll: scroll,
125+
sort: ['_doc'],
126+
size: 20,
127+
body: body }.merge(search_params) )
128128

129-
# Get the initial batch of documents
130-
#
131-
response = gateway.client.scroll( { scroll_id: response['_scroll_id'], scroll: scroll } )
132129

133-
# Break when receiving an empty array of hits
130+
# Scroll the search object and break when receiving an empty array of hits
134131
#
135132
while response['hits']['hits'].any? do
136133
yield Repository::Response::Results.new(gateway, response)
@@ -149,7 +146,7 @@ def find_in_batches(options={}, &block)
149146
#
150147
# Person.find_each { |person| puts person.name }
151148
#
152-
# # # GET http://localhost:9200/people/person/_search?scroll=5m&search_type=scan&size=20
149+
# # # GET http://localhost:9200/people/person/_search?scroll=5m&size=20
153150
# # # GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhbj...
154151
# # Test 0
155152
# # Test 1

‎elasticsearch-persistence/test/unit/model_find_test.rb

+7-5
Original file line numberDiff line numberDiff line change
@@ -83,27 +83,29 @@ class DummyFindModel
8383
DummyFindModel.all( { query: { match: { title: 'test' } } }, { routing: 'abc123' } )
8484
end
8585

86-
context "finding via scan/scroll" do
86+
context "finding via scroll" do
8787
setup do
8888
@gateway
8989
.expects(:deserialize)
90-
.with('_source' => {'foo' => 'bar'})
9190
.returns('_source' => {'foo' => 'bar'})
91+
.at_least_once
9292

93+
# 1. Initial batch of documents and the scroll_id
9394
@gateway.client
9495
.expects(:search)
9596
.with do |arguments|
96-
assert_equal 'scan', arguments[:search_type]
9797
assert_equal 'foo', arguments[:index]
9898
assert_equal 'bar', arguments[:type]
9999
true
100100
end
101-
.returns(MultiJson.load('{"_scroll_id":"abc123==", "hits":{"hits":[]}}'))
101+
.returns(MultiJson.load('{"_scroll_id":"abc123==", "hits":{"hits":[{"_source":{"foo":"bar_1"}}]}}'))
102102

103+
# 2. Second batch of documents and the scroll_id
104+
# 3. Last, empty batch of documents
103105
@gateway.client
104106
.expects(:scroll)
105107
.twice
106-
.returns(MultiJson.load('{"_scroll_id":"abc456==", "hits":{"hits":[{"_source":{"foo":"bar"}}]}}'))
108+
.returns(MultiJson.load('{"_scroll_id":"abc456==", "hits":{"hits":[{"_source":{"foo":"bar_2"}}]}}'))
107109
.then
108110
.returns(MultiJson.load('{"_scroll_id":"abc789==", "hits":{"hits":[]}}'))
109111
end

0 commit comments

Comments
 (0)
Please sign in to comment.