Skip to content

Commit 074930f

Browse files
committed
[STORE] Added the find_each method for models
Person.find_each { |person| puts person.name } # # GET http://localhost:9200/people/person/_search?scroll=5m&search_type=scan&size=20 # # GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhbj... # Test 0 # Test 1 # Test 2 # ... # # GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhbj... # Test 20 # Test 21 # Test 22 See: http://api.rubyonrails.org/classes/ActiveRecord/Batches.html#method-i-find_each
1 parent e5546e5 commit 074930f

File tree

3 files changed

+110
-28
lines changed

3 files changed

+110
-28
lines changed

elasticsearch-persistence/lib/elasticsearch/persistence/model/find.rb

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,12 @@ def all(options={})
2525
#
2626
# You can restrict the models being returned with a query.
2727
#
28-
# The full {Persistence::Repository::Response::Result} instance is yielded to the passed
29-
# block in each batch, so you can access any of its properties. Calling `to_a` will
28+
# The {http://rubydoc.info/gems/elasticsearch-api/Elasticsearch/API/Actions#search-instance_method Search API}
29+
# options are passed to the search method as parameters; all remaining options are passed
30+
# as the `:body` parameter.
31+
#
32+
# The full {Persistence::Repository::Response::Results} instance is yielded to the passed
33+
# block in each batch, so you can access any of its properties; calling `to_a` will
3034
# convert the object to an Array of model instances.
3135
#
3236
# @example Return all models in batches of 20 x number of primary shards
@@ -102,6 +106,40 @@ def find_in_batches(options={}, &block)
102106

103107
return response['_scroll_id']
104108
end
109+
110+
# Iterate efficiently over models using the `find_in_batches` method.
111+
#
112+
# All the options are passed to `find_in_batches` and each result is yielded to the passed block.
113+
#
114+
# @example Print out the people's names by scrolling through the index
115+
#
116+
# Person.find_each { |person| puts person.name }
117+
#
118+
# # # GET http://localhost:9200/people/person/_search?scroll=5m&search_type=scan&size=20
119+
# # # GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhbj...
120+
# # Test 0
121+
# # Test 1
122+
# # Test 2
123+
# # ...
124+
# # # GET http://localhost:9200/_search/scroll?scroll=5m&scroll_id=c2Nhbj...
125+
# # Test 20
126+
# # Test 21
127+
# # Test 22
128+
#
129+
# @example Leave out the block to return an Enumerator instance
130+
#
131+
# Person.find_each.select { |person| person.name =~ /John/ }
132+
# # => [#<Person {id: "NkltJP5vRxqk9_RMP7SU8Q", name: "John Smith", ...}>]
133+
#
134+
# @return [String,Enumerator] The `scroll_id` for the request or Enumerator when the block is not passed
135+
#
136+
def find_each(options = {})
137+
return to_enum(:find_each, options) unless block_given?
138+
139+
find_in_batches(options) do |batch|
140+
batch.each { |result| yield result }
141+
end
142+
end
105143
end
106144
end
107145

elasticsearch-persistence/test/integration/model/model_basic_test.rb

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,33 @@ class ::Person
100100
end
101101

102102
should "find instances in batches" do
103-
100.times { |i| Person.create name: "John #{i+1}" }
103+
50.times { |i| Person.create name: "John #{i+1}" }
104104
Person.gateway.refresh_index!
105105

106+
@batches = 0
106107
@results = []
107108

108109
Person.find_in_batches(_source_include: 'name') do |batch|
110+
@batches += 1
109111
@results += batch.map(&:name)
110112
end
111113

112-
assert_equal 100, @results.size
114+
assert_equal 3, @batches
115+
assert_equal 50, @results.size
116+
assert_contains @results, 'John 1'
117+
end
118+
119+
should "find each instance" do
120+
50.times { |i| Person.create name: "John #{i+1}" }
121+
Person.gateway.refresh_index!
122+
123+
@results = []
124+
125+
Person.find_each(_source_include: 'name') do |person|
126+
@results << person.name
127+
end
128+
129+
assert_equal 50, @results.size
113130
assert_contains @results, 'John 1'
114131
end
115132
end

elasticsearch-persistence/test/unit/model_find_test.rb

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -83,39 +83,66 @@ def set_id(id); self.id = id; end
8383
DummyFindModel.all( { query: { match: { title: 'test' } }, routing: 'abc123' } )
8484
end
8585

86-
should "find all records in batches" do
87-
@gateway
86+
context "finding via scan/scroll" do
87+
setup do
88+
@gateway
8889
.expects(:deserialize)
8990
.with('_source' => {'foo' => 'bar'})
9091
.returns('_source' => {'foo' => 'bar'})
9192

92-
@gateway.client
93-
.expects(:search)
94-
.with do |arguments|
95-
assert_equal 'scan', arguments[:search_type]
96-
assert_equal 'foo', arguments[:index]
97-
assert_equal 'bar', arguments[:type]
98-
end
99-
.returns(MultiJson.load('{"_scroll_id":"abc123==", "hits":{"hits":[]}}'))
93+
@gateway.client
94+
.expects(:search)
95+
.with do |arguments|
96+
assert_equal 'scan', arguments[:search_type]
97+
assert_equal 'foo', arguments[:index]
98+
assert_equal 'bar', arguments[:type]
99+
end
100+
.returns(MultiJson.load('{"_scroll_id":"abc123==", "hits":{"hits":[]}}'))
101+
102+
@gateway.client
103+
.expects(:scroll)
104+
.twice
105+
.returns(MultiJson.load('{"_scroll_id":"abc456==", "hits":{"hits":[{"_source":{"foo":"bar"}}]}}'))
106+
.then
107+
.returns(MultiJson.load('{"_scroll_id":"abc789==", "hits":{"hits":[]}}'))
108+
end
100109

101-
@gateway.client
102-
.expects(:scroll)
103-
.twice
104-
.returns(MultiJson.load('{"_scroll_id":"abc456==", "hits":{"hits":[{"_source":{"foo":"bar"}}]}}'))
105-
.then
106-
.returns(MultiJson.load('{"_scroll_id":"abc789==", "hits":{"hits":[]}}'))
110+
should "find all records in batches" do
111+
@doc = nil
112+
result = DummyFindModel.find_in_batches { |batch| @doc = batch.first['_source']['foo'] }
107113

108-
@doc = nil
114+
assert_equal 'abc789==', result
115+
assert_equal 'bar', @doc
116+
end
109117

110-
result = DummyFindModel.find_in_batches { |batch| @doc = batch.first['_source']['foo'] }
118+
should "return an Enumerator for find in batches" do
119+
@doc = nil
120+
assert_nothing_raised do
121+
e = DummyFindModel.find_in_batches
122+
assert_instance_of Enumerator, e
111123

112-
assert_equal 'abc789==', result
113-
assert_equal 'bar', @doc
114-
end
124+
e.each { |batch| @doc = batch.first['_source']['foo'] }
125+
assert_equal 'bar', @doc
126+
end
127+
end
128+
129+
should "find each" do
130+
@doc = nil
131+
result = DummyFindModel.find_each { |doc| @doc = doc['_source']['foo'] }
132+
133+
assert_equal 'abc789==', result
134+
assert_equal 'bar', @doc
135+
end
136+
137+
should "return an Enumerator for find each" do
138+
@doc = nil
139+
assert_nothing_raised do
140+
e = DummyFindModel.find_each
141+
assert_instance_of Enumerator, e
115142

116-
should "return an Enumerator for find in batches" do
117-
assert_nothing_raised do
118-
assert_instance_of Enumerator, DummyFindModel.find_in_batches
143+
e.each { |doc| @doc = doc['_source']['foo'] }
144+
assert_equal 'bar', @doc
145+
end
119146
end
120147
end
121148

0 commit comments

Comments
 (0)