Skip to content

Commit 67e7a44

Browse files
committed
[GEM] Adds Scroll Helper
1 parent 927cfc3 commit 67e7a44

File tree

2 files changed

+164
-0
lines changed

2 files changed

+164
-0
lines changed
+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Licensed to Elasticsearch B.V. under one or more contributor
2+
# license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright
4+
# ownership. Elasticsearch B.V. licenses this file to you under
5+
# the Apache License, Version 2.0 (the "License"); you may
6+
# not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
module Elasticsearch
  module Helpers
    # Walks through every hit of a search by paging with the Scroll API.
    #
    # The first page comes from a regular search issued with a `scroll`
    # keep-alive; every following page is requested with the scroll id returned
    # by the previous response. The class mixes in Enumerable, so `map`,
    # `count`, `reject`, ... are built on top of #each.
    #
    # @example
    #   helper = Elasticsearch::Helpers::ScrollHelper.new(client, 'books', { query: { match_all: {} } })
    #   helper.each { |hit| puts hit['_id'] }
    class ScrollHelper
      include Enumerable

      # @param client [#search, #scroll, #clear_scroll] client used for all requests
      # @param index [String] index handed to the initial search
      # @param body [Hash] search request body
      # @param scroll [String] keep-alive sent as the `scroll` parameter of each request
      def initialize(client, index, body, scroll = '1m')
        @index = index
        @client = client
        @scroll = scroll
        @body = body
        @scroll_id = nil
      end

      # Yields each hit, fetching one page at a time until an empty page is
      # returned. The scroll is cleared when iteration ends, including when
      # the block raises or breaks out early.
      #
      # @yieldparam doc [Hash] a single entry of `hits.hits`
      # @return [Enumerator] when no block is given
      # @return [self] otherwise
      def each(&block)
        return enum_for(:each) unless block_given?

        begin
          # One request per page: hits are handed to the block as soon as they
          # arrive instead of being accumulated.
          until (docs = results).empty?
            docs.each(&block)
          end
        ensure
          clear
        end
        self
      end

      # Returns the next page of hits: the initial search on the first call,
      # a scroll request afterwards. An empty array means there is nothing left.
      #
      # When the server answers NotFound with `search_context_missing_exception`,
      # the search is started over from the beginning.
      #
      # @return [Array<Hash>] the `hits.hits` array of the response
      # @raise [Elastic::Transport::Transport::Errors::NotFound] for any other 404
      def results
        return initial_search unless @scroll_id

        scroll_request
      rescue Elastic::Transport::Transport::Errors::NotFound => e
        raise unless e.message.include?('search_context_missing_exception')

        initial_search
      end

      private

      # Runs the search that opens the scroll and remembers its scroll id.
      def initial_search
        response = @client.search(index: @index, scroll: @scroll, body: @body)
        @scroll_id = response['_scroll_id']
        response['hits']['hits']
      end

      # Fetches the following page. The scroll id may change between requests,
      # so the one from the latest response is kept (falling back to the
      # current id when the response carries none).
      def scroll_request
        response = @client.scroll(body: { scroll: @scroll, scroll_id: @scroll_id })
        @scroll_id = response['_scroll_id'] || @scroll_id
        response['hits']['hits']
      end

      # Clears the current scroll, if any, and forgets its id so that the next
      # enumeration starts with a fresh search.
      def clear
        scroll_id = @scroll_id
        @scroll_id = nil
        @client.clear_scroll(body: { scroll_id: scroll_id }) if scroll_id
      end
    end
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Licensed to Elasticsearch B.V. under one or more contributor
2+
# license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright
4+
# ownership. Elasticsearch B.V. licenses this file to you under
5+
# the Apache License, Version 2.0 (the "License"); you may
6+
# not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
# URI is referenced right below, before the file's other requires run.
require 'uri'

# Base URL of the cluster under test: TEST_ES_SERVER when set, otherwise
# localhost on PORT (9200 when PORT is not set either).
ELASTICSEARCH_URL = ENV['TEST_ES_SERVER'] || "http://localhost:#{ENV.fetch('PORT', 9200)}"
# Abort while the file is being loaded if the configured URL is not a well-formed URI.
raise URI::InvalidURIError unless ELASTICSEARCH_URL.match?(/\A#{URI::DEFAULT_PARSER.make_regexp}\z/)
19+
20+
require 'spec_helper'
21+
require 'elasticsearch/helpers/scroll_helper'
22+
23+
# Integration examples for Elasticsearch::Helpers::ScrollHelper. They talk to
# the cluster at ELASTICSEARCH_URL, seed a 24-document index before each
# example and delete it afterwards.
context 'Elasticsearch client helpers' do
  let(:client) do
    Elasticsearch::Client.new(host: ELASTICSEARCH_URL, user: 'elastic', password: 'changeme')
  end
  let(:index) { 'books' }
  # A page size of 12 splits the 24 seeded documents over two pages.
  let(:body) { { size: 12, query: { match_all: {} } } }
  let(:scroll_helper) { Elasticsearch::Helpers::ScrollHelper.new(client, index, body) }

  before do
    # Columns: name, author, release_date, page_count.
    books = [
      ['Leviathan Wakes', 'James S.A. Corey', '2011-06-02', 561],
      ['Hyperion', 'Dan Simmons', '1989-05-26', 482],
      ['Dune', 'Frank Herbert', '1965-06-01', 604],
      ['Dune Messiah', 'Frank Herbert', '1969-10-15', 331],
      ['Children of Dune', 'Frank Herbert', '1976-04-21', 408],
      ['God Emperor of Dune', 'Frank Herbert', '1981-05-28', 454],
      ['Consider Phlebas', 'Iain M. Banks', '1987-04-23', 471],
      ["Pandora's Star", 'Peter F. Hamilton', '2004-03-02', 768],
      ['Revelation Space', 'Alastair Reynolds', '2000-03-15', 585],
      ['A Fire Upon the Deep', 'Vernor Vinge', '1992-06-01', 613],
      ["Ender's Game", 'Orson Scott Card', '1985-06-01', 324],
      ['1984', 'George Orwell', '1985-06-01', 328],
      ['Fahrenheit 451', 'Ray Bradbury', '1953-10-15', 227],
      ['Brave New World', 'Aldous Huxley', '1932-06-01', 268],
      ['Foundation', 'Isaac Asimov', '1951-06-01', 224],
      ['The Giver', 'Lois Lowry', '1993-04-26', 208],
      ['Slaughterhouse-Five', 'Kurt Vonnegut', '1969-06-01', 275],
      ["The Hitchhiker's Guide to the Galaxy", 'Douglas Adams', '1979-10-12', 180],
      ['Snow Crash', 'Neal Stephenson', '1992-06-01', 470],
      ['Neuromancer', 'William Gibson', '1984-07-01', 271],
      ["The Handmaid's Tale", 'Margaret Atwood', '1985-06-01', 311],
      ['Starship Troopers', 'Robert A. Heinlein', '1959-12-01', 335],
      ['The Left Hand of Darkness', 'Ursula K. Le Guin', '1969-06-01', 304],
      ['The Moon is a Harsh Mistress', 'Robert A. Heinlein', '1966-04-01', 288]
    ]

    # One bulk `index` action per book, all targeting the same index.
    documents = books.map do |name, author, release_date, page_count|
      {
        index: {
          _index: index,
          data: { name: name, author: author, release_date: release_date, page_count: page_count }
        }
      }
    end

    # refresh: 'wait_for' so the documents are searchable once the call returns.
    client.bulk(body: documents, refresh: 'wait_for')
  end

  after do
    client.indices.delete(index: index)
  end

  it 'instantiates a scroll helper' do
    expect(scroll_helper).to be_an_instance_of Elasticsearch::Helpers::ScrollHelper
  end

  it 'searches an index' do
    pages = []
    # Keep asking for pages until the helper hands back an empty one.
    until (page = scroll_helper.results).empty?
      pages << page
    end

    expect(pages.flatten.size).to eq 24
  end

  it 'uses enumerable' do
    seen = 0
    scroll_helper.each { |_hit| seen += 1 }
    expect(seen).to eq 24

    expect(scroll_helper).to respond_to(:count)
    expect(scroll_helper).to respond_to(:reject)
    expect(scroll_helper).to respond_to(:uniq)

    # A second pass over the same helper must still see every document once.
    ids = scroll_helper.map { |hit| hit['_id'] }
    expect(ids.uniq.count).to eq 24
  end
end

0 commit comments

Comments
 (0)