forked from elastic/elasticsearch-rails
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimporting.rb
151 lines (136 loc) · 4.93 KB
/
importing.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
module Elasticsearch
module Model
# Provides support for easily and efficiently importing large amounts of
# records from the including class into the index.
#
# @see ClassMethods#import
#
module Importing
# When included in a model, adds the importing methods.
#
# @example Import all records from the `Article` model
#
# Article.import
#
# @see #import
#
def self.included(base)
base.__send__ :extend, ClassMethods
adapter = Adapter.from_class(base)
base.__send__ :include, adapter.importing_mixin
base.__send__ :extend, adapter.importing_mixin
end
module ClassMethods
# Import all model records into the index
#
# The method will pick up correct strategy based on the `Importing` module
# defined in the corresponding adapter.
#
# @param options [Hash] Options passed to the underlying `__find_in_batches`method
# @param block [Proc] Optional block to evaluate for each batch
#
# @yield [Hash] Gives the Hash with the Elasticsearch response to the block
#
# @return [Fixnum] Number of errors encountered during importing
#
# @example Import all records into the index
#
# Article.import
#
# @example Set the batch size to 100
#
# Article.import batch_size: 100
#
# @example Process the response from Elasticsearch
#
# Article.import do |response|
# puts "Got " + response['items'].select { |i| i['index']['error'] }.size.to_s + " errors"
# end
#
# @example Delete and create the index with appropriate settings and mappings
#
# Article.import force: true
#
# @example Refresh the index after importing all batches
#
# Article.import refresh: true
#
# @example Import the records into a different index/type than the default one
#
# Article.import index: 'my-new-index', type: 'my-other-type'
#
# @example Pass an ActiveRecord scope to limit the imported records
#
# Article.import scope: 'published'
#
# @example Pass an ActiveRecord query to limit the imported records
#
# Article.import query: -> { where(author_id: author_id) }
#
# @example Transform records during the import with a lambda
#
# transform = lambda do |a|
# {index: {_id: a.id, _parent: a.author_id, data: a.__elasticsearch__.as_indexed_json}}
# end
#
# Article.import transform: transform
#
# @example Update the batch before yielding it
#
# class Article
# # ...
# def self.enrich(batch)
# batch.each do |item|
# item.metadata = MyAPI.get_metadata(item.id)
# end
# batch
# end
# end
#
# Article.import preprocess: :enrich
#
# @example Return an array of error elements instead of the number of errors, eg.
# to try importing these records again
#
# Article.import return: 'errors'
#
def import(options={}, &block)
errors = []
refresh = options.delete(:refresh) || false
target_index = options.delete(:index) || index_name
target_type = options.delete(:type) || document_type
transform = options.delete(:transform) || __transform
return_value = options.delete(:return) || 'count'
unless transform.respond_to?(:call)
raise ArgumentError,
"Pass an object responding to `call` as the :transform option, #{transform.class} given"
end
if options.delete(:force)
self.create_index! force: true, index: target_index
elsif !self.index_exists? index: target_index
raise ArgumentError,
"#{target_index} does not exist to be imported into. Use create_index! or the :force option to create it."
end
__find_in_batches(options) do |batch|
response = client.bulk \
index: target_index,
type: target_type,
body: __batch_to_bulk(batch, transform)
yield response if block_given?
errors += response['items'].select { |k, v| k.values.first['error'] }
end
self.refresh_index! if refresh
case return_value
when 'errors'
errors
else
errors.size
end
end
def __batch_to_bulk(batch, transform)
batch.map { |model| transform.call(model) }
end
end
end
end
end