-
Notifications
You must be signed in to change notification settings - Fork 802
/
Copy pathimporting.rb
118 lines (103 loc) · 3.9 KB
/
importing.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
module Elasticsearch
module Model
# Provides support for easily and efficiently importing large amounts of
# records from the including class into the index.
#
# @see ClassMethods#import
#
module Importing
# When included in a model, adds the importing methods.
#
# @example Import all records from the `Article` model
#
# Article.import
#
# @see #import
#
def self.included(base)
base.__send__ :extend, ClassMethods
adapter = Adapter.from_class(base)
base.__send__ :include, adapter.importing_mixin
base.__send__ :extend, adapter.importing_mixin
end
module ClassMethods
# Import all model records into the index
#
# The method will pick up correct strategy based on the `Importing` module
# defined in the corresponding adapter.
#
# @param options [Hash] Options passed to the underlying `__find_in_batches`method
# @param block [Proc] Optional block to evaluate for each batch
#
# @yield [Hash] Gives the Hash with the Elasticsearch response to the block
#
# @return [Fixnum] Number of errors encountered during importing
#
# @example Import all records into the index
#
# Article.import
#
# @example Set the batch size to 100
#
# Article.import batch_size: 100
#
# @example Process the response from Elasticsearch
#
# Article.import do |response|
# puts "Got " + response['items'].select { |i| i['index']['error'] }.size.to_s + " errors"
# end
#
# @example Delete and create the index with appropriate settings and mappings
#
# Article.import force: true
#
# @example Refresh the index after importing all batches
#
# Article.import refresh: true
#
# @example Import the records into a different index/type than the default one
#
# Article.import index: 'my-new-index', type: 'my-other-type'
#
# @example Pass an ActiveRecord scope to limit the imported records
#
# Article.import scope: 'published'
#
# @example Customize how each record is [bulk imported](https://github.com/elasticsearch/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb)
#
# transform = lambda do |article|
# {index: {_id: article.id, _parent: article.author_id, data: article.__elasticsearch__.as_indexed_json}}
# end
#
# Article.import transform: transform
#
def import(options={}, &block)
errors = 0
refresh = options.delete(:refresh) || false
target_index = options.delete(:index) || index_name
target_type = options.delete(:type) || document_type
transform = options.delete(:transform) || __transform
if !transform.respond_to?(:call)
raise ArgumentError, "You must pass an object that supports #call method, #{transform.class} given"
end
if options.delete(:force)
self.create_index! force: true, index: target_index
end
__find_in_batches(options) do |batch|
response = client.bulk \
index: target_index,
type: target_type,
body: __batch_to_bulk(batch, transform)
yield response if block_given?
errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length
end
self.refresh_index! if refresh
return errors
end
def __batch_to_bulk(batch, transform)
batch.map {|model| transform.call(model)}
end
end
end
end
end