Skip to content

Commit 09755b3

Browse files
committed
Add SignificantText aggregation
1 parent 17485ea commit 09755b3

5 files changed

+329
-3
lines changed

README.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,10 @@ See the [wiki](https://github.com/olivere/elastic/wiki) for more details.
159159
- [x] Context Suggester
160160
- [x] Multi Search API
161161
- [x] Count API
162-
- [ ] Search Exists API
163162
- [ ] Validate API
164163
- [x] Explain API
165164
- [x] Profile API
166-
- [x] Field Stats API
165+
- [x] Field Capabilities API
167166

168167
### Aggregations
169168

@@ -183,9 +182,11 @@ See the [wiki](https://github.com/olivere/elastic/wiki) for more details.
183182
- [x] Top Hits
184183
- [x] Value Count
185184
- Bucket Aggregations
185+
- [ ] Adjacency Matrix
186186
- [x] Children
187187
- [x] Date Histogram
188188
- [x] Date Range
189+
- [ ] Diversified Sampler
189190
- [x] Filter
190191
- [x] Filters
191192
- [x] Geo Distance
@@ -199,6 +200,7 @@ See the [wiki](https://github.com/olivere/elastic/wiki) for more details.
199200
- [x] Reverse Nested
200201
- [x] Sampler
201202
- [x] Significant Terms
203+
- [x] Significant Text
202204
- [x] Terms
203205
- Pipeline Aggregations
204206
- [x] Avg Bucket
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2012-present Oliver Eilhard. All rights reserved.
2+
// Use of this source code is governed by a MIT-license.
3+
// See http://olivere.mit-license.org/license.txt for details.
4+
5+
package elastic
6+
7+
// BucketCountThresholds bundles the optional document-count and size
// thresholds used in e.g. terms and significant text aggregations.
// Every field is a pointer so that "not set" (nil) can be told apart
// from an explicit zero value.
type BucketCountThresholds struct {
	// Document-count thresholds: overall and per shard.
	MinDocCount, ShardMinDocCount *int64
	// Size thresholds: overall and per shard.
	RequiredSize, ShardSize *int
}

search_aggs_bucket_significant_terms.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
package elastic
66

7-
// SignificantSignificantTermsAggregation is an aggregation that returns interesting
7+
// SignificantTermsAggregation is an aggregation that returns interesting
88
// or unusual occurrences of terms in a set.
99
// See: https://www.elastic.co/guide/en/elasticsearch/reference/6.0/search-aggregations-bucket-significantterms-aggregation.html
1010
type SignificantTermsAggregation struct {
+245
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
// Copyright 2012-present Oliver Eilhard. All rights reserved.
2+
// Use of this source code is governed by a MIT-license.
3+
// See http://olivere.mit-license.org/license.txt for details.
4+
5+
package elastic
6+
7+
// SignificantTextAggregation returns interesting or unusual occurrences
8+
// of free-text terms in a set.
9+
// See: https://www.elastic.co/guide/en/elasticsearch/reference/6.0/search-aggregations-bucket-significanttext-aggregation.html
10+
type SignificantTextAggregation struct {
11+
field string
12+
subAggregations map[string]Aggregation
13+
meta map[string]interface{}
14+
15+
sourceFieldNames []string
16+
filterDuplicateText *bool
17+
includeExclude *TermsAggregationIncludeExclude
18+
filter Query
19+
bucketCountThresholds *BucketCountThresholds
20+
significanceHeuristic SignificanceHeuristic
21+
}
22+
23+
func NewSignificantTextAggregation() *SignificantTextAggregation {
24+
return &SignificantTextAggregation{
25+
subAggregations: make(map[string]Aggregation, 0),
26+
}
27+
}
28+
29+
func (a *SignificantTextAggregation) Field(field string) *SignificantTextAggregation {
30+
a.field = field
31+
return a
32+
}
33+
34+
func (a *SignificantTextAggregation) SubAggregation(name string, subAggregation Aggregation) *SignificantTextAggregation {
35+
a.subAggregations[name] = subAggregation
36+
return a
37+
}
38+
39+
// Meta sets the meta data to be included in the aggregation response.
40+
func (a *SignificantTextAggregation) Meta(metaData map[string]interface{}) *SignificantTextAggregation {
41+
a.meta = metaData
42+
return a
43+
}
44+
45+
func (a *SignificantTextAggregation) SourceFieldNames(names ...string) *SignificantTextAggregation {
46+
a.sourceFieldNames = names
47+
return a
48+
}
49+
50+
func (a *SignificantTextAggregation) FilterDuplicateText(filter bool) *SignificantTextAggregation {
51+
a.filterDuplicateText = &filter
52+
return a
53+
}
54+
55+
func (a *SignificantTextAggregation) MinDocCount(minDocCount int64) *SignificantTextAggregation {
56+
if a.bucketCountThresholds == nil {
57+
a.bucketCountThresholds = &BucketCountThresholds{}
58+
}
59+
a.bucketCountThresholds.MinDocCount = &minDocCount
60+
return a
61+
}
62+
63+
func (a *SignificantTextAggregation) ShardMinDocCount(shardMinDocCount int64) *SignificantTextAggregation {
64+
if a.bucketCountThresholds == nil {
65+
a.bucketCountThresholds = &BucketCountThresholds{}
66+
}
67+
a.bucketCountThresholds.ShardMinDocCount = &shardMinDocCount
68+
return a
69+
}
70+
71+
func (a *SignificantTextAggregation) Size(size int) *SignificantTextAggregation {
72+
if a.bucketCountThresholds == nil {
73+
a.bucketCountThresholds = &BucketCountThresholds{}
74+
}
75+
a.bucketCountThresholds.RequiredSize = &size
76+
return a
77+
}
78+
79+
func (a *SignificantTextAggregation) ShardSize(shardSize int) *SignificantTextAggregation {
80+
if a.bucketCountThresholds == nil {
81+
a.bucketCountThresholds = &BucketCountThresholds{}
82+
}
83+
a.bucketCountThresholds.ShardSize = &shardSize
84+
return a
85+
}
86+
87+
func (a *SignificantTextAggregation) BackgroundFilter(filter Query) *SignificantTextAggregation {
88+
a.filter = filter
89+
return a
90+
}
91+
92+
func (a *SignificantTextAggregation) SignificanceHeuristic(heuristic SignificanceHeuristic) *SignificantTextAggregation {
93+
a.significanceHeuristic = heuristic
94+
return a
95+
}
96+
97+
func (a *SignificantTextAggregation) Include(regexp string) *SignificantTextAggregation {
98+
if a.includeExclude == nil {
99+
a.includeExclude = &TermsAggregationIncludeExclude{}
100+
}
101+
a.includeExclude.Include = regexp
102+
return a
103+
}
104+
105+
func (a *SignificantTextAggregation) IncludeValues(values ...interface{}) *SignificantTextAggregation {
106+
if a.includeExclude == nil {
107+
a.includeExclude = &TermsAggregationIncludeExclude{}
108+
}
109+
a.includeExclude.IncludeValues = append(a.includeExclude.IncludeValues, values...)
110+
return a
111+
}
112+
113+
func (a *SignificantTextAggregation) Exclude(regexp string) *SignificantTextAggregation {
114+
if a.includeExclude == nil {
115+
a.includeExclude = &TermsAggregationIncludeExclude{}
116+
}
117+
a.includeExclude.Exclude = regexp
118+
return a
119+
}
120+
121+
func (a *SignificantTextAggregation) ExcludeValues(values ...interface{}) *SignificantTextAggregation {
122+
if a.includeExclude == nil {
123+
a.includeExclude = &TermsAggregationIncludeExclude{}
124+
}
125+
a.includeExclude.ExcludeValues = append(a.includeExclude.ExcludeValues, values...)
126+
return a
127+
}
128+
129+
func (a *SignificantTextAggregation) Partition(p int) *SignificantTextAggregation {
130+
if a.includeExclude == nil {
131+
a.includeExclude = &TermsAggregationIncludeExclude{}
132+
}
133+
a.includeExclude.Partition = p
134+
return a
135+
}
136+
137+
func (a *SignificantTextAggregation) NumPartitions(n int) *SignificantTextAggregation {
138+
if a.includeExclude == nil {
139+
a.includeExclude = &TermsAggregationIncludeExclude{}
140+
}
141+
a.includeExclude.NumPartitions = n
142+
return a
143+
}
144+
145+
func (a *SignificantTextAggregation) Source() (interface{}, error) {
146+
// Example:
147+
// {
148+
// "query" : {
149+
// "match" : {"content" : "Bird flu"}
150+
// },
151+
// "aggregations" : {
152+
// "my_sample" : {
153+
// "sampler": {
154+
// "shard_size" : 100
155+
// },
156+
// "aggregations": {
157+
// "keywords" : {
158+
// "significant_text" : { "field" : "content" }
159+
// }
160+
// }
161+
// }
162+
// }
163+
// }
164+
//
165+
// This method returns only the
166+
// { "significant_text" : { "field" : "content" }
167+
// part.
168+
169+
source := make(map[string]interface{})
170+
opts := make(map[string]interface{})
171+
source["significant_text"] = opts
172+
173+
if a.field != "" {
174+
opts["field"] = a.field
175+
}
176+
if a.bucketCountThresholds != nil {
177+
if a.bucketCountThresholds.RequiredSize != nil {
178+
opts["size"] = (*a.bucketCountThresholds).RequiredSize
179+
}
180+
if a.bucketCountThresholds.ShardSize != nil {
181+
opts["shard_size"] = (*a.bucketCountThresholds).ShardSize
182+
}
183+
if a.bucketCountThresholds.MinDocCount != nil {
184+
opts["min_doc_count"] = (*a.bucketCountThresholds).MinDocCount
185+
}
186+
if a.bucketCountThresholds.ShardMinDocCount != nil {
187+
opts["shard_min_doc_count"] = (*a.bucketCountThresholds).ShardMinDocCount
188+
}
189+
}
190+
if a.filter != nil {
191+
src, err := a.filter.Source()
192+
if err != nil {
193+
return nil, err
194+
}
195+
opts["background_filter"] = src
196+
}
197+
if a.significanceHeuristic != nil {
198+
name := a.significanceHeuristic.Name()
199+
src, err := a.significanceHeuristic.Source()
200+
if err != nil {
201+
return nil, err
202+
}
203+
opts[name] = src
204+
}
205+
// Include/Exclude
206+
if ie := a.includeExclude; ie != nil {
207+
// Include
208+
if ie.Include != "" {
209+
opts["include"] = ie.Include
210+
} else if len(ie.IncludeValues) > 0 {
211+
opts["include"] = ie.IncludeValues
212+
} else if ie.NumPartitions > 0 {
213+
inc := make(map[string]interface{})
214+
inc["partition"] = ie.Partition
215+
inc["num_partitions"] = ie.NumPartitions
216+
opts["include"] = inc
217+
}
218+
// Exclude
219+
if ie.Exclude != "" {
220+
opts["exclude"] = ie.Exclude
221+
} else if len(ie.ExcludeValues) > 0 {
222+
opts["exclude"] = ie.ExcludeValues
223+
}
224+
}
225+
226+
// AggregationBuilder (SubAggregations)
227+
if len(a.subAggregations) > 0 {
228+
aggsMap := make(map[string]interface{})
229+
source["aggregations"] = aggsMap
230+
for name, aggregate := range a.subAggregations {
231+
src, err := aggregate.Source()
232+
if err != nil {
233+
return nil, err
234+
}
235+
aggsMap[name] = src
236+
}
237+
}
238+
239+
// Add Meta data if available
240+
if len(a.meta) > 0 {
241+
source["meta"] = a.meta
242+
}
243+
244+
return source, nil
245+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Copyright 2012-present Oliver Eilhard. All rights reserved.
2+
// Use of this source code is governed by a MIT-license.
3+
// See http://olivere.mit-license.org/license.txt for details.
4+
5+
package elastic
6+
7+
import (
8+
"encoding/json"
9+
"testing"
10+
)
11+
12+
func TestSignificantTextAggregation(t *testing.T) {
13+
agg := NewSignificantTextAggregation().Field("content")
14+
src, err := agg.Source()
15+
if err != nil {
16+
t.Fatal(err)
17+
}
18+
data, err := json.Marshal(src)
19+
if err != nil {
20+
t.Fatalf("marshaling to JSON failed: %v", err)
21+
}
22+
got := string(data)
23+
expected := `{"significant_text":{"field":"content"}}`
24+
if got != expected {
25+
t.Errorf("expected\n%s\n,got:\n%s", expected, got)
26+
}
27+
}
28+
29+
func TestSignificantTextAggregationWithArgs(t *testing.T) {
30+
agg := NewSignificantTextAggregation().
31+
Field("content").
32+
ShardSize(5).
33+
MinDocCount(10).
34+
BackgroundFilter(NewTermQuery("city", "London"))
35+
src, err := agg.Source()
36+
if err != nil {
37+
t.Fatal(err)
38+
}
39+
data, err := json.Marshal(src)
40+
if err != nil {
41+
t.Fatalf("marshaling to JSON failed: %v", err)
42+
}
43+
got := string(data)
44+
expected := `{"significant_text":{"background_filter":{"term":{"city":"London"}},"field":"content","min_doc_count":10,"shard_size":5}}`
45+
if got != expected {
46+
t.Errorf("expected\n%s\n,got:\n%s", expected, got)
47+
}
48+
}
49+
50+
func TestSignificantTextAggregationWithMetaData(t *testing.T) {
51+
agg := NewSignificantTextAggregation().Field("content")
52+
agg = agg.Meta(map[string]interface{}{"name": "Oliver"})
53+
src, err := agg.Source()
54+
if err != nil {
55+
t.Fatal(err)
56+
}
57+
data, err := json.Marshal(src)
58+
if err != nil {
59+
t.Fatalf("marshaling to JSON failed: %v", err)
60+
}
61+
got := string(data)
62+
expected := `{"meta":{"name":"Oliver"},"significant_text":{"field":"content"}}`
63+
if got != expected {
64+
t.Errorf("expected\n%s\n,got:\n%s", expected, got)
65+
}
66+
}

0 commit comments

Comments
 (0)