12
12
import org .apache .lucene .document .IntField ;
13
13
import org .apache .lucene .index .IndexReader ;
14
14
import org .apache .lucene .index .LeafReaderContext ;
15
- import org .apache .lucene .search . IndexSearcher ;
15
+ import org .apache .lucene .index . NoMergePolicy ;
16
16
import org .apache .lucene .search .Query ;
17
17
import org .apache .lucene .search .ScoreMode ;
18
18
import org .apache .lucene .search .Weight ;
43
43
44
44
public class QueryFeatureExtractorTests extends AbstractBuilderTestCase {
45
45
46
- private Directory dir ;
47
- private IndexReader reader ;
48
- private IndexSearcher searcher ;
49
-
50
- private void addDocs (String [] textValues , int [] numberValues ) throws IOException {
51
- dir = newDirectory ();
52
- try (RandomIndexWriter indexWriter = new RandomIndexWriter (random (), dir )) {
46
+ private IndexReader addDocs (Directory dir , String [] textValues , int [] numberValues ) throws IOException {
47
+ var config = newIndexWriterConfig ();
48
+ // override the merge policy so that docs keep their ingestion order (a log merge policy only merges adjacent segments)
49
+ config .setMergePolicy (newLogMergePolicy (random ()));
50
+ try (RandomIndexWriter indexWriter = new RandomIndexWriter (random (), dir , config )) {
53
51
for (int i = 0 ; i < textValues .length ; i ++) {
54
52
Document doc = new Document ();
55
53
doc .add (newTextField (TEXT_FIELD_NAME , textValues [i ], Field .Store .NO ));
@@ -59,98 +57,119 @@ private void addDocs(String[] textValues, int[] numberValues) throws IOException
59
57
indexWriter .flush ();
60
58
}
61
59
}
62
- reader = indexWriter .getReader ();
60
+ return indexWriter .getReader ();
63
61
}
64
- searcher = newSearcher (reader );
65
- searcher .setSimilarity (new ClassicSimilarity ());
66
62
}
67
63
68
- @ AwaitsFix (bugUrl = "https://github.com/elastic/elasticsearch/issues/98127" )
69
64
public void testQueryExtractor () throws IOException {
70
- addDocs (
71
- new String [] { "the quick brown fox" , "the slow brown fox" , "the grey dog" , "yet another string" },
72
- new int [] { 5 , 10 , 12 , 11 }
73
- );
74
- QueryRewriteContext ctx = createQueryRewriteContext ();
75
- List <QueryExtractorBuilder > queryExtractorBuilders = List .of (
76
- new QueryExtractorBuilder ("text_score" , QueryProvider .fromParsedQuery (QueryBuilders .matchQuery (TEXT_FIELD_NAME , "quick fox" )))
77
- .rewrite (ctx ),
78
- new QueryExtractorBuilder (
79
- "number_score" ,
80
- QueryProvider .fromParsedQuery (QueryBuilders .rangeQuery (INT_FIELD_NAME ).from (12 ).to (12 ))
81
- ).rewrite (ctx ),
82
- new QueryExtractorBuilder (
83
- "matching_none" ,
84
- QueryProvider .fromParsedQuery (QueryBuilders .termQuery (TEXT_FIELD_NAME , "never found term" ))
85
- ).rewrite (ctx ),
86
- new QueryExtractorBuilder (
87
- "matching_missing_field" ,
88
- QueryProvider .fromParsedQuery (QueryBuilders .termQuery ("missing_text" , "quick fox" ))
89
- ).rewrite (ctx )
90
- );
91
- SearchExecutionContext dummySEC = createSearchExecutionContext ();
92
- List <Weight > weights = new ArrayList <>();
93
- List <String > featureNames = new ArrayList <>();
94
- for (QueryExtractorBuilder qeb : queryExtractorBuilders ) {
95
- Query q = qeb .query ().getParsedQuery ().toQuery (dummySEC );
96
- Weight weight = searcher .rewrite (q ).createWeight (searcher , ScoreMode .COMPLETE , 1f );
97
- weights .add (weight );
98
- featureNames .add (qeb .featureName ());
99
- }
100
- QueryFeatureExtractor queryFeatureExtractor = new QueryFeatureExtractor (featureNames , weights );
101
- List <Map <String , Object >> extractedFeatures = new ArrayList <>();
102
- for (LeafReaderContext leafReaderContext : searcher .getLeafContexts ()) {
103
- int maxDoc = leafReaderContext .reader ().maxDoc ();
104
- queryFeatureExtractor .setNextReader (leafReaderContext );
105
- for (int i = 0 ; i < maxDoc ; i ++) {
106
- Map <String , Object > featureMap = new HashMap <>();
107
- queryFeatureExtractor .addFeatures (featureMap , i );
108
- extractedFeatures .add (featureMap );
65
+ try (var dir = newDirectory ()) {
66
+ try (
67
+ var reader = addDocs (
68
+ dir ,
69
+ new String [] { "the quick brown fox" , "the slow brown fox" , "the grey dog" , "yet another string" },
70
+ new int [] { 5 , 10 , 12 , 11 }
71
+ )
72
+ ) {
73
+ var searcher = newSearcher (reader );
74
+ searcher .setSimilarity (new ClassicSimilarity ());
75
+ QueryRewriteContext ctx = createQueryRewriteContext ();
76
+ List <QueryExtractorBuilder > queryExtractorBuilders = List .of (
77
+ new QueryExtractorBuilder (
78
+ "text_score" ,
79
+ QueryProvider .fromParsedQuery (QueryBuilders .matchQuery (TEXT_FIELD_NAME , "quick fox" ))
80
+ ).rewrite (ctx ),
81
+ new QueryExtractorBuilder (
82
+ "number_score" ,
83
+ QueryProvider .fromParsedQuery (QueryBuilders .rangeQuery (INT_FIELD_NAME ).from (12 ).to (12 ))
84
+ ).rewrite (ctx ),
85
+ new QueryExtractorBuilder (
86
+ "matching_none" ,
87
+ QueryProvider .fromParsedQuery (QueryBuilders .termQuery (TEXT_FIELD_NAME , "never found term" ))
88
+ ).rewrite (ctx ),
89
+ new QueryExtractorBuilder (
90
+ "matching_missing_field" ,
91
+ QueryProvider .fromParsedQuery (QueryBuilders .termQuery ("missing_text" , "quick fox" ))
92
+ ).rewrite (ctx ),
93
+ new QueryExtractorBuilder (
94
+ "phrase_score" ,
95
+ QueryProvider .fromParsedQuery (QueryBuilders .matchPhraseQuery (TEXT_FIELD_NAME , "slow brown fox" ))
96
+ ).rewrite (ctx )
97
+ );
98
+ SearchExecutionContext dummySEC = createSearchExecutionContext ();
99
+ List <Weight > weights = new ArrayList <>();
100
+ List <String > featureNames = new ArrayList <>();
101
+ for (QueryExtractorBuilder qeb : queryExtractorBuilders ) {
102
+ Query q = qeb .query ().getParsedQuery ().toQuery (dummySEC );
103
+ Weight weight = searcher .rewrite (q ).createWeight (searcher , ScoreMode .COMPLETE , 1f );
104
+ weights .add (weight );
105
+ featureNames .add (qeb .featureName ());
106
+ }
107
+ QueryFeatureExtractor queryFeatureExtractor = new QueryFeatureExtractor (featureNames , weights );
108
+ List <Map <String , Object >> extractedFeatures = new ArrayList <>();
109
+ for (LeafReaderContext leafReaderContext : searcher .getLeafContexts ()) {
110
+ int maxDoc = leafReaderContext .reader ().maxDoc ();
111
+ queryFeatureExtractor .setNextReader (leafReaderContext );
112
+ for (int i = 0 ; i < maxDoc ; i ++) {
113
+ Map <String , Object > featureMap = new HashMap <>();
114
+ queryFeatureExtractor .addFeatures (featureMap , i );
115
+ extractedFeatures .add (featureMap );
116
+ }
117
+ }
118
+ assertThat (extractedFeatures , hasSize (4 ));
119
+ // Should never add features for queries that don't match a document or on documents where the field is missing
120
+ for (Map <String , Object > features : extractedFeatures ) {
121
+ assertThat (features , not (hasKey ("matching_none" )));
122
+ assertThat (features , not (hasKey ("matching_missing_field" )));
123
+ }
124
+ // First two only match the text field
125
+ assertThat (extractedFeatures .get (0 ), hasEntry ("text_score" , 1.7135582f ));
126
+ assertThat (extractedFeatures .get (0 ), not (hasKey ("number_score" )));
127
+ assertThat (extractedFeatures .get (0 ), not (hasKey ("phrase_score" )));
128
+ assertThat (extractedFeatures .get (1 ), hasEntry ("text_score" , 0.7554128f ));
129
+ assertThat (extractedFeatures .get (1 ), not (hasKey ("number_score" )));
130
+ assertThat (extractedFeatures .get (1 ), hasEntry ("phrase_score" , 2.468971f ));
131
+
132
+ // Only matches the range query
133
+ assertThat (extractedFeatures .get (2 ), hasEntry ("number_score" , 1f ));
134
+ assertThat (extractedFeatures .get (2 ), not (hasKey ("text_score" )));
135
+ assertThat (extractedFeatures .get (2 ), not (hasKey ("phrase_score" )));
136
+
137
+ // No query matches
138
+ assertThat (extractedFeatures .get (3 ), anEmptyMap ());
109
139
}
110
140
}
111
- assertThat (extractedFeatures , hasSize (4 ));
112
- // Should never add features for queries that don't match a document or on documents where the field is missing
113
- for (Map <String , Object > features : extractedFeatures ) {
114
- assertThat (features , not (hasKey ("matching_none" )));
115
- assertThat (features , not (hasKey ("matching_missing_field" )));
116
- }
117
- // First two only match the text field
118
- assertThat (extractedFeatures .get (0 ), hasEntry ("text_score" , 1.7135582f ));
119
- assertThat (extractedFeatures .get (0 ), not (hasKey ("number_score" )));
120
- assertThat (extractedFeatures .get (1 ), hasEntry ("text_score" , 0.7554128f ));
121
- assertThat (extractedFeatures .get (1 ), not (hasKey ("number_score" )));
122
- // Only matches the range query
123
- assertThat (extractedFeatures .get (2 ), hasEntry ("number_score" , 1f ));
124
- assertThat (extractedFeatures .get (2 ), not (hasKey ("text_score" )));
125
- // No query matches
126
- assertThat (extractedFeatures .get (3 ), anEmptyMap ());
127
- reader .close ();
128
- dir .close ();
129
141
}
130
142
131
143
public void testEmptyDisiPriorityQueue () throws IOException {
132
- addDocs (
133
- new String [] { "the quick brown fox" , "the slow brown fox" , "the grey dog" , "yet another string" },
134
- new int [] { 5 , 10 , 12 , 11 }
135
- );
144
+ try (var dir = newDirectory ()) {
145
+ var config = newIndexWriterConfig ();
146
+ config .setMergePolicy (NoMergePolicy .INSTANCE );
147
+ try (
148
+ var reader = addDocs (
149
+ dir ,
150
+ new String [] { "the quick brown fox" , "the slow brown fox" , "the grey dog" , "yet another string" },
151
+ new int [] { 5 , 10 , 12 , 11 }
152
+ )
153
+ ) {
136
154
137
- // Scorers returned by weights are null
138
- List <String > featureNames = randomList (1 , 10 , ESTestCase ::randomIdentifier );
139
- List <Weight > weights = Stream .generate (() -> mock (Weight .class )).limit (featureNames .size ()).toList ();
155
+ var searcher = newSearcher (reader );
156
+ searcher .setSimilarity (new ClassicSimilarity ());
140
157
141
- QueryFeatureExtractor featureExtractor = new QueryFeatureExtractor (featureNames , weights );
158
+ // Scorers returned by weights are null
159
+ List <String > featureNames = randomList (1 , 10 , ESTestCase ::randomIdentifier );
160
+ List <Weight > weights = Stream .generate (() -> mock (Weight .class )).limit (featureNames .size ()).toList ();
142
161
143
- for (LeafReaderContext leafReaderContext : searcher .getLeafContexts ()) {
144
- int maxDoc = leafReaderContext .reader ().maxDoc ();
145
- featureExtractor .setNextReader (leafReaderContext );
146
- for (int i = 0 ; i < maxDoc ; i ++) {
147
- Map <String , Object > featureMap = new HashMap <>();
148
- featureExtractor .addFeatures (featureMap , i );
149
- assertThat (featureMap , anEmptyMap ());
162
+ QueryFeatureExtractor featureExtractor = new QueryFeatureExtractor (featureNames , weights );
163
+ for (LeafReaderContext leafReaderContext : searcher .getLeafContexts ()) {
164
+ int maxDoc = leafReaderContext .reader ().maxDoc ();
165
+ featureExtractor .setNextReader (leafReaderContext );
166
+ for (int i = 0 ; i < maxDoc ; i ++) {
167
+ Map <String , Object > featureMap = new HashMap <>();
168
+ featureExtractor .addFeatures (featureMap , i );
169
+ assertThat (featureMap , anEmptyMap ());
170
+ }
171
+ }
150
172
}
151
173
}
152
-
153
- reader .close ();
154
- dir .close ();
155
174
}
156
175
}
0 commit comments