Skip to content

Commit e29bf52

Browse files
committed
Bug#25418534: JSON_EXTRACT USING WILDCARDS TAKES FOREVER
Patch #3:

find_child_doms() has two kinds of duplicate elimination: one for removing duplicates that occur due to multiple ellipses, and one for removing duplicates that occur due to auto-wrapping on results returned by an ellipsis.

The first kind of duplicate elimination is performed by maintaining a sorted set of results. The second kind performs a linear search of the results to see if the value is already in the result vector.

This patch consolidates this code so that they both use the first kind of duplicate elimination. It also makes sure that duplicate elimination for auto-wrapping only happens for paths that could produce duplicates (only if the auto-wrapping path leg comes after an ellipsis path leg).

This is just a code cleanup. The microbenchmark results are indistinguishable from noise.

Microbenchmarks (64-bit, Intel Core i7-4770 3.4 GHz, GCC 6.3):

BM_JsonDomSearchEllipsis 25443 ns/iter [+1.0%]
BM_JsonDomSearchEllipsis_OnlyOne 17757 ns/iter [+0.7%]
BM_JsonDomSearchKey 128 ns/iter [ 0.0%]
BM_JsonBinarySearchEllipsis 233469 ns/iter [-0.9%]
BM_JsonBinarySearchEllipsis_OnlyOne 226089 ns/iter [-1.5%]
BM_JsonBinarySearchKey 86 ns/iter [ 0.0%]

Change-Id: Ia62916098096032adf9f2ecc70a42c845f625c1c
1 parent 5a54ce6 commit e29bf52

File tree

1 file changed

+27
-31
lines changed

1 file changed

+27
-31
lines changed

sql/json_dom.cc

+27 -31
Original file line number | Diff line number | Diff line change
@@ -199,14 +199,6 @@ void Json_dom::operator delete(void *ptr, const std::nothrow_t&) throw()
199199
/* purecov: end */
200200

201201

202-
static bool seen_already(Json_dom_vector *result, Json_dom *cand)
203-
{
204-
Json_dom_vector::iterator it= std::find(result->begin(),
205-
result->end(),
206-
cand);
207-
return it != result->end();
208-
}
209-
210202
/**
211203
Add a value to a vector if it isn't already there.
212204
@@ -221,9 +213,15 @@ static bool seen_already(Json_dom_vector *result, Json_dom *cand)
221213
will be seen multiple times, as its grandparent, parent and finally
222214
itself are inspected. We want it to appear only once in the result.
223215
216+
The same problem occurs if a possibly auto-wrapping array path leg
217+
comes after an ellipsis. If the candidate set contains both an array
218+
element and its parent array due to the ellipsis, the auto-wrapping
219+
path leg may match the array element twice, and we only want it once
220+
in the result.
221+
224222
@param[in] candidate value to add
225-
@param[in,out] duplicates set of values added, or `nullptr` if the ellipsis
226-
token is not daisy-chained
223+
@param[in,out] duplicates set of values added, or `nullptr` if duplicate
224+
checking is not needed
227225
@param[in,out] result vector
228226
@return false on success, true on error
229227
*/
@@ -267,12 +265,13 @@ static inline bool is_seek_done(const Result_vector *hits, bool only_need_one)
267265
268266
@param[in] dom the DOM to search
269267
@param[in] path_leg identifies the child
270-
@param[in] auto_wrap if true, match final scalar with [0] is need be
268+
@param[in] auto_wrap if true, auto-wrap non-arrays when matching against
269+
array path legs
271270
@param[in] only_need_one true if we can stop after finding one match
272271
@param[in,out] duplicates set of values collected, which helps to identify
273272
duplicate arrays and objects introduced by daisy-chained
274-
** tokens, or `nullptr` if the path leg is not a
275-
daisy-chained ** token
273+
** tokens or auto-wrapping, or `nullptr` if duplicate
274+
elimination is not needed for this path leg
276275
@param[in,out] result the vector of qualifying children
277276
@return false on success, true on error
278277
*/
@@ -290,7 +289,7 @@ static bool find_child_doms(Json_dom *dom,
290289
if (auto_wrap && dom_type != enum_json_type::J_ARRAY &&
291290
path_leg->is_autowrap())
292291
{
293-
return !seen_already(result, dom) && result->push_back(dom);
292+
return add_if_missing(dom, duplicates, result);
294293
}
295294

296295
switch (leg_type)
@@ -300,7 +299,8 @@ static bool find_child_doms(Json_dom *dom,
300299
{
301300
const auto array= down_cast<const Json_array *>(dom);
302301
const Json_array_index idx= path_leg->first_array_index(array->size());
303-
return idx.within_bounds() && result->push_back((*array)[idx.position()]);
302+
return idx.within_bounds() &&
303+
add_if_missing((*array)[idx.position()], duplicates, result);
304304
}
305305
return false;
306306
case jpl_array_range:
@@ -311,7 +311,7 @@ static bool find_child_doms(Json_dom *dom,
311311
const auto range= path_leg->get_array_range(array->size());
312312
for (size_t i= range.m_begin; i < range.m_end; ++i)
313313
{
314-
if (result->push_back((*array)[i]))
314+
if (add_if_missing((*array)[i], duplicates, result))
315315
return true; /* purecov: inspected */
316316
if (only_need_one)
317317
return false;
@@ -2289,27 +2289,22 @@ bool Json_dom::seek(const Json_seekable_path &path,
22892289
for (size_t path_idx= 0; path_idx < path_leg_count; path_idx++)
22902290
{
22912291
const Json_path_leg *path_leg= path.get_leg_at(path_idx);
2292-
candidates.clear();
22932292

22942293
/*
2295-
When we have multiple ellipses in the path, we need to eliminate
2296-
duplicates from the result. It's not needed for the first ellipsis.
2297-
See explanation in add_if_missing() and Json_wrapper::seek().
2294+
When we have multiple ellipses in the path, or an ellipsis
2295+
followed by an auto-wrapping array path leg, we need to
2296+
eliminate duplicates from the result. It's not needed for the
2297+
first ellipsis. See explanation in add_if_missing() and
2298+
Json_wrapper::seek().
22982299
*/
22992300
Json_dom_vector *dup_vector= nullptr;
2300-
if (path_leg->get_type() == jpl_ellipsis)
2301+
if (seen_ellipsis && (path_leg->get_type() == jpl_ellipsis ||
2302+
(auto_wrap && path_leg->is_autowrap())))
23012303
{
2302-
if (seen_ellipsis)
2303-
{
2304-
/*
2305-
This ellipsis is not the first one, so we need to eliminate
2306-
duplicates in find_child_doms().
2307-
*/
2308-
dup_vector= &duplicates;
2309-
dup_vector->clear();
2310-
}
2311-
seen_ellipsis= true;
2304+
dup_vector= &duplicates;
2305+
dup_vector->clear();
23122306
}
2307+
seen_ellipsis|= path_leg->get_type() == jpl_ellipsis;
23132308

23142309
/*
23152310
On the last path leg, we can stop after the first match if only
@@ -2330,6 +2325,7 @@ bool Json_dom::seek(const Json_seekable_path &path,
23302325

23312326
// swap the two lists so that they can be re-used
23322327
hits->swap(candidates);
2328+
candidates.clear();
23332329
}
23342330

23352331
return false;

0 commit comments

Comments
 (0)