Skip to content

Commit 0d318b6

Browse files
committed
[benchmark] Discard oversampled quantile values
When num_samples is less than quantile + 1, some of the measurements are repeated in the report summary. Parsed samples should strive to be a true reflection of the measured distribution, so we’ll correct this by discarding the repeated artifacts from quantile estimation. This avoids introducing a bias from this oversampling into the empirical distribution obtained from merging independent samples. See also: https://en.wikipedia.org/wiki/Oversampling_and_undersampling_in_data_analysis
1 parent a04edd1 commit 0d318b6

File tree

2 files changed

+74
-5
lines changed

2 files changed

+74
-5
lines changed

benchmark/scripts/compare_perf_tests.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,18 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False):
241241
if quantiles: # Variable number of columns representing quantiles
242242
runtimes = csv_row[3:-1] if memory else csv_row[3:]
243243
if delta:
244-
runtimes = map(lambda x: int(x) if x else 0, runtimes)
245-
runtimes = reduce(lambda l, x: l.append(l[-1] + x) or
246-
l if l else [x], runtimes, None)
244+
runtimes = [int(x) if x else 0 for x in runtimes]
245+
runtimes = reduce(lambda l, x: l.append(l[-1] + x) or # runnin
246+
l if l else [x], runtimes, None) # total
247+
num_values = len(runtimes)
248+
if self.num_samples < num_values: # remove repeated samples
249+
quantile = num_values - 1
250+
qs = [float(i) / float(quantile) for i in range(0, num_values)]
251+
indices = [max(0, int(ceil(self.num_samples * float(q))) - 1)
252+
for q in qs]
253+
runtimes = [runtimes[indices.index(i)]
254+
for i in range(0, self.num_samples)]
255+
247256
self.samples = PerformanceTestSamples(
248257
self.name,
249258
[Sample(None, None, int(runtime)) for runtime in runtimes])

benchmark/scripts/test_compare_perf_tests.py

+62-2
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,68 @@ def test_init_delta_quantiles(self):
243243
r = PerformanceTestResult(log.split(','), quantiles=True, delta=True)
244244
self.assertEquals((r.num_samples, r.min, r.median, r.max),
245245
(2, 265, 265, 287))
246-
self.assertEquals(r.samples.count, 3) # --quantile=2 gives a
247-
self.assertEquals(r.samples.num_samples, 3) # 3 sample estimate
246+
self.assertEquals(r.samples.count, 2)
247+
self.assertEquals(r.samples.num_samples, 2)
248+
249+
def test_init_oversampled_quantiles(self):
250+
"""When num_samples is < quantile + 1, some of the measurements are
251+
repeated in the report summary. Samples should contain only true
252+
values, discarding the repeated artifacts from quantile estimation.
253+
254+
The test string is slightly massaged output of the following R script:
255+
subsample <- function(x, q) {
256+
quantile(1:x, probs=((0:(q-1))/(q-1)), type=1)}
257+
tbl <- function(s) t(sapply(1:s, function(x) {
258+
qs <- subsample(x, s); c(qs[1], diff(qs)) }))
259+
sapply(c(3, 5, 11, 21), tbl)
260+
"""
261+
def validatePTR(deq): # construct from delta encoded quantiles string
262+
deq = deq.split(',')
263+
num_samples = deq.count('1')
264+
r = PerformanceTestResult(['0', 'B', str(num_samples)] + deq,
265+
quantiles=True, delta=True)
266+
self.assertEquals(r.samples.num_samples, num_samples)
267+
self.assertEquals([s.runtime for s in r.samples.all_samples],
268+
range(1, num_samples + 1))
269+
270+
delta_encoded_quantiles = """
271+
1,,
272+
1,,1
273+
1,,,,
274+
1,,,1,
275+
1,,1,1,
276+
1,,1,1,1
277+
1,,,,,,,,,,
278+
1,,,,,,1,,,,
279+
1,,,,1,,,1,,,
280+
1,,,1,,,1,,1,,
281+
1,,,1,,1,,1,,1,
282+
1,,1,,1,,1,1,,1,
283+
1,,1,1,,1,1,,1,1,
284+
1,,1,1,1,,1,1,1,1,
285+
1,,1,1,1,1,1,1,1,1,
286+
1,,1,1,1,1,1,1,1,1,1
287+
1,,,,,,,,,,,,,,,,,,,,
288+
1,,,,,,,,,,,1,,,,,,,,,
289+
1,,,,,,,1,,,,,,,1,,,,,,
290+
1,,,,,,1,,,,,1,,,,,1,,,,
291+
1,,,,,1,,,,1,,,,1,,,,1,,,
292+
1,,,,1,,,1,,,,1,,,1,,,1,,,
293+
1,,,1,,,1,,,1,,,1,,,1,,,1,,
294+
1,,,1,,,1,,1,,,1,,1,,,1,,1,,
295+
1,,,1,,1,,1,,1,,,1,,1,,1,,1,,
296+
1,,,1,,1,,1,,1,,1,,1,,1,,1,,1,
297+
1,,1,,1,,1,,1,,1,1,,1,,1,,1,,1,
298+
1,,1,,1,,1,1,,1,,1,1,,1,,1,1,,1,
299+
1,,1,,1,1,,1,1,,1,1,,1,1,,1,1,,1,
300+
1,,1,1,,1,1,,1,1,,1,1,1,,1,1,,1,1,
301+
1,,1,1,,1,1,1,,1,1,1,,1,1,1,,1,1,1,
302+
1,,1,1,1,,1,1,1,1,,1,1,1,1,,1,1,1,1,
303+
1,,1,1,1,1,1,,1,1,1,1,1,1,,1,1,1,1,1,
304+
1,,1,1,1,1,1,1,1,1,,1,1,1,1,1,1,1,1,1,
305+
1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
306+
1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"""
307+
map(validatePTR, delta_encoded_quantiles.split('\n')[1:])
248308

249309
def test_repr(self):
250310
log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884'

0 commit comments

Comments
 (0)