[benchmark] Discard oversampled quantile values

palimondo · palimondo · commit 0d318b646487 · 2018-10-11T18:56:27.000+02:00
When num_samples is less than quantile + 1, some of the measurements are repeated in the report summary. Parsed samples should strive to be a true reflection of the measured distribution, so we’ll correct this by discarding the repetated artifacts from quantile estimation. This avoids introducting a bias from this oversampling into the empirical distribution obtained from merging independent samples. See also: https://en.wikipedia.org/wiki/Oversampling_and_undersampling_in_data_analysis
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
@@ -241,9 +241,18 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False):
         if quantiles:  # Variable number of columns representing quantiles
             runtimes = csv_row[3:-1] if memory else csv_row[3:]
             if delta:
-                runtimes = map(lambda x: int(x) if x else 0, runtimes)
-                runtimes = reduce(lambda l, x: l.append(l[-1] + x) or
-                                  l if l else [x], runtimes, None)
+                runtimes = [int(x) if x else 0 for x in runtimes]
+                runtimes = reduce(lambda l, x: l.append(l[-1] + x) or  # runnin
+                                  l if l else [x], runtimes, None)     # total
+            num_values = len(runtimes)
+            if self.num_samples < num_values: # remove repeated samples
+                quantile = num_values - 1
+                qs = [float(i) / float(quantile) for i in range(0, num_values)]
+                indices = [max(0, int(ceil(self.num_samples * float(q))) - 1)
+                           for q in qs]
+                runtimes = [runtimes[indices.index(i)]
+                            for i in range(0, self.num_samples)]
+
             self.samples = PerformanceTestSamples(
                 self.name,
                 [Sample(None, None, int(runtime)) for runtime in runtimes])
diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py
@@ -243,8 +243,68 @@ def test_init_delta_quantiles(self):
         r = PerformanceTestResult(log.split(','), quantiles=True, delta=True)
         self.assertEquals((r.num_samples, r.min, r.median, r.max),
                           (2, 265, 265, 287))
-        self.assertEquals(r.samples.count, 3)        # --quantile=2 gives a
-        self.assertEquals(r.samples.num_samples, 3)  # 3 sample estimate
+        self.assertEquals(r.samples.count, 2)
+        self.assertEquals(r.samples.num_samples, 2)
+
+    def test_init_oversampled_quantiles(self):
+        """When num_samples is < quantile + 1, some of the measurements are
+        repeated in the report summary. Samples should contain only true
+        values, discarding the repetated artifacts from quantile estimation.
+
+        The test string is slightly massaged output of the following R script:
+        subsample <- function(x, q) {
+          quantile(1:x, probs=((0:(q-1))/(q-1)), type=1)}
+        tbl <- function(s) t(sapply(1:s, function(x) {
+          qs <- subsample(x, s); c(qs[1], diff(qs)) }))
+        sapply(c(3, 5, 11, 21), tbl)
+        """
+        def validatePTR(deq):  # construct from delta encoded quantiles string
+            deq = deq.split(',')
+            num_samples = deq.count('1')
+            r = PerformanceTestResult(['0', 'B', str(num_samples)] + deq,
+                                      quantiles=True, delta=True)
+            self.assertEquals(r.samples.num_samples, num_samples)
+            self.assertEquals([s.runtime for s in r.samples.all_samples],
+                              range(1, num_samples + 1))
+
+        delta_encoded_quantiles = """
+1,,
+1,,1
+1,,,,
+1,,,1,
+1,,1,1,
+1,,1,1,1
+1,,,,,,,,,,
+1,,,,,,1,,,,
+1,,,,1,,,1,,,
+1,,,1,,,1,,1,,
+1,,,1,,1,,1,,1,
+1,,1,,1,,1,1,,1,
+1,,1,1,,1,1,,1,1,
+1,,1,1,1,,1,1,1,1,
+1,,1,1,1,1,1,1,1,1,
+1,,1,1,1,1,1,1,1,1,1
+1,,,,,,,,,,,,,,,,,,,,
+1,,,,,,,,,,,1,,,,,,,,,
+1,,,,,,,1,,,,,,,1,,,,,,
+1,,,,,,1,,,,,1,,,,,1,,,,
+1,,,,,1,,,,1,,,,1,,,,1,,,
+1,,,,1,,,1,,,,1,,,1,,,1,,,
+1,,,1,,,1,,,1,,,1,,,1,,,1,,
+1,,,1,,,1,,1,,,1,,1,,,1,,1,,
+1,,,1,,1,,1,,1,,,1,,1,,1,,1,,
+1,,,1,,1,,1,,1,,1,,1,,1,,1,,1,
+1,,1,,1,,1,,1,,1,1,,1,,1,,1,,1,
+1,,1,,1,,1,1,,1,,1,1,,1,,1,1,,1,
+1,,1,,1,1,,1,1,,1,1,,1,1,,1,1,,1,
+1,,1,1,,1,1,,1,1,,1,1,1,,1,1,,1,1,
+1,,1,1,,1,1,1,,1,1,1,,1,1,1,,1,1,1,
+1,,1,1,1,,1,1,1,1,,1,1,1,1,,1,1,1,1,
+1,,1,1,1,1,1,,1,1,1,1,1,1,,1,1,1,1,1,
+1,,1,1,1,1,1,1,1,1,,1,1,1,1,1,1,1,1,1,
+1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"""
+        map(validatePTR, delta_encoded_quantiles.split('\n')[1:])
 
     def test_repr(self):
         log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884'