Fix compare_perf_tests.py for running locally.

atrick · atrick · commit f09cc8cc8b47 · 2022-05-12T16:50:32.000-07:00
The script defaulted to a mode that no one uses without checking
whether the input was compatible with that mode.

This is the script used for run-to-run comparison of benchmark
results. The in-tree benchmarks happened to work with the script only
because of a fragile string comparison buried deep within the
script. Other out-of-tree benchmark scripts that generate results were
silently broken when using this script for comparison.
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
@@ -229,8 +229,8 @@ class PerformanceTestResult(object):
     statistics for normal distribution (MEAN, SD):
         #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
     And new quantiles format with variable number of columns:
-        #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
-        #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
+        #,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
+        #,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
     The number of columns between MIN and MAX depends on the test driver's
     `--quantile`parameter. In both cases, the last column, MAX_RSS is optional.
     """
@@ -244,9 +244,10 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal
         self.name = csv_row[1]  # Name of the performance test
         self.num_samples = int(csv_row[2])  # Number of measurements taken
 
+        mem_index = (-1 if memory else 0) + (-3 if meta else 0)
         if quantiles:  # Variable number of columns representing quantiles
-            mem_index = (-1 if memory else 0) + (-3 if meta else 0)
             runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
+            last_runtime_index = mem_index - 1
             if delta:
                 runtimes = [int(x) if x else 0 for x in runtimes]
                 runtimes = functools.reduce(
@@ -277,20 +278,21 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal
                 sams.mean,
                 sams.sd,
             )
-            self.max_rss = (  # Maximum Resident Set Size (B)
-                int(csv_row[mem_index]) if memory else None
-            )
         else:  # Legacy format with statistics for normal distribution.
             self.min = int(csv_row[3])  # Minimum runtime (μs)
             self.max = int(csv_row[4])  # Maximum runtime (μs)
             self.mean = float(csv_row[5])  # Mean (average) runtime (μs)
             self.sd = float(csv_row[6])  # Standard Deviation (μs)
             self.median = int(csv_row[7])  # Median runtime (μs)
-            self.max_rss = (  # Maximum Resident Set Size (B)
-                int(csv_row[8]) if len(csv_row) > 8 else None
-            )
+            last_runtime_index = 7
             self.samples = None
 
+        self.max_rss = (  # Maximum Resident Set Size (B)
+            int(csv_row[mem_index]) if (
+                memory and len(csv_row) > (last_runtime_index + 1)
+            ) else None
+        )
+
         # Optional measurement metadata. The number of:
         # memory pages used, involuntary context switches and voluntary yields
         self.mem_pages, self.involuntary_cs, self.yield_count = (
@@ -427,7 +429,7 @@ def _store_memory_stats(self, max_rss, mem_pages):
         self.mem_pages = int(mem_pages)
 
     def _configure_format(self, header):
-        self.quantiles = "MEAN" not in header
+        self.quantiles = "QMIN" in header
         self.memory = "MAX_RSS" in header
         self.meta = "PAGES" in header
         self.delta = "𝚫" in header
@@ -453,7 +455,7 @@ def _configure_format(self, header):
                 Yield(len(self.samples), int(since_last_yield))
             )
         ),
-        re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)"): _configure_format,
+        re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t].*)"): _configure_format,
         # Environmental statistics: memory usage and context switches
         re.compile(
             r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)"
diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py
@@ -205,7 +205,7 @@ def test_init(self):
         self.assertEqual(r.samples, None)
 
         log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336"
-        r = PerformanceTestResult(log_line.split(","))
+        r = PerformanceTestResult(log_line.split(","), memory=True)
         self.assertEqual(r.max_rss, 10510336)
 
     def test_init_quantiles(self):
@@ -379,7 +379,11 @@ def test_merge(self):
         )[
             1:
         ]
-        results = list(map(PerformanceTestResult, [line.split(",") for line in tests]))
+
+        def makeResult(csv_row):
+            return PerformanceTestResult(csv_row, memory=True)
+
+        results = list(map(makeResult, [line.split(",") for line in tests]))
         results[2].setup = 9
         results[3].setup = 7
 
@@ -489,11 +493,14 @@ class OldAndNewLog(unittest.TestCase):
 3,Array2D,20,335831,400221,346622,0,346622
 1,AngryPhonebook,20,10458,12714,11000,0,11000"""
 
+    def makeResult(csv_row):
+        return PerformanceTestResult(csv_row, memory=True)
+
     old_results = dict(
         [
             (r.name, r)
             for r in map(
-                PerformanceTestResult,
+                makeResult,
                 [line.split(",") for line in old_log_content.splitlines()],
             )
         ]
@@ -503,7 +510,7 @@ class OldAndNewLog(unittest.TestCase):
         [
             (r.name, r)
             for r in map(
-                PerformanceTestResult,
+                makeResult,
                 [line.split(",") for line in new_log_content.splitlines()],
             )
         ]
@@ -557,14 +564,14 @@ def test_parse_results_formatted_text(self):
     def test_parse_quantiles(self):
         """Gathers samples from reported quantiles. Handles optional memory."""
         r = LogParser.results_from_string(
-            """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
+            """#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
 1,Ackermann,3,54383,54512,54601"""
         )["Ackermann"]
         self.assertEqual(
             [s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
         )
         r = LogParser.results_from_string(
-            """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
+            """#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
 1,Ackermann,3,54529,54760,55807,266240"""
         )["Ackermann"]
         self.assertEqual(
@@ -574,21 +581,21 @@ def test_parse_quantiles(self):
 
     def test_parse_delta_quantiles(self):
         r = LogParser.results_from_string(  # 2-quantile aka. median
-            "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
+            "#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
         )["B"]
         self.assertEqual(
             (r.num_samples, r.min, r.median, r.max, r.samples.count),
             (1, 101, 101, 101, 1),
         )
         r = LogParser.results_from_string(
-            "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
+            "#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
         )["B"]
         self.assertEqual(
             (r.num_samples, r.min, r.median, r.max, r.samples.count),
             (2, 101, 101, 102, 2),
         )
         r = LogParser.results_from_string(  # 20-quantiles aka. ventiles
-            "#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
+            "#,TEST,SAMPLES,QMIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
             + "𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n"
             + "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464"
         )["DropWhileArray"]
@@ -617,13 +624,13 @@ def test_parse_meta(self):
             (3, 9, 50, 15, 36864),
         )
         r = LogParser.results_from_string(
-            "#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
+            "#,TEST,SAMPLES,QMIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
         )["B"]
         self.assertEqual(
             (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
         )
         r = LogParser.results_from_string(
-            "#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
+            "#,TEST,SAMPLES,QMIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
             + "0,B,1,5,5,32768,8,28,15"
         )["B"]
         self.assertEqual(
@@ -831,7 +838,8 @@ def test_values(self):
         self.assertEqual(
             ReportFormatter.values(
                 PerformanceTestResult(
-                    "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(",")
+                    "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(","),
+                    memory=True
                 )
             ),
             ("AngryPhonebook", "12045", "12045", "12045", "10510336"),
diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift
@@ -634,7 +634,8 @@ final class TestRunner {
       let index: (Int) -> String =
         { q == 2 ? "" : q <= 20 ?  base20[$0] : String($0) }
       let tail = (1..<q).map { prefix + index($0) } + ["MAX"]
-      return [withUnit("MIN")] + tail.map(c.delta ? withDelta : withUnit)
+      // QMIN identifies the quantile format, distinct from formats using "MIN"
+      return [withUnit("QMIN")] + tail.map(c.delta ? withDelta : withUnit)
     }
     return (
       ["#", "TEST", "SAMPLES"] +