Skip to content

Commit f09cc8c

Browse files
committed
Fix compare_perf_tests.py for running locally.
The script defaulted to a mode that no one uses without checking whether the input was compatible with that mode. This is the script used for run-to-run comparison of benchmark results. The in-tree benchmarks happened to work with the script only because of a fragile string comparison burried deep within the script. Other out-of-tree benchmark scripts that generate results were silently broken when using this script for comparison.
1 parent e819415 commit f09cc8c

File tree

3 files changed

+35
-24
lines changed

3 files changed

+35
-24
lines changed

benchmark/scripts/compare_perf_tests.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ class PerformanceTestResult(object):
229229
statistics for normal distribution (MEAN, SD):
230230
#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
231231
And new quantiles format with variable number of columns:
232-
#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
233-
#,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
232+
#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
233+
#,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
234234
The number of columns between MIN and MAX depends on the test driver's
235235
`--quantile`parameter. In both cases, the last column, MAX_RSS is optional.
236236
"""
@@ -244,9 +244,10 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal
244244
self.name = csv_row[1] # Name of the performance test
245245
self.num_samples = int(csv_row[2]) # Number of measurements taken
246246

247+
mem_index = (-1 if memory else 0) + (-3 if meta else 0)
247248
if quantiles: # Variable number of columns representing quantiles
248-
mem_index = (-1 if memory else 0) + (-3 if meta else 0)
249249
runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
250+
last_runtime_index = mem_index - 1
250251
if delta:
251252
runtimes = [int(x) if x else 0 for x in runtimes]
252253
runtimes = functools.reduce(
@@ -277,20 +278,21 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal
277278
sams.mean,
278279
sams.sd,
279280
)
280-
self.max_rss = ( # Maximum Resident Set Size (B)
281-
int(csv_row[mem_index]) if memory else None
282-
)
283281
else: # Legacy format with statistics for normal distribution.
284282
self.min = int(csv_row[3]) # Minimum runtime (μs)
285283
self.max = int(csv_row[4]) # Maximum runtime (μs)
286284
self.mean = float(csv_row[5]) # Mean (average) runtime (μs)
287285
self.sd = float(csv_row[6]) # Standard Deviation (μs)
288286
self.median = int(csv_row[7]) # Median runtime (μs)
289-
self.max_rss = ( # Maximum Resident Set Size (B)
290-
int(csv_row[8]) if len(csv_row) > 8 else None
291-
)
287+
last_runtime_index = 7
292288
self.samples = None
293289

290+
self.max_rss = ( # Maximum Resident Set Size (B)
291+
int(csv_row[mem_index]) if (
292+
memory and len(csv_row) > (last_runtime_index + 1)
293+
) else None
294+
)
295+
294296
# Optional measurement metadata. The number of:
295297
# memory pages used, involuntary context switches and voluntary yields
296298
self.mem_pages, self.involuntary_cs, self.yield_count = (
@@ -427,7 +429,7 @@ def _store_memory_stats(self, max_rss, mem_pages):
427429
self.mem_pages = int(mem_pages)
428430

429431
def _configure_format(self, header):
430-
self.quantiles = "MEAN" not in header
432+
self.quantiles = "QMIN" in header
431433
self.memory = "MAX_RSS" in header
432434
self.meta = "PAGES" in header
433435
self.delta = "𝚫" in header
@@ -453,7 +455,7 @@ def _configure_format(self, header):
453455
Yield(len(self.samples), int(since_last_yield))
454456
)
455457
),
456-
re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)"): _configure_format,
458+
re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t].*)"): _configure_format,
457459
# Environmental statistics: memory usage and context switches
458460
re.compile(
459461
r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)"

benchmark/scripts/test_compare_perf_tests.py

+20-12
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def test_init(self):
205205
self.assertEqual(r.samples, None)
206206

207207
log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336"
208-
r = PerformanceTestResult(log_line.split(","))
208+
r = PerformanceTestResult(log_line.split(","), memory=True)
209209
self.assertEqual(r.max_rss, 10510336)
210210

211211
def test_init_quantiles(self):
@@ -379,7 +379,11 @@ def test_merge(self):
379379
)[
380380
1:
381381
]
382-
results = list(map(PerformanceTestResult, [line.split(",") for line in tests]))
382+
383+
def makeResult(csv_row):
384+
return PerformanceTestResult(csv_row, memory=True)
385+
386+
results = list(map(makeResult, [line.split(",") for line in tests]))
383387
results[2].setup = 9
384388
results[3].setup = 7
385389

@@ -489,11 +493,14 @@ class OldAndNewLog(unittest.TestCase):
489493
3,Array2D,20,335831,400221,346622,0,346622
490494
1,AngryPhonebook,20,10458,12714,11000,0,11000"""
491495

496+
def makeResult(csv_row):
497+
return PerformanceTestResult(csv_row, memory=True)
498+
492499
old_results = dict(
493500
[
494501
(r.name, r)
495502
for r in map(
496-
PerformanceTestResult,
503+
makeResult,
497504
[line.split(",") for line in old_log_content.splitlines()],
498505
)
499506
]
@@ -503,7 +510,7 @@ class OldAndNewLog(unittest.TestCase):
503510
[
504511
(r.name, r)
505512
for r in map(
506-
PerformanceTestResult,
513+
makeResult,
507514
[line.split(",") for line in new_log_content.splitlines()],
508515
)
509516
]
@@ -557,14 +564,14 @@ def test_parse_results_formatted_text(self):
557564
def test_parse_quantiles(self):
558565
"""Gathers samples from reported quantiles. Handles optional memory."""
559566
r = LogParser.results_from_string(
560-
"""#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
567+
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
561568
1,Ackermann,3,54383,54512,54601"""
562569
)["Ackermann"]
563570
self.assertEqual(
564571
[s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
565572
)
566573
r = LogParser.results_from_string(
567-
"""#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
574+
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
568575
1,Ackermann,3,54529,54760,55807,266240"""
569576
)["Ackermann"]
570577
self.assertEqual(
@@ -574,21 +581,21 @@ def test_parse_quantiles(self):
574581

575582
def test_parse_delta_quantiles(self):
576583
r = LogParser.results_from_string( # 2-quantile aka. median
577-
"#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
584+
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
578585
)["B"]
579586
self.assertEqual(
580587
(r.num_samples, r.min, r.median, r.max, r.samples.count),
581588
(1, 101, 101, 101, 1),
582589
)
583590
r = LogParser.results_from_string(
584-
"#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
591+
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
585592
)["B"]
586593
self.assertEqual(
587594
(r.num_samples, r.min, r.median, r.max, r.samples.count),
588595
(2, 101, 101, 102, 2),
589596
)
590597
r = LogParser.results_from_string( # 20-quantiles aka. ventiles
591-
"#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
598+
"#,TEST,SAMPLES,QMIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
592599
+ "𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n"
593600
+ "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464"
594601
)["DropWhileArray"]
@@ -617,13 +624,13 @@ def test_parse_meta(self):
617624
(3, 9, 50, 15, 36864),
618625
)
619626
r = LogParser.results_from_string(
620-
"#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
627+
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
621628
)["B"]
622629
self.assertEqual(
623630
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
624631
)
625632
r = LogParser.results_from_string(
626-
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
633+
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
627634
+ "0,B,1,5,5,32768,8,28,15"
628635
)["B"]
629636
self.assertEqual(
@@ -831,7 +838,8 @@ def test_values(self):
831838
self.assertEqual(
832839
ReportFormatter.values(
833840
PerformanceTestResult(
834-
"1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(",")
841+
"1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(","),
842+
memory=True
835843
)
836844
),
837845
("AngryPhonebook", "12045", "12045", "12045", "10510336"),

benchmark/utils/DriverUtils.swift

+2-1
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,8 @@ final class TestRunner {
634634
let index: (Int) -> String =
635635
{ q == 2 ? "" : q <= 20 ? base20[$0] : String($0) }
636636
let tail = (1..<q).map { prefix + index($0) } + ["MAX"]
637-
return [withUnit("MIN")] + tail.map(c.delta ? withDelta : withUnit)
637+
// QMIN identifies the quantile format, distinct from formats using "MIN"
638+
return [withUnit("QMIN")] + tail.map(c.delta ? withDelta : withUnit)
638639
}
639640
return (
640641
["#", "TEST", "SAMPLES"] +

0 commit comments

Comments
 (0)