Skip to content

Commit 2c12236

Browse files
committed
Add compile time measurement + cleanup
1 parent e8d273a commit 2c12236

File tree

7 files changed

+138
-55
lines changed

7 files changed

+138
-55
lines changed

Sources/RegexBenchmark/Benchmark.swift

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
import _StringProcessing
1+
@_spi(RegexBenchmark) import _StringProcessing
22
import Foundation
33

44
protocol RegexBenchmark {
55
var name: String { get }
6+
func compile()
67
func run()
78
func debug()
89
}
@@ -19,6 +20,10 @@ struct Benchmark: RegexBenchmark {
1920
case allMatches
2021
}
2122

23+
func compile() {
24+
blackHole(regex._compileRegex())
25+
}
26+
2227
func run() {
2328
switch type {
2429
case .whole: blackHole(target.wholeMatch(of: regex))
@@ -43,6 +48,8 @@ struct NSBenchmark: RegexBenchmark {
4348
case first
4449
}
4550

51+
func compile() {}
52+
4653
func run() {
4754
switch type {
4855
case .allMatches: blackHole(regex.matches(in: target, range: range))
@@ -57,6 +64,10 @@ struct InputListBenchmark: RegexBenchmark {
5764
let regex: Regex<AnyRegexOutput>
5865
let targets: [String]
5966

67+
func compile() {
68+
blackHole(regex._compileRegex())
69+
}
70+
6071
func run() {
6172
for target in targets {
6273
blackHole(target.wholeMatch(of: regex))
@@ -79,6 +90,8 @@ struct InputListNSBenchmark: RegexBenchmark {
7990
NSRange(target.startIndex..<target.endIndex, in: target)
8091
}
8192

93+
func compile() {}
94+
8295
func run() {
8396
for target in targets {
8497
let range = range(in: target)

Sources/RegexBenchmark/BenchmarkResults.swift

+54-26
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ extension BenchmarkRunner {
66
let url = URL(fileURLWithPath: savePath, isDirectory: false)
77
let parent = url.deletingLastPathComponent()
88
if !FileManager.default.fileExists(atPath: parent.path) {
9-
try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true)
9+
try! FileManager.default.createDirectory(
10+
atPath: parent.path,
11+
withIntermediateDirectories: true)
1012
}
1113
print("Saving result to \(url.path)")
1214
try results.save(to: url)
@@ -21,7 +23,11 @@ extension BenchmarkRunner {
2123
}
2224

2325
/// Compare this runner's results against the results stored in the given file path
24-
func compare(against compareFilePath: String, showChart: Bool, saveTo: String?) throws {
26+
func compare(
27+
against compareFilePath: String,
28+
showChart: Bool,
29+
saveTo: String?
30+
) throws {
2531
let compareFileURL = URL(fileURLWithPath: compareFilePath)
2632
let compareResult = try SuiteResult.load(from: compareFileURL)
2733
let compareFile = compareFileURL.lastPathComponent
@@ -30,13 +36,22 @@ extension BenchmarkRunner {
3036
.compare(with: compareResult)
3137
.filter({!$0.name.contains("_NS")})
3238
.filter({$0.diff != nil})
33-
displayComparisons(comparisons, showChart, against: "saved benchmark result " + compareFile)
39+
displayComparisons(
40+
comparisons,
41+
showChart,
42+
against: "saved benchmark result " + compareFile)
3443
if let saveFile = saveTo {
3544
try saveComparisons(comparisons, path: saveFile)
3645
}
3746
}
3847

39-
func compareCompileTimes(against compareFilePath: String, showChart: Bool) throws {
48+
// Compile times are often very short (5-20µs) so results are likely to be
49+
// heavily affected by background tasks. This is primarily for making sure
50+
// there aren't any catastrophic changes in compile times
51+
func compareCompileTimes(
52+
against compareFilePath: String,
53+
showChart: Bool
54+
) throws {
4055
let compareFileURL = URL(fileURLWithPath: compareFilePath)
4156
let compareResult = try SuiteResult.load(from: compareFileURL)
4257
let compareFile = compareFileURL.lastPathComponent
@@ -45,20 +60,30 @@ extension BenchmarkRunner {
4560
.compareCompileTimes(with: compareResult)
4661
.filter({!$0.name.contains("_NS")})
4762
.filter({$0.diff != nil})
48-
print("[Experimental] Comparing estimated compile times")
49-
displayComparisons(compileTimeComparisons, false, against: "saved benchmark result " + compareFile)
63+
print("Comparing estimated compile times")
64+
displayComparisons(
65+
compileTimeComparisons,
66+
false,
67+
against: "saved benchmark result " + compareFile)
5068
}
5169

5270
/// Compares Swift Regex benchmark results against NSRegularExpression
5371
func compareWithNS(showChart: Bool, saveTo: String?) throws {
5472
let comparisons = results.compareWithNS().filter({$0.diff != nil})
55-
displayComparisons(comparisons, showChart, against: "NSRegularExpression (via CrossBenchmark)")
73+
displayComparisons(
74+
comparisons,
75+
showChart,
76+
against: "NSRegularExpression (via CrossBenchmark)")
5677
if let saveFile = saveTo {
5778
try saveComparisons(comparisons, path: saveFile)
5879
}
5980
}
6081

61-
func displayComparisons(_ comparisons: [BenchmarkResult.Comparison], _ showChart: Bool, against: String) {
82+
func displayComparisons(
83+
_ comparisons: [BenchmarkResult.Comparison],
84+
_ showChart: Bool,
85+
against: String
86+
) {
6287
let regressions = comparisons.filter({$0.diff!.seconds > 0})
6388
.sorted(by: {(a,b) in a.diff!.seconds > b.diff!.seconds})
6489
let improvements = comparisons.filter({$0.diff!.seconds < 0})
@@ -95,11 +120,16 @@ extension BenchmarkRunner {
95120
#endif
96121
}
97122

98-
func saveComparisons(_ comparisons: [BenchmarkResult.Comparison], path: String) throws {
123+
func saveComparisons(
124+
_ comparisons: [BenchmarkResult.Comparison],
125+
path: String
126+
) throws {
99127
let url = URL(fileURLWithPath: path, isDirectory: false)
100128
let parent = url.deletingLastPathComponent()
101129
if !FileManager.default.fileExists(atPath: parent.path) {
102-
try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true)
130+
try! FileManager.default.createDirectory(
131+
atPath: parent.path,
132+
withIntermediateDirectories: true)
103133
}
104134

105135
var contents = "name,latest,baseline,diff,percentage\n"
@@ -112,17 +142,10 @@ extension BenchmarkRunner {
112142
}
113143

114144
struct BenchmarkResult: Codable {
145+
let compileTime: Time
115146
let median: Time
116-
let estimatedCompileTime: Time
117147
let stdev: Double
118148
let samples: Int
119-
120-
init(_ initialRunTime: Time, _ median: Time, _ stdev: Double, _ samples: Int) {
121-
self.estimatedCompileTime = initialRunTime - median
122-
self.median = median
123-
self.stdev = stdev
124-
self.samples = samples
125-
}
126149
}
127150

128151
extension BenchmarkResult {
@@ -135,7 +158,7 @@ extension BenchmarkResult {
135158

136159
var diff: Time? {
137160
if diffCompileTimes {
138-
return latest.estimatedCompileTime - baseline.estimatedCompileTime
161+
return latest.compileTime - baseline.compileTime
139162
}
140163
if Stats.tTest(baseline, latest) {
141164
return latest.median - baseline.median
@@ -150,8 +173,8 @@ extension BenchmarkResult {
150173
let oldVal: Time
151174
let newVal: Time
152175
if diffCompileTimes {
153-
oldVal = baseline.estimatedCompileTime
154-
newVal = latest.estimatedCompileTime
176+
oldVal = baseline.compileTime
177+
newVal = latest.compileTime
155178
} else {
156179
oldVal = baseline.median
157180
newVal = latest.median
@@ -169,8 +192,8 @@ extension BenchmarkResult {
169192
let oldVal: Time
170193
let newVal: Time
171194
if diffCompileTimes {
172-
oldVal = baseline.estimatedCompileTime
173-
newVal = latest.estimatedCompileTime
195+
oldVal = baseline.compileTime
196+
newVal = latest.compileTime
174197
} else {
175198
oldVal = baseline.median
176199
newVal = latest.median
@@ -213,13 +236,18 @@ struct SuiteResult {
213236
return comparisons
214237
}
215238

216-
/// Compares the estimated compile times
217-
func compareCompileTimes(with other: SuiteResult) -> [BenchmarkResult.Comparison] {
239+
/// Compares the compile times
240+
func compareCompileTimes(
241+
with other: SuiteResult
242+
) -> [BenchmarkResult.Comparison] {
218243
var comparisons: [BenchmarkResult.Comparison] = []
219244
for item in results {
220245
if let otherVal = other.results[item.key] {
221246
comparisons.append(
222-
.init(name: item.key, baseline: otherVal, latest: item.value, diffCompileTimes: true))
247+
.init(name: item.key,
248+
baseline: otherVal,
249+
latest: item.value,
250+
diffCompileTimes: true))
223251
}
224252
}
225253
return comparisons

Sources/RegexBenchmark/BenchmarkRunner.swift

+37-17
Original file line numberDiff line numberDiff line change
@@ -18,32 +18,49 @@ struct BenchmarkRunner {
1818
suite.append(new)
1919
}
2020

21-
mutating func measure(benchmark: some RegexBenchmark, samples: Int) -> BenchmarkResult {
22-
var times: [Time] = []
23-
24-
// initial run to make sure the regex has been compiled
25-
// FIXME: this is a very poor way of estimating compile time
26-
// we should have some sort of interface directly with the engine to measure this
27-
// This also completely breaks when we rerun measure() for variant results
28-
let initialStart = Tick.now
21+
mutating func measure(
22+
benchmark: some RegexBenchmark,
23+
samples: Int
24+
) -> BenchmarkResult {
25+
var runtimes: [Time] = []
26+
var compileTimes: [Time] = []
27+
// Initial run to make sure the regex has been compiled
2928
benchmark.run()
30-
let initialEnd = Tick.now
31-
let initialRunTime = initialEnd.elapsedTime(since: initialStart)
29+
30+
// Measure compilation time
31+
for _ in 0..<samples {
32+
let start = Tick.now
33+
benchmark.compile()
34+
let end = Tick.now
35+
let time = end.elapsedTime(since: start)
36+
compileTimes.append(time)
37+
}
38+
39+
compileTimes.sort()
40+
let compileTime = compileTimes[samples/2]
3241

3342
// FIXME: use SuspendingClock?
3443
for _ in 0..<samples {
3544
let start = Tick.now
3645
benchmark.run()
3746
let end = Tick.now
3847
let time = end.elapsedTime(since: start)
39-
times.append(time)
48+
runtimes.append(time)
4049
}
4150

42-
times.sort()
43-
let median = times[samples/2]
44-
let mean = times.reduce(0.0, {acc, next in acc + next.seconds}) / Double(times.count)
45-
let stdev = (times.reduce(0.0, {acc, next in acc + pow(next.seconds - mean, 2)}) / Double(times.count)).squareRoot()
46-
return BenchmarkResult(initialRunTime, median, stdev, samples)
51+
runtimes.sort()
52+
let median = runtimes[samples/2]
53+
let sum = runtimes.reduce(0.0) {acc, next in acc + next.seconds}
54+
let mean = sum / Double(runtimes.count)
55+
let squareDiffs = runtimes.reduce(0.0) { acc, next in
56+
acc + pow(next.seconds - mean, 2)
57+
}
58+
let stdev = (squareDiffs / Double(runtimes.count)).squareRoot()
59+
return BenchmarkResult(
60+
compileTime: compileTime,
61+
median: median,
62+
stdev: stdev,
63+
samples: samples)
4764
}
4865

4966
mutating func run() {
@@ -60,8 +77,11 @@ struct BenchmarkRunner {
6077
fatalError("Benchmark \(b.name) is too variant")
6178
}
6279
}
80+
if result.compileTime > Time.millisecond {
81+
print("Warning: Abnormally high compilation time, what happened?")
82+
}
6383
if !quiet {
64-
print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (estimated compile time: \(result.estimatedCompileTime))")
84+
print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (compile time: \(result.compileTime))")
6585
}
6686
self.results.add(name: b.name, result: result)
6787
}

Sources/RegexBenchmark/CLI.swift

+7-4
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ struct Runner: ParsableCommand {
2020
@Option(help: "The result file to compare against")
2121
var compare: String?
2222

23-
@Option(help: "Experimental compile time comparison")
24-
var experimentalCompareCompileTimes: String?
23+
@Option(help: "Compare compile times with the given results file")
24+
var compareCompileTime: String?
2525

2626
@Flag(help: "Show comparison chart")
2727
var showChart: Bool = false
@@ -72,9 +72,12 @@ struct Runner: ParsableCommand {
7272
try runner.compareWithNS(showChart: showChart, saveTo: saveComparison)
7373
}
7474
if let compareFile = compare {
75-
try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison)
75+
try runner.compare(
76+
against: compareFile,
77+
showChart: showChart,
78+
saveTo: saveComparison)
7679
}
77-
if let compareFile = experimentalCompareCompileTimes {
80+
if let compareFile = compareCompileTime {
7881
try runner.compareCompileTimes(against: compareFile, showChart: showChart)
7982
}
8083
}

Sources/RegexBenchmark/Suite/LiteralSearch.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import _StringProcessing
33
extension BenchmarkRunner {
44
mutating func addLiteralSearch() {
55
let searchNotFound = CrossBenchmark(baseName: "LiteralSearchNotFound", regex: "magic_string_to_search_for", input: Inputs.graphemeBreakData)
6-
let search = CrossBenchmark(baseName: "LiteralSearch", regex: "aatcgaagcagtcttctaacacccttagaaaagcaaacactattgaatactgccgccgca", input: Inputs.graphemeBreakData)
6+
let search = CrossBenchmark(baseName: "LiteralSearch", regex: "HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH", input: Inputs.graphemeBreakData)
77
searchNotFound.register(&self)
88
search.register(&self)
99
}

Sources/RegexBenchmark/Suite/Unicode.swift

+6-6
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ extension BenchmarkRunner {
44
mutating func addUnicode() {
55
// tagged unicode: unicode characters surrounded by html tags
66
// use the same html regex, uses backreference + reluctant quantification
7-
let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"#
8-
let taggedEmojis = CrossBenchmark(
9-
baseName: "TaggedEmojis",
10-
regex: tags,
11-
input: Inputs.taggedEmojis)
7+
// let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"# // disabled due to \b being unusably slow
8+
// let taggedEmojis = CrossBenchmark(
9+
// baseName: "TaggedEmojis",
10+
// regex: tags,
11+
// input: Inputs.taggedEmojis)
1212

1313
// Now actually matching emojis
1414
let emoji = #"(😃|😀|😳|😲|😦|😊|🙊|😘|😏|😳|😒){2,5}"#
@@ -18,7 +18,7 @@ extension BenchmarkRunner {
1818
regex: emoji,
1919
input: Inputs.taggedEmojis)
2020

21-
// taggedEmojis.register(&self) // disabled due to \b being unusably slow
21+
// taggedEmojis.register(&self)
2222
emojiRegex.register(&self)
2323
}
2424
}

Sources/_StringProcessing/Regex/Core.swift

+19
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,25 @@ extension Regex {
135135
}
136136
}
137137

138+
@available(SwiftStdlib 5.7, *)
139+
@_spi(RegexBenchmark)
140+
extension Regex {
141+
/// Compiles the stored DSLTree into bytecode and returns whether it succeeded.
142+
/// For measuring compilation times
143+
///
144+
/// Note: This bypasses the cached program that is normally used
145+
public func _compileRegex() -> Bool {
146+
do {
147+
let _ = try Compiler(
148+
tree: program.tree,
149+
compileOptions: program.compileOptions).emit()
150+
return true
151+
} catch {
152+
return false
153+
}
154+
}
155+
}
156+
138157
@available(SwiftStdlib 5.7, *)
139158
extension Regex {
140159
internal mutating func _setCompilerOptionsForTesting(_ opts: Compiler.CompileOptions) {

0 commit comments

Comments
 (0)