Add compile time measurement + cleanup

rctcwyvrn · rctcwyvrn · commit 2c1223628e53 · 2022-08-02T17:42:45.000-07:00
diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift
@@ -1,8 +1,9 @@
-import _StringProcessing
+@_spi(RegexBenchmark) import _StringProcessing
 import Foundation
 
 protocol RegexBenchmark {
   var name: String { get }
+  func compile()
   func run()
   func debug()
 }
@@ -19,6 +20,10 @@ struct Benchmark: RegexBenchmark {
     case allMatches
   }
   
+  func compile() {
+    blackHole(regex._compileRegex())
+  }
+  
   func run() {
     switch type {
     case .whole: blackHole(target.wholeMatch(of: regex))
@@ -43,6 +48,8 @@ struct NSBenchmark: RegexBenchmark {
     case first
   }
   
+  func compile() {}
+  
   func run() {
     switch type {
     case .allMatches: blackHole(regex.matches(in: target, range: range))
@@ -57,6 +64,10 @@ struct InputListBenchmark: RegexBenchmark {
   let regex: Regex<AnyRegexOutput>
   let targets: [String]
   
+  func compile() {
+    blackHole(regex._compileRegex())
+  }
+
   func run() {
     for target in targets {
       blackHole(target.wholeMatch(of: regex))
@@ -79,6 +90,8 @@ struct InputListNSBenchmark: RegexBenchmark {
     NSRange(target.startIndex..<target.endIndex, in: target)
   }
   
+  func compile() {}
+
   func run() {
     for target in targets {
       let range = range(in: target)
diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift
@@ -6,7 +6,9 @@ extension BenchmarkRunner {
     let url = URL(fileURLWithPath: savePath, isDirectory: false)
     let parent = url.deletingLastPathComponent()
     if !FileManager.default.fileExists(atPath: parent.path) {
-      try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true)
+      try! FileManager.default.createDirectory(
+        atPath: parent.path,
+        withIntermediateDirectories: true)
     }
     print("Saving result to \(url.path)")
     try results.save(to: url)
@@ -21,7 +23,11 @@ extension BenchmarkRunner {
   }
   
   /// Compare this runner's results against the results stored in the given file path
-  func compare(against compareFilePath: String, showChart: Bool, saveTo: String?) throws {
+  func compare(
+    against compareFilePath: String,
+    showChart: Bool,
+    saveTo: String?
+  ) throws {
     let compareFileURL = URL(fileURLWithPath: compareFilePath)
     let compareResult = try SuiteResult.load(from: compareFileURL)
     let compareFile = compareFileURL.lastPathComponent
@@ -30,13 +36,22 @@ extension BenchmarkRunner {
       .compare(with: compareResult)
       .filter({!$0.name.contains("_NS")})
       .filter({$0.diff != nil})
-    displayComparisons(comparisons, showChart, against: "saved benchmark result " + compareFile)
+    displayComparisons(
+      comparisons,
+      showChart,
+      against: "saved benchmark result " + compareFile)
     if let saveFile = saveTo {
       try saveComparisons(comparisons, path: saveFile)
     }
   }
   
-  func compareCompileTimes(against compareFilePath: String, showChart: Bool) throws {
+  // Compile times are often very short (5-20µs), so results are likely to be
+  // heavily affected by background tasks. This is primarily for making sure
+  // there aren't any catastrophic changes in compile times.
+  func compareCompileTimes(
+    against compareFilePath: String,
+    showChart: Bool
+  ) throws {
     let compareFileURL = URL(fileURLWithPath: compareFilePath)
     let compareResult = try SuiteResult.load(from: compareFileURL)
     let compareFile = compareFileURL.lastPathComponent
@@ -45,20 +60,30 @@ extension BenchmarkRunner {
       .compareCompileTimes(with: compareResult)
       .filter({!$0.name.contains("_NS")})
       .filter({$0.diff != nil})
-    print("[Experimental] Comparing estimated compile times")
-    displayComparisons(compileTimeComparisons, false, against: "saved benchmark result " + compareFile)
+    print("Comparing estimated compile times")
+    displayComparisons(
+      compileTimeComparisons,
+      false,
+      against: "saved benchmark result " + compareFile)
   }
   
   /// Compares Swift Regex benchmark results against NSRegularExpression
   func compareWithNS(showChart: Bool, saveTo: String?) throws {
     let comparisons = results.compareWithNS().filter({$0.diff != nil})
-    displayComparisons(comparisons, showChart, against: "NSRegularExpression (via CrossBenchmark)")
+    displayComparisons(
+      comparisons,
+      showChart,
+      against: "NSRegularExpression (via CrossBenchmark)")
     if let saveFile = saveTo {
       try saveComparisons(comparisons, path: saveFile)
     }
   }
   
-  func displayComparisons(_ comparisons: [BenchmarkResult.Comparison], _ showChart: Bool, against: String) {
+  func displayComparisons(
+    _ comparisons: [BenchmarkResult.Comparison],
+    _ showChart: Bool,
+    against: String
+  ) {
     let regressions = comparisons.filter({$0.diff!.seconds > 0})
       .sorted(by: {(a,b) in a.diff!.seconds > b.diff!.seconds})
     let improvements = comparisons.filter({$0.diff!.seconds < 0})
@@ -95,11 +120,16 @@ extension BenchmarkRunner {
     #endif
   }
   
-  func saveComparisons(_ comparisons: [BenchmarkResult.Comparison], path: String) throws {
+  func saveComparisons(
+    _ comparisons: [BenchmarkResult.Comparison],
+    path: String
+  ) throws {
     let url = URL(fileURLWithPath: path, isDirectory: false)
     let parent = url.deletingLastPathComponent()
     if !FileManager.default.fileExists(atPath: parent.path) {
-      try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true)
+      try! FileManager.default.createDirectory(
+        atPath: parent.path,
+        withIntermediateDirectories: true)
     }
     
     var contents = "name,latest,baseline,diff,percentage\n"
@@ -112,17 +142,10 @@ extension BenchmarkRunner {
 }
 
 struct BenchmarkResult: Codable {
+  let compileTime: Time
   let median: Time
-  let estimatedCompileTime: Time
   let stdev: Double
   let samples: Int
-
-  init(_ initialRunTime: Time, _ median: Time, _ stdev: Double, _ samples: Int) {
-    self.estimatedCompileTime = initialRunTime - median
-    self.median = median
-    self.stdev = stdev
-    self.samples = samples
-  }
 }
 
 extension BenchmarkResult {
@@ -135,7 +158,7 @@ extension BenchmarkResult {
     
     var diff: Time? {
       if diffCompileTimes {
-        return latest.estimatedCompileTime - baseline.estimatedCompileTime
+        return latest.compileTime - baseline.compileTime
       }
       if Stats.tTest(baseline, latest) {
         return latest.median - baseline.median
@@ -150,8 +173,8 @@ extension BenchmarkResult {
       let oldVal: Time
       let newVal: Time
       if diffCompileTimes {
-        oldVal = baseline.estimatedCompileTime
-        newVal = latest.estimatedCompileTime
+        oldVal = baseline.compileTime
+        newVal = latest.compileTime
       } else {
         oldVal = baseline.median
         newVal = latest.median
@@ -169,8 +192,8 @@ extension BenchmarkResult {
       let oldVal: Time
       let newVal: Time
       if diffCompileTimes {
-        oldVal = baseline.estimatedCompileTime
-        newVal = latest.estimatedCompileTime
+        oldVal = baseline.compileTime
+        newVal = latest.compileTime
       } else {
         oldVal = baseline.median
         newVal = latest.median
@@ -213,13 +236,18 @@ struct SuiteResult {
     return comparisons
   }
   
-  /// Compares the estimated compile times
-  func compareCompileTimes(with other: SuiteResult) -> [BenchmarkResult.Comparison] {
+  /// Compares the compile times
+  func compareCompileTimes(
+    with other: SuiteResult
+  ) -> [BenchmarkResult.Comparison] {
     var comparisons: [BenchmarkResult.Comparison] = []
     for item in results {
       if let otherVal = other.results[item.key] {
         comparisons.append(
-          .init(name: item.key, baseline: otherVal, latest: item.value, diffCompileTimes: true))
+          .init(name: item.key,
+                baseline: otherVal,
+                latest: item.value,
+                diffCompileTimes: true))
       }
     }
     return comparisons
diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift
@@ -18,32 +18,49 @@ struct BenchmarkRunner {
     suite.append(new)
   }
   
-  mutating func measure(benchmark: some RegexBenchmark, samples: Int) -> BenchmarkResult {
-    var times: [Time] = []
-    
-    // initial run to make sure the regex has been compiled
-    // FIXME: this is a very poor way of estimating compile time
-    // we should have some sort of interface directly with the engine to measure this
-    // This also completely breaks when we rerun measure() for variant results
-    let initialStart = Tick.now
+  mutating func measure(
+    benchmark: some RegexBenchmark,
+    samples: Int
+  ) -> BenchmarkResult {
+    var runtimes: [Time] = []
+    var compileTimes: [Time] = []
+    // Initial run to make sure the regex has been compiled
     benchmark.run()
-    let initialEnd = Tick.now
-    let initialRunTime = initialEnd.elapsedTime(since: initialStart)
+
+    // Measure compilation time
+    for _ in 0..<samples {
+      let start = Tick.now
+      benchmark.compile()
+      let end = Tick.now
+      let time = end.elapsedTime(since: start)
+      compileTimes.append(time)
+    }
+    
+    compileTimes.sort()
+    let compileTime = compileTimes[samples/2]
     
     // FIXME: use suspendingclock?
     for _ in 0..<samples {
       let start = Tick.now
       benchmark.run()
       let end = Tick.now
       let time = end.elapsedTime(since: start)
-      times.append(time)
+      runtimes.append(time)
     }
 
-    times.sort()
-    let median = times[samples/2]
-    let mean = times.reduce(0.0, {acc, next in acc + next.seconds}) / Double(times.count)
-    let stdev = (times.reduce(0.0, {acc, next in acc + pow(next.seconds - mean, 2)}) / Double(times.count)).squareRoot()
-    return BenchmarkResult(initialRunTime, median, stdev, samples)
+    runtimes.sort()
+    let median = runtimes[samples/2]
+    let sum = runtimes.reduce(0.0) {acc, next in acc + next.seconds}
+    let mean = sum / Double(runtimes.count)
+    let squareDiffs = runtimes.reduce(0.0) { acc, next in
+      acc + pow(next.seconds - mean, 2)
+    }
+    let stdev = (squareDiffs / Double(runtimes.count)).squareRoot()
+    return BenchmarkResult(
+      compileTime: compileTime,
+      median: median,
+      stdev: stdev,
+      samples: samples)
   }
   
   mutating func run() {
@@ -60,8 +77,11 @@ struct BenchmarkRunner {
           fatalError("Benchmark \(b.name) is too variant")
         }
       }
+      if result.compileTime > Time.millisecond {
+        print("Warning: Abnormally high compilation time, what happened?")
+      }
       if !quiet {
-        print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (estimated compile time: \(result.estimatedCompileTime))")
+        print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (compile time: \(result.compileTime))")
       }
       self.results.add(name: b.name, result: result)
     }
diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift
@@ -20,8 +20,8 @@ struct Runner: ParsableCommand {
   @Option(help: "The result file to compare against")
   var compare: String?
 
-  @Option(help: "Experimental compile time comparison")
-  var experimentalCompareCompileTimes: String?
+  @Option(help: "Compare compile times with the given results file")
+  var compareCompileTime: String?
   
   @Flag(help: "Show comparison chart")
   var showChart: Bool = false
@@ -72,9 +72,12 @@ struct Runner: ParsableCommand {
       try runner.compareWithNS(showChart: showChart, saveTo: saveComparison)
     }
     if let compareFile = compare {
-      try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison)
+      try runner.compare(
+        against: compareFile,
+        showChart: showChart,
+        saveTo: saveComparison)
     }
-    if let compareFile = experimentalCompareCompileTimes {
+    if let compareFile = compareCompileTime {
       try runner.compareCompileTimes(against: compareFile, showChart: showChart)
     }
   }
diff --git a/Sources/RegexBenchmark/Suite/LiteralSearch.swift b/Sources/RegexBenchmark/Suite/LiteralSearch.swift
@@ -3,7 +3,7 @@ import _StringProcessing
 extension BenchmarkRunner {
   mutating func addLiteralSearch() {
     let searchNotFound = CrossBenchmark(baseName: "LiteralSearchNotFound", regex: "magic_string_to_search_for", input: Inputs.graphemeBreakData)
-    let search = CrossBenchmark(baseName: "LiteralSearch", regex: "aatcgaagcagtcttctaacacccttagaaaagcaaacactattgaatactgccgccgca", input: Inputs.graphemeBreakData)
+    let search = CrossBenchmark(baseName: "LiteralSearch", regex: "HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH", input: Inputs.graphemeBreakData)
     searchNotFound.register(&self)
     search.register(&self)
   }
diff --git a/Sources/RegexBenchmark/Suite/Unicode.swift b/Sources/RegexBenchmark/Suite/Unicode.swift
@@ -4,11 +4,11 @@ extension BenchmarkRunner {
   mutating func addUnicode() {
     // tagged unicode: unicode characters surrounded by html tags
     // use the same html regex, uses backreference + reluctant quantification
-    let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"#
-    let taggedEmojis = CrossBenchmark(
-      baseName: "TaggedEmojis",
-      regex: tags,
-      input: Inputs.taggedEmojis)
+//    let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"# // disabled due to \b being unusably slow
+//    let taggedEmojis = CrossBenchmark(
+//      baseName: "TaggedEmojis",
+//      regex: tags,
+//      input: Inputs.taggedEmojis)
 
     // Now actually matching emojis
     let emoji = #"(😃|😀|😳|😲|😦|😊|🙊|😘|😏|😳|😒){2,5}"#
@@ -18,7 +18,7 @@ extension BenchmarkRunner {
       regex: emoji,
       input: Inputs.taggedEmojis)
 
-    // taggedEmojis.register(&self) // disabled due to \b being unusably slow
+    // taggedEmojis.register(&self)
     emojiRegex.register(&self)
   }
 }
diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift
@@ -135,6 +135,25 @@ extension Regex {
   }
 }
 
+@available(SwiftStdlib 5.7, *)
+@_spi(RegexBenchmark)
+extension Regex {
+  /// Compiles the stored DSLTree into bytecode and returns whether it succeeded.
+  /// For measuring compilation times
+  ///
+  /// Note: This bypasses the cached program that is normally used
+  public func _compileRegex() -> Bool {
+    do {
+      let _ = try Compiler(
+        tree: program.tree,
+        compileOptions: program.compileOptions).emit()
+      return true
+    } catch {
+      return false
+    }
+  }
+}
+
 @available(SwiftStdlib 5.7, *)
 extension Regex {
   internal mutating func _setCompilerOptionsForTesting(_ opts: Compiler.CompileOptions) {

Original file line number	Diff line number	Diff line change
`@@ -20,8 +20,8 @@ struct Runner: ParsableCommand {`
`20`	`20`	`@Option(help: "The result file to compare against")`
`21`	`21`	`var compare: String?`
`22`	`22`
`23`		`- @Option(help: "Experimental compile time comparison")`
`24`		`- var experimentalCompareCompileTimes: String?`
	`23`	`+ @Option(help: "Compare compile times with the given results file")`
	`24`	`+ var compareCompileTime: String?`
`25`	`25`
`26`	`26`	`@Flag(help: "Show comparison chart")`
`27`	`27`	`var showChart: Bool = false`
`@@ -72,9 +72,12 @@ struct Runner: ParsableCommand {`
`72`	`72`	`try runner.compareWithNS(showChart: showChart, saveTo: saveComparison)`
`73`	`73`	`}`
`74`	`74`	`if let compareFile = compare {`
`75`		`- try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison)`
	`75`	`+ try runner.compare(`
	`76`	`+ against: compareFile,`
	`77`	`+ showChart: showChart,`
	`78`	`+ saveTo: saveComparison)`
`76`	`79`	`}`
`77`		`- if let compareFile = experimentalCompareCompileTimes {`
	`80`	`+ if let compareFile = compareCompileTime {`
`78`	`81`	`try runner.compareCompileTimes(against: compareFile, showChart: showChart)`
`79`	`82`	`}`
`80`	`83`	`}`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@ import _StringProcessing`
`3`	`3`	`extension BenchmarkRunner {`
`4`	`4`	`mutating func addLiteralSearch() {`
`5`	`5`	`let searchNotFound = CrossBenchmark(baseName: "LiteralSearchNotFound", regex: "magic_string_to_search_for", input: Inputs.graphemeBreakData)`
`6`		`- let search = CrossBenchmark(baseName: "LiteralSearch", regex: "aatcgaagcagtcttctaacacccttagaaaagcaaacactattgaatactgccgccgca", input: Inputs.graphemeBreakData)`
	`6`	`+ let search = CrossBenchmark(baseName: "LiteralSearch", regex: "HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH", input: Inputs.graphemeBreakData)`
`7`	`7`	`searchNotFound.register(&self)`
`8`	`8`	`search.register(&self)`
`9`	`9`	`}`