Enable metrics and tracing on the benchmarker

rctcwyvrn · rctcwyvrn · commit b9f68c888df7 · 2022-08-04T17:26:53.000-07:00
diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift
@@ -3,14 +3,14 @@ import Foundation
 
 protocol RegexBenchmark {
   var name: String { get }
-  func compile()
+  mutating func compile()
   func run()
   func debug()
 }
 
 struct Benchmark: RegexBenchmark {
   let name: String
-  let regex: Regex<AnyRegexOutput>
+  var regex: Regex<AnyRegexOutput>
   let type: MatchType
   let target: String
 
@@ -20,8 +20,15 @@ struct Benchmark: RegexBenchmark {
     case allMatches
   }
   
-  func compile() {
-    blackHole(regex._compileRegex())
+  mutating func compile() {
+    let _ = regex._forceAction(.recompile)
+  }
+  
+  mutating func enableTracing() {
+    let _ = regex._forceAction(.setOptions(.enableTracing))
+  }
+  mutating func enableMetrics() {
+    let _ = regex._forceAction(.setOptions(.enableMetrics))
   }
   
   func run() {
@@ -48,7 +55,8 @@ struct NSBenchmark: RegexBenchmark {
     case first
   }
   
-  func compile() {}
+  // Not measured for NSRegularExpression
+  mutating func compile() {}
   
   func run() {
     switch type {
@@ -61,11 +69,17 @@ struct NSBenchmark: RegexBenchmark {
 /// A benchmark running a regex on strings in input set
 struct InputListBenchmark: RegexBenchmark {
   let name: String
-  let regex: Regex<AnyRegexOutput>
+  var regex: Regex<AnyRegexOutput>
   let targets: [String]
   
-  func compile() {
-    blackHole(regex._compileRegex())
+  mutating func compile() {
+    blackHole(regex._forceAction(.recompile))
+  }
+  mutating func enableTracing() {
+    let _ = regex._forceAction(.setOptions(.enableTracing))
+  }
+  mutating func enableMetrics() {
+    let _ = regex._forceAction(.setOptions(.enableMetrics))
   }
 
   func run() {
@@ -90,7 +104,7 @@ struct InputListNSBenchmark: RegexBenchmark {
     NSRange(target.startIndex..<target.endIndex, in: target)
   }
   
-  func compile() {}
+  mutating func compile() {}
 
   func run() {
     for target in targets {
@@ -160,7 +174,7 @@ struct CrossBenchmark {
           regex: nsRegex,
           type: .allMatches,
           target: input))
-      if includeFirst {
+      if includeFirst || runner.includeFirstOverride {
         runner.register(
           Benchmark(
             name: baseName + "First",
diff --git a/Sources/RegexBenchmark/BenchmarkRegistration.swift b/Sources/RegexBenchmark/BenchmarkRegistration.swift
@@ -2,27 +2,22 @@
 // Do not remove the start of registration or end of registration markers
 
 extension BenchmarkRunner {
-  static func makeRunner(
-    _ samples: Int,
-    _ quiet: Bool
-  ) -> BenchmarkRunner {
-    var benchmark = BenchmarkRunner("RegexBench", samples, quiet)
+  mutating func registerDefault() {
     // -- start of registrations --
-    benchmark.addReluctantQuant()
-    benchmark.addCSS()
-    benchmark.addNotFound()
-    benchmark.addGraphemeBreak()
-    benchmark.addHangulSyllable()
-    // benchmark.addHTML() // Disabled due to \b being unusably slow
-    benchmark.addEmail()
-    benchmark.addCustomCharacterClasses()
-    benchmark.addBuiltinCC()
-    benchmark.addUnicode()
-    benchmark.addLiteralSearch()
-    benchmark.addDiceNotation()
-    benchmark.addErrorMessages()
-    benchmark.addIpAddress()
+    self.addReluctantQuant()
+    self.addCSS()
+    self.addNotFound()
+    self.addGraphemeBreak()
+    self.addHangulSyllable()
+    // self.addHTML() // Disabled due to \b being unusably slow
+    self.addEmail()
+    self.addCustomCharacterClasses()
+    self.addBuiltinCC()
+    self.addUnicode()
+    self.addLiteralSearch()
+    self.addDiceNotation()
+    self.addErrorMessages()
+    self.addIpAddress()
     // -- end of registrations --
-    return benchmark
   }
 }
diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift
@@ -1,4 +1,5 @@
 import Foundation
+@_spi(RegexBenchmark) import _StringProcessing
 
 struct BenchmarkRunner {
   let suiteName: String
@@ -7,21 +8,43 @@ struct BenchmarkRunner {
   let samples: Int
   var results: SuiteResult = SuiteResult()
   let quiet: Bool
-
-  init(_ suiteName: String, _ n: Int, _ quiet: Bool) {
-    self.suiteName = suiteName
-    self.samples = n
-    self.quiet = quiet
+  let enableTracing: Bool
+  let enableMetrics: Bool
+  
+  // Forcibly include firstMatch benchmarks for all CrossBenchmarks
+  let includeFirstOverride: Bool
+  
+  mutating func register(_ benchmark: some RegexBenchmark) {
+    suite.append(benchmark)
   }
   
-  mutating func register(_ new: some RegexBenchmark) {
-    suite.append(new)
+  mutating func register(_ benchmark: Benchmark) {
+    var benchmark = benchmark
+    if enableTracing {
+      benchmark.enableTracing()
+    }
+    if enableMetrics {
+      benchmark.enableMetrics()
+    }
+    suite.append(benchmark)
+  }
+  
+  mutating func register(_ benchmark: InputListBenchmark) {
+    var benchmark = benchmark
+    if enableTracing {
+      benchmark.enableTracing()
+    }
+    if enableMetrics {
+      benchmark.enableMetrics()
+    }
+    suite.append(benchmark)
   }
   
   mutating func measure(
     benchmark: some RegexBenchmark,
     samples: Int
   ) -> BenchmarkResult {
+    var benchmark = benchmark
     var runtimes: [Time] = []
     var compileTimes: [Time] = []
     // Initial run to make sure the regex has been compiled
diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift
@@ -37,9 +37,26 @@ struct Runner: ParsableCommand {
 
   @Flag(help: "Exclude running NSRegex benchmarks")
   var excludeNs = false
+  
+  @Flag(help: "Enable tracing of the engine (warning: lots of output)")
+  var enableTracing: Bool = false
+
+  @Flag(help: "Enable engine metrics (warning: lots of output)")
+  var enableMetrics: Bool = false
+  
+  @Flag(help: "Include firstMatch benchmarks in CrossBenchmark (off by default)")
+  var includeFirst: Bool = false
 
   mutating func run() throws {
-    var runner = BenchmarkRunner.makeRunner(samples, quiet)
+    var runner = BenchmarkRunner(
+      suiteName: "DefaultRegexSuite",
+      samples: samples,
+      quiet: quiet,
+      enableTracing: enableTracing,
+      enableMetrics: enableMetrics,
+      includeFirstOverride: includeFirst)
+    
+    runner.registerDefault()
     
     if !self.specificBenchmarks.isEmpty {
       runner.suite = runner.suite.filter { b in
diff --git a/Sources/RegexBenchmark/Debug.swift b/Sources/RegexBenchmark/Debug.swift
@@ -6,7 +6,7 @@ extension Benchmark {
     case .whole:
       let result = target.wholeMatch(of: regex)
       if let match = result {
-        if match.0.count > 100 {
+        if match.0.count > 1000 {
           print("- Match: len =  \(match.0.count)")
         } else {
           print("- Match: \(match.0)")
@@ -22,7 +22,7 @@ extension Benchmark {
       }
       
       print("- Total matches: \(results.count)")
-      if results.count > 10 {
+      if results.count > 100 {
         print("# Too many matches, not printing")
         let avgLen = results.map({result in String(target[result.range]).count})
           .reduce(0.0, {$0 + Double($1)}) / Double(results.count)
@@ -32,7 +32,7 @@ extension Benchmark {
       }
       
       for match in results {
-        if match.0.count > 100 {
+        if match.0.count > 1000 {
           print("- Match: len =  \(match.0.count)")
         } else {
           print("- Match: \(match.0)")
@@ -42,7 +42,7 @@ extension Benchmark {
     case .first:
       let result = target.firstMatch(of: regex)
       if let match = result {
-        if match.0.count > 100 {
+        if match.0.count > 1000 {
           print("- Match: len =  \(match.0.count)")
         } else {
           print("- Match: \(match.0)")
@@ -66,13 +66,13 @@ extension NSBenchmark {
       }
       
       print("- Total matches: \(results.count)")
-      if results.count > 10 {
+      if results.count > 100 {
         print("# Too many matches, not printing")
         return
       }
       
       for m in results {
-        if m.range.length > 100 {
+        if m.range.length > 1000 {
           print("- Match: len =  \(m.range.length)")
         } else {
           print("- Match: \(target[Range(m.range, in: target)!])")
@@ -81,7 +81,7 @@ extension NSBenchmark {
     case .first:
       let result = regex.firstMatch(in: target, range: range)
       if let match = result {
-        if match.range.length > 100 {
+        if match.range.length > 1000 {
           print("- Match: len =  \(match.range.length)")
         } else {
           print("- Match: \(target[Range(match.range, in: target)!])")
diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift
@@ -21,7 +21,8 @@ extension Engine {
       subjectBounds: bounds,
       searchBounds: bounds,
       matchMode: matchMode,
-      isTracingEnabled: enableTracing)
+      isTracingEnabled: enableTracing,
+      shouldMeasureMetrics: enableMetrics)
   }
   
   func makeFirstMatchProcessor(
@@ -35,7 +36,8 @@ extension Engine {
       subjectBounds: subjectBounds,
       searchBounds: searchBounds,
       matchMode: .partialFromFront,
-      isTracingEnabled: enableTracing)
+      isTracingEnabled: enableTracing,
+      shouldMeasureMetrics: enableMetrics)
   }
 }
 
diff --git a/Sources/_StringProcessing/Engine/Engine.swift b/Sources/_StringProcessing/Engine/Engine.swift
@@ -24,7 +24,7 @@ struct Engine {
     set { program.enableTracing = newValue }
   }
   var enableMetrics: Bool {
-    get { program.enableTracing }
+    get { program.enableMetrics }
     set { program.enableMetrics = newValue }
   }
 
diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift
@@ -22,7 +22,7 @@ struct Instruction: RawRepresentable, Hashable {
 }
 
 extension Instruction {
-  enum OpCode: UInt64, CaseIterable {
+  enum OpCode: UInt64 {
     case invalid = 0
 
     // MARK: - General Purpose
diff --git a/Sources/_StringProcessing/Engine/Metrics.swift b/Sources/_StringProcessing/Engine/Metrics.swift
@@ -1,24 +1,37 @@
 extension Processor {
   struct ProcessorMetrics {
-    var instructionCounts: [Int] = .init(repeating: 0, count: Instruction.OpCode.allCases.count)
-    var caseInsensitiveInstrs: Bool = false
+    var instructionCounts: [Instruction.OpCode: Int] = [:]
+    var backtracks: Int = 0
+    var resets: Int = 0
   }
   
   func printMetrics() {
-    // print("Total cycle count: \(cycleCount)")
-    // print("Instructions:")
-    let sorted = metrics.instructionCounts.enumerated()
+    print("===")
+    print("Total cycle count: \(cycleCount)")
+    print("Backtracks: \(metrics.backtracks)")
+    print("Resets: \(metrics.resets)")
+    print("Instructions:")
+    let sorted = metrics.instructionCounts
       .filter({$0.1 != 0})
       .sorted(by: { (a,b) in a.1 > b.1 })
     for (opcode, count) in sorted {
-      print("\(Instruction.OpCode.init(rawValue: UInt64(opcode))!),\(count)")
+      print("> \(opcode): \(count)")
+    }
+    print("===")
+  }
+
+  mutating func measure() {
+    let (opcode, _) = fetch().destructure
+    if metrics.instructionCounts.keys.contains(opcode) {
+      metrics.instructionCounts[opcode]! += 1
+    } else {
+      metrics.instructionCounts.updateValue(1, forKey: opcode)
     }
   }
   
   mutating func measureMetrics() {
     if shouldMeasureMetrics {
-      let (opcode, _) = fetch().destructure
-      metrics.instructionCounts[Int(opcode.rawValue)] += 1
+      measure()
     }
   }
 }
diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
diff --git a/Sources/_StringProcessing/Utility/Traced.swift b/Sources/_StringProcessing/Utility/Traced.swift
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
diff --git a/Utils/createBenchmark.py b/Utils/createBenchmark.py

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,8 @@ extension Engine {`
`21`	`21`	`subjectBounds: bounds,`
`22`	`22`	`searchBounds: bounds,`
`23`	`23`	`matchMode: matchMode,`
`24`		`- isTracingEnabled: enableTracing)`
	`24`	`+ isTracingEnabled: enableTracing,`
	`25`	`+ shouldMeasureMetrics: enableMetrics)`
`25`	`26`	`}`
`26`	`27`
`27`	`28`	`func makeFirstMatchProcessor(`
`@@ -35,7 +36,8 @@ extension Engine {`
`35`	`36`	`subjectBounds: subjectBounds,`
`36`	`37`	`searchBounds: searchBounds,`
`37`	`38`	`matchMode: .partialFromFront,`
`38`		`- isTracingEnabled: enableTracing)`
	`39`	`+ isTracingEnabled: enableTracing,`
	`40`	`+ shouldMeasureMetrics: enableMetrics)`
`39`	`41`	`}`
`40`	`42`	`}`
`41`	`43`
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ struct Engine {`
`24`	`24`	`set { program.enableTracing = newValue }`
`25`	`25`	`}`
`26`	`26`	`var enableMetrics: Bool {`
`27`		`- get { program.enableTracing }`
	`27`	`+ get { program.enableMetrics }`
`28`	`28`	`set { program.enableMetrics = newValue }`
`29`	`29`	`}`
`30`	`30`
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ struct Instruction: RawRepresentable, Hashable {`
`22`	`22`	`}`
`23`	`23`
`24`	`24`	`extension Instruction {`
`25`		`- enum OpCode: UInt64, CaseIterable {`
	`25`	`+ enum OpCode: UInt64 {`
`26`	`26`	`case invalid = 0`
`27`	`27`
`28`	`28`	`// MARK: - General Purpose`