swiftlang · natecook1000 · Jan 8, 2025 · Jul 17, 2024 · Jul 17, 2024 · Jul 24, 2024
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
diff --git a/Package.swift b/Package.swift
@@ -59,9 +59,9 @@ let package = Package(
             name: "VariadicsGenerator",
             targets: ["VariadicsGenerator"]),
 // Disable to work around rdar://126877024
-//        .executable(
-//            name: "RegexBenchmark",
-//            targets: ["RegexBenchmark"])
+        .executable(
+          name: "RegexBenchmark",
+          targets: ["RegexBenchmark"])
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"),
@@ -96,7 +96,7 @@ let package = Package(
                 swiftSettings: [availabilityDefinition]),
         .testTarget(
             name: "RegexTests",
-            dependencies: ["_StringProcessing", "TestSupport"],
+            dependencies: ["_StringProcessing", "RegexBuilder", "TestSupport"],
             swiftSettings: [
                 availabilityDefinition
             ]),
@@ -143,17 +143,17 @@ let package = Package(
                 "_StringProcessing"
             ],
             swiftSettings: [availabilityDefinition]),
-//        .executableTarget(
-//            name: "RegexBenchmark",
-//            dependencies: [
-//                .product(name: "ArgumentParser", package: "swift-argument-parser"),
-//                "_RegexParser",
-//                "_StringProcessing",
-//                "RegexBuilder"
-//            ],
-//            swiftSettings: [
-//                .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
-//            ]),
+        .executableTarget(
+            name: "RegexBenchmark",
+            dependencies: [
+                .product(name: "ArgumentParser", package: "swift-argument-parser"),
+                "_RegexParser",
+                "_StringProcessing",
+                "RegexBuilder"
+            ],
+            swiftSettings: [
+                .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
+            ]),
 
         // MARK: Exercises
         .target(

diff --git a/Sources/Exercises/Participants/HandWrittenParticipant.swift b/Sources/Exercises/Participants/HandWrittenParticipant.swift
@@ -60,7 +60,8 @@ private func graphemeBreakPropertyData(
   }
 
   // For testing our framework
-  if forceFailure, lower == Unicode.Scalar(0x07FD) {
+  let failureSigil = Unicode.Scalar(0x07FD as UInt32)!
+  if forceFailure, lower == failureSigil {
     return nil
   }
 

diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift
@@ -153,29 +153,34 @@ struct CrossBenchmark {
   /// Whether to also run scalar-semantic mode
   var alsoRunScalarSemantic: Bool = true
 
+  var alsoRunSimpleWordBoundaries: Bool = false
+
   func register(_ runner: inout BenchmarkRunner) {
     if isWhole {
       runner.registerCrossBenchmark(
         nameBase: baseName,
         input: input,
         pattern: regex,
         .whole,
-        alsoRunScalarSemantic: alsoRunScalarSemantic)
+        alsoRunScalarSemantic: alsoRunScalarSemantic,
+        alsoRunSimpleWordBoundaries: alsoRunSimpleWordBoundaries)
     } else {
       runner.registerCrossBenchmark(
         nameBase: baseName,
         input: input,
         pattern: regex,
         .allMatches,
-        alsoRunScalarSemantic: alsoRunScalarSemantic)
+        alsoRunScalarSemantic: alsoRunScalarSemantic,
+        alsoRunSimpleWordBoundaries: alsoRunSimpleWordBoundaries)
 
       if includeFirst || runner.includeFirstOverride {
         runner.registerCrossBenchmark(
           nameBase: baseName,
           input: input,
           pattern: regex,
           .first,
-          alsoRunScalarSemantic: alsoRunScalarSemantic)
+          alsoRunScalarSemantic: alsoRunScalarSemantic,
+          alsoRunSimpleWordBoundaries: alsoRunSimpleWordBoundaries)
       }
     }
   }

diff --git a/Sources/RegexBenchmark/BenchmarkRegistration.swift b/Sources/RegexBenchmark/BenchmarkRegistration.swift
@@ -18,6 +18,9 @@ extension BenchmarkRunner {
     self.addDiceNotation()
     self.addErrorMessages()
     self.addIpAddress()
+
+    self.addURLWithWordBoundaries()
+    self.addFSPathsRegex()
     // -- end of registrations --
   }
 }
diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift
@@ -21,7 +21,21 @@ extension BenchmarkRunner {
     self.results = result
     print("Loaded results from \(url.path)")
   }
-
+
+  /// Attempts to save results in a CSV format to the given path, creating the
+  /// parent directory first when it does not exist.
+  /// - Throws: Any error from creating the directory or from writing the file.
+  func saveCSV(to savePath: String) throws {
+    let url = URL(fileURLWithPath: savePath, isDirectory: false)
+    let parent = url.deletingLastPathComponent()
+    if !FileManager.default.fileExists(atPath: parent.path) {
+      try FileManager.default.createDirectory(
+        atPath: parent.path, withIntermediateDirectories: true)
+    }
+    print("Saving result as CSV to \(url.path)")
+    try results.saveCSV(to: url)
+  }
+
   /// Compare this runner's results against the results stored in the given file path
   func compare(
     against compareFilePath: String,
@@ -153,6 +167,12 @@ struct Measurement: Codable, CustomStringConvertible {
   var description: String {
     return "\(median) (stdev: \(Time(stdev)), N = \(samples))"
   }
+
+  /// The median (via `asCSVSeconds`), standard deviation and sample count,
+  /// joined by ", " for use as three CSV fields.
+  var asCSV: String {
+    "\(median.asCSVSeconds), \(stdev), \(samples)"
+  }
 }
 
 struct BenchmarkResult: Codable, CustomStringConvertible {
@@ -170,6 +190,13 @@ struct BenchmarkResult: Codable, CustomStringConvertible {
     }
     return base
   }
+
+  /// The runtime, compile-time and parse-time measurements as CSV fields;
+  /// an absent compile or parse measurement is written as three `N/A` fields.
+  var asCSV: String {
+    let missing = "N/A, N/A, N/A"
+    return "\(runtime.asCSV), \(compileTime?.asCSV ?? missing), \(parseTime?.asCSV ?? missing)"
+  }
 }
 
 extension BenchmarkResult {
@@ -263,6 +290,27 @@ struct SuiteResult {
 }
 
 extension SuiteResult: Codable {
+  /// Writes every benchmark result to `url` as CSV, one row per benchmark,
+  /// preceded by a header row.
+  ///
+  /// - Parameter url: The destination file; it is written atomically as UTF-8.
+  /// - Throws: Any error raised while writing the file.
+  func saveCSV(to url: URL) throws {
+    // Sort by benchmark name so the row order is stable across runs.
+    let output = results.sorted { $0.key < $1.key }
+    var contents = """
+    name,\
+    runtime_median, runTime_stddev, runTime_samples,\
+    compileTime_median, compileTime_stddev, compileTime_samples,\
+    parseTime_median, parseTime_stddev, parseTime_samples\n
+    """
+    for (name, result) in output {
+      contents.append("\(name), \(result.asCSV)\n")
+    }
+    print("Saving result as .csv to \(url.path)")
+    try contents.write(to: url, atomically: true, encoding: .utf8)
+  }
+
   func save(to url: URL) throws {
     let encoder = JSONEncoder()
     let data = try encoder.encode(self)

diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift
@@ -33,7 +33,8 @@ struct BenchmarkRunner {
     input: String,
     pattern: String,
     _ type: Benchmark.MatchType,
-    alsoRunScalarSemantic: Bool = true
+    alsoRunScalarSemantic: Bool = true,
+    alsoRunSimpleWordBoundaries: Bool
   ) {
     let swiftRegex = try! Regex(pattern)
     let nsRegex: NSRegularExpression
@@ -58,6 +59,16 @@ struct BenchmarkRunner {
         type: .init(type),
         target: input))
 
+    if alsoRunSimpleWordBoundaries {
+      register(
+        Benchmark(
+          name: nameBase + nameSuffix + "_SimpleWordBoundaries",
+          regex: swiftRegex.wordBoundaryKind(.simple),
+          pattern: pattern,
+          type: type,
+          target: input))
+    }
+
     if alsoRunScalarSemantic {
       register(
         Benchmark(

diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift
@@ -32,12 +32,18 @@ struct Runner: ParsableCommand {
   @Option(help: "Save comparison results as csv")
   var saveComparison: String?
 
+  @Option(help: "Save benchmark results as csv")
+  var saveCSV: String?
+
   @Flag(help: "Quiet mode")
   var quiet = false
 
   @Flag(help: "Exclude running NSRegex benchmarks")
   var excludeNs = false
-
+
+  @Flag(help: "Rather than specify specific-benchmarks as patterns, use exact names")
+  var exactName = false
+
   @Flag(help: """
 Enable tracing of the engine (warning: lots of output). Prints out processor state each cycle
 
@@ -73,7 +79,11 @@ swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED
     if !self.specificBenchmarks.isEmpty {
       runner.suite = runner.suite.filter { b in
         specificBenchmarks.contains { pattern in
-          try! Regex(pattern).firstMatch(in: b.name) != nil
+          if exactName {
+            return pattern == b.name
+          }
+
+          return try! Regex(pattern).firstMatch(in: b.name) != nil
         }
       }
     }
@@ -84,9 +94,14 @@ swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED
 
     if let loadFile = load {
       try runner.load(from: loadFile)
+      if excludeNs {
+        runner.results.results = runner.results.results.filter {
+          !$0.key.contains("_NS")
+        }
+      }
     } else {
       if excludeNs {
-        runner.suite = runner.suite.filter { b in !b.name.contains("NS") }
+        runner.suite = runner.suite.filter { b in !b.name.contains("_NS") }
       }
       runner.run()
     }
@@ -109,5 +124,8 @@ swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED
     if let compareFile = compareCompileTime {
       try runner.compareCompileTimes(against: compareFile, showChart: showChart)
     }
+    if let csvPath = saveCSV {
+      try runner.saveCSV(to: csvPath)
+    }
   }
 }
diff --git a/Sources/RegexBenchmark/Inputs/FSPaths.swift b/Sources/RegexBenchmark/Inputs/FSPaths.swift
@@ -0,0 +1,70 @@
+// Successful match FSPaths
+private let pathSuccess = #"""
+./First/Second/Third/some/really/long/content.extension/more/stuff/OptionLeft
+./First/Second/Third/some/really/long/content.extension/more/stuff/OptionRight
+./First/Second/PrefixThird/some/really/long/content.extension/more/stuff/OptionLeft
+./First/Second/PrefixThird/some/really/long/content.extension/more/stuff/OptionRight
+"""#
+
+// Unsuccessful match FSPaths.
+//
+// We will have far more failures than successful matches by interspersing
+// this whole list between each success
+private let pathFailure = #"""
+a/b/c
+/smol/path
+/a/really/long/path/that/is/certainly/stored/out/of/line
+./First/Second/Third/some/really/long/content.extension/more/stuff/NothingToSeeHere
+./First/Second/PrefixThird/some/really/long/content.extension/more/stuff/NothingToSeeHere
+./First/Second/Third/some/really/long/content.extension/more/stuff/OptionNeither
+./First/Second/PrefixThird/some/really/long/content.extension/more/stuff/OptionNeither
+/First/Second/Third/some/really/long/content.extension/more/stuff/OptionLeft
+/First/Second/Third/some/really/long/content.extension/more/stuff/OptionRight
+/First/Second/PrefixThird/some/really/long/content.extension/more/stuff/OptionLeft
+/First/Second/PrefixThird/some/really/long/content.extension/more/stuff/OptionRight
+./First/Second/Third/some/really/long/content/more/stuff/OptionLeft
+./First/Second/Third/some/really/long/content/more/stuff/OptionRight
+./First/Second/PrefixThird/some/really/long/content/more/stuff/OptionLeft
+./First/Second/PrefixThird/some/really/long/content/more/stuff/OptionRight
+"""#
+
+/// Splits `s` into its non-empty lines.
+private func listify(_ s: String) -> [String] {
+  s.split(whereSeparator: \.isNewline).map(String.init)
+}
+private let pathSuccessList: [String] = listify(pathSuccess)
+private let pathFailureList: [String] = listify(pathFailure)
+
+/// Repeats `input` until the result has at least 1,000 elements; an empty
+/// `input` is returned unchanged because repeating it would never terminate.
+private func scale(_ input: [String]) -> [String] {
+  guard !input.isEmpty else { return input }
+  var result = input
+  while result.count < 1_000 { result.append(contentsOf: input) }
+  return result
+}
+
+extension Inputs {
+  /// Failure paths with each success path interleaved (two copies of the
+  /// failure list between successes), scaled up by repetition.
+  static let fsPathsList: [String] = {
+    var result = pathFailureList
+    result.append(contentsOf: pathFailureList)
+    for success in pathSuccessList {
+      result.append(success)
+      result.append(contentsOf: pathFailureList)
+      result.append(contentsOf: pathFailureList)
+    }
+    return scale(result)
+  }()
+
+  /// Only the paths that should not match, scaled up by repetition.
+  static let fsPathsNotFoundList: [String] = {
+    scale(pathFailureList)
+  }()
+
+  /// Only the paths that should match, scaled up by repetition.
+  static let fsPathsFoundList: [String] = {
+    scale(pathSuccessList)
+  }()
+}