From c6cdf6cd062cf8b3ca65f101e3cb5912edbf29d0 Mon Sep 17 00:00:00 2001 From: I-Ting Tina Liu Date: Wed, 13 Apr 2022 08:13:41 -0700 Subject: [PATCH] Throwing matches and update to CustomMatchingRegexComponent - Update the name `CustomRegexComponent` to `CustomMatchingRegexComponent` per pitch - Adopt `throws` for `CustomMatchingRegexComponent` as added in #261. Errors thrown by `CustomMatchingRegexComponent`'s conformers will be bubbled up to the engine and surfaced at client-side. --- .../Evolution/StringProcessingAlgorithms.md | 2 +- Sources/_StringProcessing/ByteCodeGen.swift | 4 +- .../Regex/DSLConsumers.swift | 14 +- Tests/RegexBuilderTests/CustomTests.swift | 249 +++++++++++++++++- Tests/RegexBuilderTests/RegexDSLTests.swift | 8 +- 5 files changed, 263 insertions(+), 14 deletions(-) diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 9454396ce..b976c562e 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -187,7 +187,7 @@ public protocol CustomMatchingRegexComponent : RegexComponent { _ input: String, startingAt index: String.Index, in bounds: Range - ) -> (upperBound: String.Index, match: Match)? + ) throws -> (upperBound: String.Index, match: Match)? } ``` diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index c44b5af94..8f111e627 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -302,7 +302,9 @@ extension Compiler.ByteCodeGen { // not captured. This may mean we should store // an existential instead of a closure... 
- let matcher = builder.makeMatcherFunction(matcher) + let matcher = builder.makeMatcherFunction { input, start, range in + try matcher(input, start, range) + } let valReg = builder.makeValueRegister() builder.buildMatcher(matcher, into: valReg) diff --git a/Sources/_StringProcessing/Regex/DSLConsumers.swift b/Sources/_StringProcessing/Regex/DSLConsumers.swift index e1a69d74b..ea46c789b 100644 --- a/Sources/_StringProcessing/Regex/DSLConsumers.swift +++ b/Sources/_StringProcessing/Regex/DSLConsumers.swift @@ -10,19 +10,21 @@ //===----------------------------------------------------------------------===// @available(SwiftStdlib 5.7, *) -public protocol CustomRegexComponent: RegexComponent { +public protocol CustomMatchingRegexComponent: RegexComponent { func match( _ input: String, startingAt index: String.Index, in bounds: Range - ) -> (upperBound: String.Index, output: RegexOutput)? + ) throws -> (upperBound: String.Index, output: RegexOutput)? } @available(SwiftStdlib 5.7, *) -extension CustomRegexComponent { +extension CustomMatchingRegexComponent { public var regex: Regex { - Regex(node: .matcher(.init(RegexOutput.self), { input, index, bounds in - match(input, startingAt: index, in: bounds) - })) + + let node: DSLTree.Node = .matcher(.init(RegexOutput.self), { input, index, bounds in + try match(input, startingAt: index, in: bounds) + }) + return Regex(node: node) } } diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index 0ac6b46c5..0a7d6fc59 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -14,7 +14,7 @@ import _StringProcessing @testable import RegexBuilder // A nibbler processes a single character from a string -private protocol Nibbler: CustomRegexComponent { +private protocol Nibbler: CustomMatchingRegexComponent { func nibble(_: Character) -> RegexOutput? 
} @@ -24,7 +24,7 @@ extension Nibbler { _ input: String, startingAt index: String.Index, in bounds: Range - ) -> (upperBound: String.Index, output: RegexOutput)? { + ) throws -> (upperBound: String.Index, output: RegexOutput)? { guard index != bounds.upperBound, let res = nibble(input[index]) else { return nil } @@ -49,6 +49,69 @@ private struct Asciibbler: Nibbler { } } +private struct IntParser: CustomMatchingRegexComponent { + struct ParseError: Error, Hashable {} + typealias RegexOutput = Int + func match(_ input: String, + startingAt index: String.Index, + in bounds: Range + ) throws -> (upperBound: String.Index, output: Int)? { + guard index != bounds.upperBound else { return nil } + + let r = Regex { + Capture(OneOrMore(.digit)) { Int($0) } + } + + guard let match = input[index.. + ) throws -> (upperBound: String.Index, output: Currency)? { + + guard index != bounds.upperBound else { return nil } + + let substr = input[index..( + _ regex: Regex, + _ tests: (input: String, match: Match?, expectError: E?)..., + file: StaticString = #file, + line: UInt = #line + ) { + for (input, match, expectError) in tests { + do { + let result = try regex.wholeMatch(in: input)?.output + XCTAssertEqual(result, match) + } catch let e as E { + XCTAssertEqual(e, expectError) + } catch { + XCTFail() + } + } + } + + func customTest( + _ regex: Regex, + _ tests: (input: String, match: Match?, expectError1: Error1?, expectError2: Error2?)..., + file: StaticString = #file, + line: UInt = #line + ) { + for (input, match, expectError1, expectError2) in tests { + do { + let result = try regex.wholeMatch(in: input)?.output + XCTAssertEqual(result, match) + } catch let e as Error1 { + XCTAssertEqual(e, expectError1, input, file: file, line: line) + } catch let e as Error2 { + XCTAssertEqual(e, expectError2, input, file: file, line: line) + } catch { + XCTFail("caught error: \(error.localizedDescription)") + } + } + } + + func customTest( + _ regex: Regex<(Substring, Capture)>, + _ tests: 
(input: String, match: (Substring, Capture)?, expectError1: Error1?, expectError2: Error2?)..., + file: StaticString = #file, + line: UInt = #line + ) { + for (input, match, expectError1, expectError2) in tests { + do { + let result = try regex.wholeMatch(in: input)?.output + XCTAssertEqual(result?.0, match?.0, file: file, line: line) + XCTAssertEqual(result?.1, match?.1, file: file, line: line) + } catch let e as Error1 { + XCTAssertEqual(e, expectError1, input, file: file, line: line) + } catch let e as Error2 { + XCTAssertEqual(e, expectError2, input, file: file, line: line) + } catch { + XCTFail("caught error: \(error.localizedDescription)") + } + } + } + + func customTest( + _ regex: Regex<(Substring, Capture1, Capture2)>, + _ tests: (input: String, match: (Substring, Capture1, Capture2)?, expectError1: Error1?, expectError2: Error2?)..., + file: StaticString = #file, + line: UInt = #line + ) { + for (input, match, expectError1, expectError2) in tests { + do { + let result = try regex.wholeMatch(in: input)?.output + XCTAssertEqual(result?.0, match?.0, file: file, line: line) + XCTAssertEqual(result?.1, match?.1, file: file, line: line) + XCTAssertEqual(result?.2, match?.2, file: file, line: line) + } catch let e as Error1 { + XCTAssertEqual(e, expectError1, input, file: file, line: line) + } catch let e as Error2 { + XCTAssertEqual(e, expectError2, input, file: file, line: line) + } catch { + XCTFail("caught error: \(error.localizedDescription)") + } + } + } + + // No capture, one error + customTest( + Regex { + IntParser() + }, + ("zzz", nil, IntParser.ParseError()), + ("x10x", nil, IntParser.ParseError()), + ("30", 30, nil) + ) + + customTest( + Regex { + CurrencyParser() + }, + ("USD", .usd, nil), + ("NTD", .ntd, nil), + ("NTD USD", nil, nil), + ("DEM", nil, CurrencyParser.ParseError.deprecated), + ("XXX", nil, CurrencyParser.ParseError.unrecognized) + ) + + // No capture, two errors + customTest( + Regex { + IntParser() + " " + IntParser() + }, + ("20304 
100", "20304 100", nil, nil), + ("20304.445 200", nil, IntParser.ParseError(), nil), + ("20304 200.123", nil, nil, IntParser.ParseError()), + ("20304.445 200.123", nil, IntParser.ParseError(), IntParser.ParseError()) + ) + + customTest( + Regex { + CurrencyParser() + IntParser() + }, + ("USD100", "USD100", nil, nil), + ("XXX100", nil, CurrencyParser.ParseError.unrecognized, nil), + ("USD100.000", nil, nil, IntParser.ParseError()), + ("XXX100.0000", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError()) + ) + + // One capture, two errors: One error is thrown from inside a capture, + // while the other one is thrown from outside + customTest( + Regex { + Capture { CurrencyParser() } + IntParser() + }, + ("USD100", ("USD100", .usd), nil, nil), + ("NTD305.5", nil, nil, IntParser.ParseError()), + ("DEM200", ("DEM200", .dem), CurrencyParser.ParseError.deprecated, nil), + ("XXX", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError()) + ) + + customTest( + Regex { + CurrencyParser() + Capture { IntParser() } + }, + ("USD100", ("USD100", 100), nil, nil), + ("NTD305.5", nil, nil, IntParser.ParseError()), + ("DEM200", ("DEM200", 200), CurrencyParser.ParseError.deprecated, nil), + ("XXX", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError()) + ) + + // One capture, two errors: Both errors are thrown from inside the capture + customTest( + Regex { + Capture { + CurrencyParser() + IntParser() + } + }, + ("USD100", ("USD100", "USD100"), nil, nil), + ("NTD305.5", nil, nil, IntParser.ParseError()), + ("DEM200", ("DEM200", "DEM200"), CurrencyParser.ParseError.deprecated, nil), + ("XXX", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError()) + ) + + // Two captures, two errors: Different errors are thrown from inside captures + customTest( + Regex { + Capture(CurrencyParser()) + Capture(IntParser()) + }, + ("USD100", ("USD100", .usd, 100), nil, nil), + ("NTD500", ("NTD500", .ntd, 500), nil, nil), + ("XXX20", nil, 
CurrencyParser.ParseError.unrecognized, IntParser.ParseError()), + ("DEM500", nil, CurrencyParser.ParseError.deprecated, nil), + ("DEM500.345", nil, CurrencyParser.ParseError.deprecated, IntParser.ParseError()), + ("NTD100.345", nil, nil, IntParser.ParseError()) + ) + + } } diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index b38b82a33..8159ba8ae 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -743,13 +743,13 @@ class RegexDSLTests: XCTestCase { var patch: Int var dev: String? } - struct SemanticVersionParser: CustomRegexComponent { + struct SemanticVersionParser: CustomMatchingRegexComponent { typealias RegexOutput = SemanticVersion func match( _ input: String, startingAt index: String.Index, in bounds: Range - ) -> (upperBound: String.Index, output: SemanticVersion)? { + ) throws -> (upperBound: String.Index, output: SemanticVersion)? { let regex = Regex { TryCapture(OneOrMore(.digit)) { Int($0) } "." @@ -776,13 +776,13 @@ class RegexDSLTests: XCTestCase { return (match.range.upperBound, result) } } - + let versions = [ ("1.0", SemanticVersion(major: 1, minor: 0, patch: 0)), ("1.0.1", SemanticVersion(major: 1, minor: 0, patch: 1)), ("12.100.5-dev", SemanticVersion(major: 12, minor: 100, patch: 5, dev: "dev")), ] - + let parser = SemanticVersionParser() for (str, version) in versions { XCTAssertEqual(str.wholeMatch(of: parser)?.output, version)