Skip to content

Throwing matches and update to CustomMatchingRegexComponent #273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Documentation/Evolution/StringProcessingAlgorithms.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public protocol CustomMatchingRegexComponent : RegexComponent {
_ input: String,
startingAt index: String.Index,
in bounds: Range<String.Index>
) -> (upperBound: String.Index, match: Match)?
) throws -> (upperBound: String.Index, match: Match)?
}
```

Expand Down
4 changes: 3 additions & 1 deletion Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,9 @@ extension Compiler.ByteCodeGen {
// not captured. This may mean we should store
// an existential instead of a closure...

let matcher = builder.makeMatcherFunction(matcher)
let matcher = builder.makeMatcherFunction { input, start, range in
try matcher(input, start, range)
}

let valReg = builder.makeValueRegister()
builder.buildMatcher(matcher, into: valReg)
Expand Down
14 changes: 8 additions & 6 deletions Sources/_StringProcessing/Regex/DSLConsumers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,21 @@
//===----------------------------------------------------------------------===//

@available(SwiftStdlib 5.7, *)
public protocol CustomRegexComponent: RegexComponent {
public protocol CustomMatchingRegexComponent: RegexComponent {
func match(
_ input: String,
startingAt index: String.Index,
in bounds: Range<String.Index>
) -> (upperBound: String.Index, output: RegexOutput)?
) throws -> (upperBound: String.Index, output: RegexOutput)?
}

@available(SwiftStdlib 5.7, *)
extension CustomRegexComponent {
extension CustomMatchingRegexComponent {
public var regex: Regex<RegexOutput> {
Regex(node: .matcher(.init(RegexOutput.self), { input, index, bounds in
match(input, startingAt: index, in: bounds)
}))

let node: DSLTree.Node = .matcher(.init(RegexOutput.self), { input, index, bounds in
try match(input, startingAt: index, in: bounds)
})
return Regex(node: node)
}
}
249 changes: 247 additions & 2 deletions Tests/RegexBuilderTests/CustomTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import _StringProcessing
@testable import RegexBuilder

// A nibbler processes a single character from a string
private protocol Nibbler: CustomRegexComponent {
private protocol Nibbler: CustomMatchingRegexComponent {
func nibble(_: Character) -> RegexOutput?
}

Expand All @@ -24,7 +24,7 @@ extension Nibbler {
_ input: String,
startingAt index: String.Index,
in bounds: Range<String.Index>
) -> (upperBound: String.Index, output: RegexOutput)? {
) throws -> (upperBound: String.Index, output: RegexOutput)? {
guard index != bounds.upperBound, let res = nibble(input[index]) else {
return nil
}
Expand All @@ -49,6 +49,69 @@ private struct Asciibbler: Nibbler {
}
}

/// A custom regex component that consumes a run of digits at the current
/// match position and produces it as an `Int`.
///
/// Returns `nil` when the match position is already at the end of `bounds`.
/// Throws `ParseError` when the remaining input does not begin with digits,
/// or when the digits cannot be converted to an `Int`.
private struct IntParser: CustomMatchingRegexComponent {
  struct ParseError: Error, Hashable {}
  typealias RegexOutput = Int

  func match(
    _ input: String,
    startingAt index: String.Index,
    in bounds: Range<String.Index>
  ) throws -> (upperBound: String.Index, output: Int)? {
    // Nothing left to consume: report "no match" rather than an error.
    if index == bounds.upperBound {
      return nil
    }

    let digits = Regex {
      Capture(OneOrMore(.digit)) { Int($0) }
    }
    let remainder = input[index..<bounds.upperBound]

    // No leading digits at all.
    guard let found = remainder.prefixMatch(of: digits) else {
      throw ParseError()
    }
    // Digits were found, but `Int($0)` in the capture transform produced nil.
    guard let value = found.1 else {
      throw ParseError()
    }

    return (found.range.upperBound, value)
  }
}

/// A custom regex component that consumes a currency code at the current
/// match position.
///
/// - A supported code (`USD`, `NTD`) produces the matching `Currency` and an
///   upper bound just past the code.
/// - A deprecated code (`DEM`) throws `ParseError.deprecated`.
/// - Any other non-empty input throws `ParseError.unrecognized`.
/// - Returns `nil` when the match position is already at the end of `bounds`.
private struct CurrencyParser: CustomMatchingRegexComponent {
  enum Currency: String, Hashable {
    case usd = "USD"
    case ntd = "NTD"
    case dem = "DEM"
  }

  enum ParseError: Error, Hashable {
    case unrecognized
    case deprecated
  }

  typealias RegexOutput = Currency

  func match(_ input: String,
             startingAt index: String.Index,
             in bounds: Range<String.Index>
  ) throws -> (upperBound: String.Index, output: Currency)? {

    // Nothing left to consume: report "no match" rather than an error.
    guard index != bounds.upperBound else { return nil }

    let substr = input[index..<bounds.upperBound]

    let currencies: [Currency] = [ .usd, .ntd ]
    let deprecated: [Currency] = [ .dem ]

    for currency in currencies {
      if substr.hasPrefix(currency.rawValue) {
        // Compute the end of the code relative to the slice that was actually
        // tested. Searching the whole `input` would return the position of
        // the first occurrence anywhere in the string, which is wrong when
        // the same code appears before `index`.
        let upperBound = substr.index(
          substr.startIndex, offsetBy: currency.rawValue.count)
        return (upperBound, currency)
      }
    }

    for dep in deprecated {
      if substr.hasPrefix(dep.rawValue) {
        throw ParseError.deprecated
      }
    }
    throw ParseError.unrecognized
  }
}

enum MatchCall {
case match
case firstMatch
Expand Down Expand Up @@ -223,4 +286,186 @@ class CustomRegexComponentTests: XCTestCase {


}

func testCustomRegexThrows() {

func customTest<Match: Equatable, E: Error & Equatable>(
_ regex: Regex<Match>,
_ tests: (input: String, match: Match?, expectError: E?)...,
file: StaticString = #file,
line: UInt = #line
) {
for (input, match, expectError) in tests {
do {
let result = try regex.wholeMatch(in: input)?.output
XCTAssertEqual(result, match)
} catch let e as E {
XCTAssertEqual(e, expectError)
} catch {
XCTFail()
}
}
}

func customTest<Match: Equatable, Error1: Error & Equatable, Error2: Error & Equatable>(
_ regex: Regex<Match>,
_ tests: (input: String, match: Match?, expectError1: Error1?, expectError2: Error2?)...,
file: StaticString = #file,
line: UInt = #line
) {
for (input, match, expectError1, expectError2) in tests {
do {
let result = try regex.wholeMatch(in: input)?.output
XCTAssertEqual(result, match)
} catch let e as Error1 {
XCTAssertEqual(e, expectError1, input, file: file, line: line)
} catch let e as Error2 {
XCTAssertEqual(e, expectError2, input, file: file, line: line)
} catch {
XCTFail("caught error: \(error.localizedDescription)")
}
}
}

func customTest<Capture: Equatable, Error1: Error & Equatable, Error2: Error & Equatable>(
_ regex: Regex<(Substring, Capture)>,
_ tests: (input: String, match: (Substring, Capture)?, expectError1: Error1?, expectError2: Error2?)...,
file: StaticString = #file,
line: UInt = #line
) {
for (input, match, expectError1, expectError2) in tests {
do {
let result = try regex.wholeMatch(in: input)?.output
XCTAssertEqual(result?.0, match?.0, file: file, line: line)
XCTAssertEqual(result?.1, match?.1, file: file, line: line)
} catch let e as Error1 {
XCTAssertEqual(e, expectError1, input, file: file, line: line)
} catch let e as Error2 {
XCTAssertEqual(e, expectError2, input, file: file, line: line)
} catch {
XCTFail("caught error: \(error.localizedDescription)")
}
}
}

func customTest<Capture1: Equatable, Capture2: Equatable, Error1: Error & Equatable, Error2: Error & Equatable>(
_ regex: Regex<(Substring, Capture1, Capture2)>,
_ tests: (input: String, match: (Substring, Capture1, Capture2)?, expectError1: Error1?, expectError2: Error2?)...,
file: StaticString = #file,
line: UInt = #line
) {
for (input, match, expectError1, expectError2) in tests {
do {
let result = try regex.wholeMatch(in: input)?.output
XCTAssertEqual(result?.0, match?.0, file: file, line: line)
XCTAssertEqual(result?.1, match?.1, file: file, line: line)
XCTAssertEqual(result?.2, match?.2, file: file, line: line)
} catch let e as Error1 {
XCTAssertEqual(e, expectError1, input, file: file, line: line)
} catch let e as Error2 {
XCTAssertEqual(e, expectError2, input, file: file, line: line)
} catch {
XCTFail("caught error: \(error.localizedDescription)")
}
}
}

// No capture, one error
customTest(
Regex {
IntParser()
},
("zzz", nil, IntParser.ParseError()),
("x10x", nil, IntParser.ParseError()),
("30", 30, nil)
)

customTest(
Regex {
CurrencyParser()
},
("USD", .usd, nil),
("NTD", .ntd, nil),
("NTD USD", nil, nil),
("DEM", nil, CurrencyParser.ParseError.deprecated),
("XXX", nil, CurrencyParser.ParseError.unrecognized)
)

// No capture, two errors
customTest(
Regex {
IntParser()
" "
IntParser()
},
("20304 100", "20304 100", nil, nil),
("20304.445 200", nil, IntParser.ParseError(), nil),
("20304 200.123", nil, nil, IntParser.ParseError()),
("20304.445 200.123", nil, IntParser.ParseError(), IntParser.ParseError())
)

customTest(
Regex {
CurrencyParser()
IntParser()
},
("USD100", "USD100", nil, nil),
("XXX100", nil, CurrencyParser.ParseError.unrecognized, nil),
("USD100.000", nil, nil, IntParser.ParseError()),
("XXX100.0000", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError())
)

// One capture, two errors: One error is thrown from inside a capture,
// while the other one is thrown from outside
customTest(
Regex {
Capture { CurrencyParser() }
IntParser()
},
("USD100", ("USD100", .usd), nil, nil),
("NTD305.5", nil, nil, IntParser.ParseError()),
("DEM200", ("DEM200", .dem), CurrencyParser.ParseError.deprecated, nil),
("XXX", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError())
)

customTest(
Regex {
CurrencyParser()
Capture { IntParser() }
},
("USD100", ("USD100", 100), nil, nil),
("NTD305.5", nil, nil, IntParser.ParseError()),
("DEM200", ("DEM200", 200), CurrencyParser.ParseError.deprecated, nil),
("XXX", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError())
)

// One capture, two errors: Both errors are thrown from inside the capture
customTest(
Regex {
Capture {
CurrencyParser()
IntParser()
}
},
("USD100", ("USD100", "USD100"), nil, nil),
("NTD305.5", nil, nil, IntParser.ParseError()),
("DEM200", ("DEM200", "DEM200"), CurrencyParser.ParseError.deprecated, nil),
("XXX", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError())
)

// Two captures, two errors: Different erros are thrown from inside captures
customTest(
Regex {
Capture(CurrencyParser())
Capture(IntParser())
},
("USD100", ("USD100", .usd, 100), nil, nil),
("NTD500", ("NTD500", .ntd, 500), nil, nil),
("XXX20", nil, CurrencyParser.ParseError.unrecognized, IntParser.ParseError()),
("DEM500", nil, CurrencyParser.ParseError.deprecated, nil),
("DEM500.345", nil, CurrencyParser.ParseError.deprecated, IntParser.ParseError()),
("NTD100.345", nil, nil, IntParser.ParseError())
)

}
}
8 changes: 4 additions & 4 deletions Tests/RegexBuilderTests/RegexDSLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -743,13 +743,13 @@ class RegexDSLTests: XCTestCase {
var patch: Int
var dev: String?
}
struct SemanticVersionParser: CustomRegexComponent {
struct SemanticVersionParser: CustomMatchingRegexComponent {
typealias RegexOutput = SemanticVersion
func match(
_ input: String,
startingAt index: String.Index,
in bounds: Range<String.Index>
) -> (upperBound: String.Index, output: SemanticVersion)? {
) throws -> (upperBound: String.Index, output: SemanticVersion)? {
let regex = Regex {
TryCapture(OneOrMore(.digit)) { Int($0) }
"."
Expand All @@ -776,13 +776,13 @@ class RegexDSLTests: XCTestCase {
return (match.range.upperBound, result)
}
}

let versions = [
("1.0", SemanticVersion(major: 1, minor: 0, patch: 0)),
("1.0.1", SemanticVersion(major: 1, minor: 0, patch: 1)),
("12.100.5-dev", SemanticVersion(major: 12, minor: 100, patch: 5, dev: "dev")),
]

let parser = SemanticVersionParser()
for (str, version) in versions {
XCTAssertEqual(str.wholeMatch(of: parser)?.output, version)
Expand Down