Skip to content

Commit 185ebd6

Browse files
authored
Overhaul quantification fast-path (swiftlang#689)
Overhaul quantification save points and fast path logic, for significant wins in simplicity and performance.
1 parent f5b0b5e commit 185ebd6

9 files changed

+199
-187
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

+33-33
Original file line numberDiff line numberDiff line change
@@ -459,16 +459,16 @@ fileprivate extension Compiler.ByteCodeGen {
459459
assert(high != 0)
460460
assert((0...(high ?? Int.max)).contains(low))
461461

462-
let extraTrips: Int?
462+
let maxExtraTrips: Int?
463463
if let h = high {
464-
extraTrips = h - low
464+
maxExtraTrips = h - low
465465
} else {
466-
extraTrips = nil
466+
maxExtraTrips = nil
467467
}
468468
let minTrips = low
469-
assert((extraTrips ?? 1) >= 0)
469+
assert((maxExtraTrips ?? 1) >= 0)
470470

471-
if tryEmitFastQuant(child, updatedKind, minTrips, extraTrips) {
471+
if tryEmitFastQuant(child, updatedKind, minTrips, maxExtraTrips) {
472472
return
473473
}
474474

@@ -486,19 +486,19 @@ fileprivate extension Compiler.ByteCodeGen {
486486
decrement %minTrips and fallthrough
487487
488488
loop-body:
489-
<if can't guarantee forward progress && extraTrips = nil>:
489+
<if can't guarantee forward progress && maxExtraTrips = nil>:
490490
mov currentPosition %pos
491491
evaluate the subexpression
492-
<if can't guarantee forward progress && extraTrips = nil>:
492+
<if can't guarantee forward progress && maxExtraTrips = nil>:
493493
if %pos is currentPosition:
494494
goto exit
495495
goto min-trip-count control block
496496
497497
exit-policy control block:
498-
if %extraTrips is zero:
498+
if %maxExtraTrips is zero:
499499
goto exit
500500
else:
501-
decrement %extraTrips and fallthrough
501+
decrement %maxExtraTrips and fallthrough
502502
503503
<if eager>:
504504
save exit and goto loop-body
@@ -525,12 +525,12 @@ fileprivate extension Compiler.ByteCodeGen {
525525
/* fallthrough */
526526
"""
527527

528-
// Specialization based on `extraTrips` for 0 or unbounded
528+
// Specialization based on `maxExtraTrips` for 0 or unbounded
529529
_ = """
530530
exit-policy control block:
531-
<if extraTrips == 0>:
531+
<if maxExtraTrips == 0>:
532532
goto exit
533-
<if extraTrips == .unbounded>:
533+
<if maxExtraTrips == .unbounded>:
534534
/* fallthrough */
535535
"""
536536

@@ -563,12 +563,12 @@ fileprivate extension Compiler.ByteCodeGen {
563563
minTripsReg = nil
564564
}
565565

566-
let extraTripsReg: IntRegister?
567-
if (extraTrips ?? 0) > 0 {
568-
extraTripsReg = builder.makeIntRegister(
569-
initialValue: extraTrips!)
566+
let maxExtraTripsReg: IntRegister?
567+
if (maxExtraTrips ?? 0) > 0 {
568+
maxExtraTripsReg = builder.makeIntRegister(
569+
initialValue: maxExtraTrips!)
570570
} else {
571-
extraTripsReg = nil
571+
maxExtraTripsReg = nil
572572
}
573573

574574
// Set up a dummy save point for possessive to update
@@ -600,7 +600,7 @@ fileprivate extension Compiler.ByteCodeGen {
600600
let startPosition: PositionRegister?
601601
let emitPositionChecking =
602602
(!optimizationsEnabled || !child.guaranteesForwardProgress) &&
603-
extraTrips == nil
603+
maxExtraTrips == nil
604604

605605
if emitPositionChecking {
606606
startPosition = builder.makePositionRegister()
@@ -610,7 +610,7 @@ fileprivate extension Compiler.ByteCodeGen {
610610
}
611611
try emitNode(child)
612612
if emitPositionChecking {
613-
// in all quantifier cases, no matter what minTrips or extraTrips is,
613+
// in all quantifier cases, no matter what minTrips or maxExtraTrips is,
614614
// if we have a successful non-advancing match, branch to exit because it
615615
// can match an arbitrary number of times
616616
builder.buildCondBranch(to: exit, ifSamePositionAs: startPosition!)
@@ -623,20 +623,20 @@ fileprivate extension Compiler.ByteCodeGen {
623623
}
624624

625625
// exit-policy:
626-
// condBranch(to: exit, ifZeroElseDecrement: %extraTrips)
626+
// condBranch(to: exit, ifZeroElseDecrement: %maxExtraTrips)
627627
// <eager: split(to: loop, saving: exit)>
628628
// <possessive:
629629
// clearSavePoint
630630
// split(to: loop, saving: exit)>
631631
// <reluctant: save(restoringAt: loop)>
632632
builder.label(exitPolicy)
633-
switch extraTrips {
633+
switch maxExtraTrips {
634634
case nil: break
635635
case 0: builder.buildBranch(to: exit)
636636
default:
637-
assert(extraTripsReg != nil, "logic inconsistency")
637+
assert(maxExtraTripsReg != nil, "logic inconsistency")
638638
builder.buildCondBranch(
639-
to: exit, ifZeroElseDecrement: extraTripsReg!)
639+
to: exit, ifZeroElseDecrement: maxExtraTripsReg!)
640640
}
641641

642642
switch updatedKind {
@@ -666,12 +666,12 @@ fileprivate extension Compiler.ByteCodeGen {
666666
_ child: DSLTree.Node,
667667
_ kind: AST.Quantification.Kind,
668668
_ minTrips: Int,
669-
_ extraTrips: Int?
669+
_ maxExtraTrips: Int?
670670
) -> Bool {
671671
let isScalarSemantics = options.semanticLevel == .unicodeScalar
672672
guard optimizationsEnabled
673673
&& minTrips <= QuantifyPayload.maxStorableTrips
674-
&& extraTrips ?? 0 <= QuantifyPayload.maxStorableTrips
674+
&& maxExtraTrips ?? 0 <= QuantifyPayload.maxStorableTrips
675675
&& kind != .reluctant else {
676676
return false
677677
}
@@ -681,7 +681,7 @@ fileprivate extension Compiler.ByteCodeGen {
681681
guard let bitset = ccc.asAsciiBitset(options) else {
682682
return false
683683
}
684-
builder.buildQuantify(bitset: bitset, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
684+
builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
685685

686686
case .atom(let atom):
687687
switch atom {
@@ -690,17 +690,17 @@ fileprivate extension Compiler.ByteCodeGen {
690690
guard let val = c._singleScalarAsciiValue else {
691691
return false
692692
}
693-
builder.buildQuantify(asciiChar: val, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
693+
builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
694694

695695
case .any:
696696
builder.buildQuantifyAny(
697-
matchesNewlines: true, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
697+
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
698698
case .anyNonNewline:
699699
builder.buildQuantifyAny(
700-
matchesNewlines: false, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
700+
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
701701
case .dot:
702702
builder.buildQuantifyAny(
703-
matchesNewlines: options.dotMatchesNewline, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
703+
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
704704

705705
case .characterClass(let cc):
706706
// Custom character class that consumes a single grapheme
@@ -709,19 +709,19 @@ fileprivate extension Compiler.ByteCodeGen {
709709
model: model,
710710
kind,
711711
minTrips,
712-
extraTrips,
712+
maxExtraTrips,
713713
isScalarSemantics: isScalarSemantics)
714714
default:
715715
return false
716716
}
717717
case .convertedRegexLiteral(let node, _):
718-
return tryEmitFastQuant(node, kind, minTrips, extraTrips)
718+
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
719719
case .nonCapturingGroup(let groupKind, let node):
720720
// .nonCapture nonCapturingGroups are ignored during compilation
721721
guard groupKind.ast == .nonCapture else {
722722
return false
723723
}
724-
return tryEmitFastQuant(node, kind, minTrips, extraTrips)
724+
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
725725
default:
726726
return false
727727
}

Sources/_StringProcessing/Engine/Backtracking.swift

+43-37
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ extension Processor {
1313
struct SavePoint {
1414
var pc: InstructionAddress
1515
var pos: Position?
16+
1617
// Quantifiers may store a range of positions to restore to
17-
var rangeStart: Position?
18-
var rangeEnd: Position?
18+
var quantifiedRange: Range<Position>?
1919

2020
// FIXME: refactor, for now this field is only used for quantifier save
2121
// points. We should try to separate out the concerns better.
@@ -49,64 +49,70 @@ extension Processor {
4949
return (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters)
5050
}
5151

52-
var rangeIsEmpty: Bool { rangeEnd == nil }
53-
54-
mutating func updateRange(newEnd: Input.Index) {
55-
if rangeStart == nil {
56-
rangeStart = newEnd
57-
}
58-
rangeEnd = newEnd
52+
// Whether this save point is quantified, meaning it has a range of
53+
// possible positions to explore.
54+
var isQuantified: Bool {
55+
quantifiedRange != nil
5956
}
6057

6158
/// Move the next range position into pos, and remove it from the range
62-
mutating func takePositionFromRange(_ input: Input) {
63-
assert(!rangeIsEmpty)
64-
pos = rangeEnd!
65-
shrinkRange(input)
66-
}
59+
mutating func takePositionFromQuantifiedRange(_ input: Input) {
60+
assert(isQuantified)
61+
let range = quantifiedRange!
62+
pos = range.upperBound
63+
if range.isEmpty {
64+
// Becomes a normal save point
65+
quantifiedRange = nil
66+
return
67+
}
6768

68-
/// Shrink the range of the save point by one index, essentially dropping the last index
69-
mutating func shrinkRange(_ input: Input) {
70-
assert(!rangeIsEmpty)
71-
if rangeEnd == rangeStart {
72-
// The range is now empty
73-
rangeStart = nil
74-
rangeEnd = nil
69+
// Shrink the range
70+
let newUpper: Position
71+
if isScalarSemantics {
72+
newUpper = input.unicodeScalars.index(before: range.upperBound)
7573
} else {
76-
if isScalarSemantics {
77-
input.unicodeScalars.formIndex(before: &rangeEnd!)
78-
} else {
79-
input.formIndex(before: &rangeEnd!)
80-
}
74+
newUpper = input.index(before: range.upperBound)
8175
}
76+
quantifiedRange = range.lowerBound..<newUpper
8277
}
8378
}
8479

8580
func makeSavePoint(
86-
_ pc: InstructionAddress,
87-
addressOnly: Bool = false
81+
resumingAt pc: InstructionAddress
82+
) -> SavePoint {
83+
SavePoint(
84+
pc: pc,
85+
pos: currentPosition,
86+
quantifiedRange: nil,
87+
isScalarSemantics: false,
88+
stackEnd: .init(callStack.count),
89+
captureEnds: storedCaptures,
90+
intRegisters: registers.ints,
91+
posRegisters: registers.positions)
92+
}
93+
94+
func makeAddressOnlySavePoint(
95+
resumingAt pc: InstructionAddress
8896
) -> SavePoint {
8997
SavePoint(
9098
pc: pc,
91-
pos: addressOnly ? nil : currentPosition,
92-
rangeStart: nil,
93-
rangeEnd: nil,
94-
isScalarSemantics: false, // FIXME: refactor away
99+
pos: nil,
100+
quantifiedRange: nil,
101+
isScalarSemantics: false,
95102
stackEnd: .init(callStack.count),
96103
captureEnds: storedCaptures,
97104
intRegisters: registers.ints,
98105
posRegisters: registers.positions)
99106
}
100-
101-
func startQuantifierSavePoint(
107+
108+
func makeQuantifiedSavePoint(
109+
_ range: Range<Position>,
102110
isScalarSemantics: Bool
103111
) -> SavePoint {
104-
// Restores to the instruction AFTER the current quantifier instruction
105112
SavePoint(
106113
pc: controller.pc + 1,
107114
pos: nil,
108-
rangeStart: nil,
109-
rangeEnd: nil,
115+
quantifiedRange: range,
110116
isScalarSemantics: isScalarSemantics,
111117
stackEnd: .init(callStack.count),
112118
captureEnds: storedCaptures,

0 commit comments

Comments
 (0)