Skip to content

Commit ed95066

Browse files
authored
Improve StringProcessing and RegexBuilder documentation (swiftlang#611)
This includes documentation improvements for core types/methods, RegexBuilder types along with their generated variadic initializers, and adds some curation. It also includes tests of the documentation code samples.
1 parent 45f752a commit ed95066

File tree

15 files changed

+3777
-109
lines changed

15 files changed

+3777
-109
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,6 @@ fastlane/test_output
9393
# https://github.com/johnno1962/injectionforxcode
9494

9595
iOSInjectionProject/
96+
97+
# DocC build folder
98+
*.docc-build

Package.swift

+8
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ let package = Package(
8989
swiftSettings: [
9090
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
9191
]),
92+
.testTarget(
93+
name: "DocumentationTests",
94+
dependencies: ["_StringProcessing", "RegexBuilder"],
95+
swiftSettings: [
96+
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
97+
.unsafeFlags(["-enable-bare-slash-regex"]),
98+
]),
99+
92100
// FIXME: Disabled due to rdar://94763190.
93101
// .testTarget(
94102
// name: "Prototypes",

Sources/RegexBuilder/Anchor.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ extension Anchor {
147147
///
148148
/// Word boundaries are identified using the Unicode default word boundary
149149
/// algorithm by default. To specify a different word boundary algorithm,
150-
/// see the `RegexComponent.wordBoundaryKind(_:)` method.
150+
/// use the `wordBoundaryKind(_:)` method.
151151
///
152152
/// This anchor is equivalent to `\b` in regex syntax.
153153
public static var wordBoundary: Anchor {
@@ -157,7 +157,7 @@ extension Anchor {
157157
/// The inverse of this anchor, which matches at every position that this
158158
/// anchor does not.
159159
///
160-
/// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted
160+
/// For the ``wordBoundary`` and ``textSegmentBoundary`` anchors, the inverted
161161
/// version corresponds to `\B` and `\Y`, respectively.
162162
public var inverted: Anchor {
163163
var result = self

Sources/RegexBuilder/Builder.swift

+9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111

1212
@_spi(RegexBuilder) import _StringProcessing
1313

14+
/// A custom parameter attribute that constructs regular expressions from
15+
/// closures.
16+
///
17+
/// You typically see `RegexComponentBuilder` as a parameter attribute for
18+
/// `Regex`- or `RegexComponent`-producing closure parameters, allowing those
19+
/// closures to combine multiple regular expression components. Type
20+
/// initializers and string algorithm methods in the RegexBuilder framework
21+
/// include a builder closure parameter, so that you can use regular expression
22+
/// components together.
1423
@available(SwiftStdlib 5.7, *)
1524
@resultBuilder
1625
public enum RegexComponentBuilder {

Sources/RegexBuilder/CharacterClass.swift

+94-8
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
@_implementationOnly import _RegexParser
1313
@_spi(RegexBuilder) import _StringProcessing
1414

15+
/// A class of characters that match in a regex.
16+
///
17+
/// A character class can represent individual characters, a group of
18+
/// characters, the set of characters that match some set of criteria, or
19+
/// a set-algebraic combination of all of the above.
1520
@available(SwiftStdlib 5.7, *)
1621
public struct CharacterClass {
1722
internal var ccc: DSLTree.CustomCharacterClass
@@ -42,6 +47,20 @@ extension CharacterClass: RegexComponent {
4247

4348
@available(SwiftStdlib 5.7, *)
4449
extension CharacterClass {
50+
/// A character class that matches any character that does not match this
51+
/// character class.
52+
///
53+
/// For example, you can use the `inverted` property to create a character
54+
/// class that excludes a specific group of characters:
55+
///
56+
/// let validCharacters = CharacterClass("a"..."z", .anyOf("-_"))
57+
/// let invalidCharacters = validCharacters.inverted
58+
///
59+
/// let username = "user123"
60+
/// if username.contains(invalidCharacters) {
61+
/// print("Invalid username: '\(username)'")
62+
/// }
63+
/// // Prints "Invalid username: 'user123'"
4564
public var inverted: CharacterClass {
4665
if let inv = builtin?.inverted {
4766
return CharacterClass(builtin: inv)
@@ -53,26 +72,50 @@ extension CharacterClass {
5372

5473
@available(SwiftStdlib 5.7, *)
5574
extension RegexComponent where Self == CharacterClass {
75+
/// A character class that matches any element.
76+
///
77+
/// This character class is unaffected by the `dotMatchesNewlines()` method.
78+
/// To match any character that isn't a newline, see
79+
/// ``anyNonNewline``.
80+
///
81+
/// This character class is equivalent to the regex syntax "dot"
82+
/// metacharacter in single-line mode: `(?s:.)`.
5683
public static var any: CharacterClass {
5784
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
5885
}
5986

87+
/// A character class that matches any element that isn't a newline.
88+
///
89+
/// This character class is unaffected by the `dotMatchesNewlines()` method.
90+
/// To match any character, including newlines, see ``any``.
91+
///
92+
/// This character class is equivalent to the regex syntax "dot"
93+
/// metacharacter with single-line mode disabled: `(?-s:.)`.
6094
public static var anyNonNewline: CharacterClass {
6195
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
6296
}
6397

98+
/// A character class that matches any single `Character`, or extended
99+
/// grapheme cluster, regardless of the current semantic level.
100+
///
101+
/// This character class is equivalent to `\X` in regex syntax.
64102
public static var anyGraphemeCluster: CharacterClass {
65103
.init(builtin: .anyGrapheme)
66104
}
67105

68-
public static var whitespace: CharacterClass {
69-
.init(builtin: .whitespace)
70-
}
71-
106+
/// A character class that matches any digit.
107+
///
108+
/// This character class is equivalent to `\d` in regex syntax.
72109
public static var digit: CharacterClass {
73110
.init(builtin: .digit)
74111
}
75112

113+
/// A character class that matches any hexadecimal digit.
114+
///
115+
/// `hexDigit` matches the ASCII characters `0` through `9`, and upper- or
116+
/// lowercase `a` through `f`. The corresponding characters in the "Halfwidth
117+
/// and Fullwidth Forms" Unicode block are not matched by this character
118+
/// class.
76119
public static var hexDigit: CharacterClass {
77120
.init(DSLTree.CustomCharacterClass(members: [
78121
.range(.char("A"), .char("F")),
@@ -81,27 +124,56 @@ extension RegexComponent where Self == CharacterClass {
81124
]))
82125
}
83126

127+
/// A character class that matches any element that is a "word character".
128+
///
129+
/// This character class is equivalent to `\w` in regex syntax.
130+
public static var word: CharacterClass {
131+
.init(builtin: .word)
132+
}
133+
134+
/// A character class that matches any element that is classified as
135+
/// whitespace.
136+
///
137+
/// This character class is equivalent to `\s` in regex syntax.
138+
public static var whitespace: CharacterClass {
139+
.init(builtin: .whitespace)
140+
}
141+
142+
/// A character class that matches any element that is classified as
143+
/// horizontal whitespace.
144+
///
145+
/// This character class is equivalent to `\h` in regex syntax.
84146
public static var horizontalWhitespace: CharacterClass {
85147
.init(builtin: .horizontalWhitespace)
86148
}
87149

150+
/// A character class that matches any newline sequence.
151+
///
152+
/// This character class is equivalent to `\R` or `\n` in regex syntax.
88153
public static var newlineSequence: CharacterClass {
89154
.init(builtin: .newlineSequence)
90155
}
91156

157+
/// A character class that matches any element that is classified as
158+
/// vertical whitespace.
159+
///
160+
/// This character class is equivalent to `\v` in regex syntax.
92161
public static var verticalWhitespace: CharacterClass {
93162
.init(builtin: .verticalWhitespace)
94163
}
95-
96-
public static var word: CharacterClass {
97-
.init(builtin: .word)
98-
}
99164
}
100165

101166
@available(SwiftStdlib 5.7, *)
102167
extension RegexComponent where Self == CharacterClass {
103168
/// Returns a character class that matches any character in the given string
104169
/// or sequence.
170+
///
171+
/// Calling this method with a group of characters is equivalent to listing
172+
/// those characters in a custom character class in regex syntax. For example,
173+
/// the two regexes in this example are equivalent:
174+
///
175+
/// let regex1 = /[abcd]+/
176+
/// let regex2 = OneOrMore(.anyOf("abcd"))
105177
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
106178
where S.Element == Character
107179
{
@@ -111,6 +183,9 @@ extension RegexComponent where Self == CharacterClass {
111183

112184
/// Returns a character class that matches any Unicode scalar in the given
113185
/// sequence.
186+
///
187+
/// Calling this method with a group of Unicode scalars is equivalent to
188+
/// listing them in a custom character class in regex syntax.
114189
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
115190
where S.Element == UnicodeScalar
116191
{
@@ -122,6 +197,11 @@ extension RegexComponent where Self == CharacterClass {
122197
// Unicode properties
123198
@available(SwiftStdlib 5.7, *)
124199
extension CharacterClass {
200+
/// Returns a character class that matches any element with the given Unicode
201+
/// general category.
202+
///
203+
/// For example, when passed `.uppercaseLetter`, this method is equivalent to
204+
/// `/\p{Uppercase_Letter}/` or `/\p{Lu}/`.
125205
public static func generalCategory(_ category: Unicode.GeneralCategory) -> CharacterClass {
126206
return CharacterClass(.generalCategory(category))
127207
}
@@ -148,6 +228,7 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass {
148228

149229
@available(SwiftStdlib 5.7, *)
150230
extension RegexComponent where Self == CharacterClass {
231+
/// Creates a character class that combines the given classes in a union.
151232
public init(_ first: CharacterClass, _ rest: CharacterClass...) {
152233
if rest.isEmpty {
153234
self.init(first.ccc)
@@ -161,24 +242,29 @@ extension RegexComponent where Self == CharacterClass {
161242

162243
@available(SwiftStdlib 5.7, *)
163244
extension CharacterClass {
245+
/// Returns a character class from the union of this class and the given class.
164246
public func union(_ other: CharacterClass) -> CharacterClass {
165247
CharacterClass(.init(members: [
166248
.custom(self.ccc),
167249
.custom(other.ccc)]))
168250
}
169251

252+
/// Returns a character class from the intersection of this class and the given class.
170253
public func intersection(_ other: CharacterClass) -> CharacterClass {
171254
CharacterClass(.init(members: [
172255
.intersection(self.ccc, other.ccc)
173256
]))
174257
}
175258

259+
/// Returns a character class by subtracting the given class from this class.
176260
public func subtracting(_ other: CharacterClass) -> CharacterClass {
177261
CharacterClass(.init(members: [
178262
.subtraction(self.ccc, other.ccc)
179263
]))
180264
}
181265

266+
/// Returns a character class matching elements in one or the other, but not both,
267+
/// of this class and the given class.
182268
public func symmetricDifference(_ other: CharacterClass) -> CharacterClass {
183269
CharacterClass(.init(members: [
184270
.symmetricDifference(self.ccc, other.ccc)

0 commit comments

Comments
 (0)