12
12
@_implementationOnly import _RegexParser
13
13
@_spi ( RegexBuilder) import _StringProcessing
14
14
15
+ /// A class of characters that match in a regex.
16
+ ///
17
+ /// A character class can represent individual characters, a group of
18
+ /// characters, the set of characters that match some set of criteria, or
19
+ /// a set-algebraic combination of any of the above.
15
20
@available ( SwiftStdlib 5 . 7 , * )
16
21
public struct CharacterClass {
17
22
internal var ccc : DSLTree . CustomCharacterClass
@@ -42,6 +47,20 @@ extension CharacterClass: RegexComponent {
42
47
43
48
@available ( SwiftStdlib 5 . 7 , * )
44
49
extension CharacterClass {
50
+ /// A character class that matches any character that does not match this
51
+ /// character class.
52
+ ///
53
+ /// For example, you can use the `inverted` property to create a character
54
+ /// class that excludes a specific group of characters:
55
+ ///
56
+ /// let validCharacters = CharacterClass("a"..."z", .anyOf("-_"))
57
+ /// let invalidCharacters = validCharacters.inverted
58
+ ///
59
+ /// let username = "user123"
60
+ /// if username.contains(invalidCharacters) {
61
+ /// print("Invalid username: '\(username)'")
62
+ /// }
63
+ /// // Prints "Invalid username: 'user123'"
45
64
public var inverted : CharacterClass {
46
65
if let inv = builtin? . inverted {
47
66
return CharacterClass ( builtin: inv)
@@ -53,26 +72,50 @@ extension CharacterClass {
53
72
54
73
@available ( SwiftStdlib 5 . 7 , * )
55
74
extension RegexComponent where Self == CharacterClass {
75
+ /// A character class that matches any element.
76
+ ///
77
+ /// This character class is unaffected by the `dotMatchesNewlines()` method.
78
+ /// To match any character that isn't a newline, see
79
+ /// ``anyNonNewline``.
80
+ ///
81
+ /// This character class is equivalent to the regex syntax "dot"
82
+ /// metacharacter in single-line mode: `(?s:.)`.
56
83
public static var any : CharacterClass {
57
84
. init( DSLTree . CustomCharacterClass ( members: [ . atom( . any) ] ) )
58
85
}
59
86
87
+ /// A character class that matches any element that isn't a newline.
88
+ ///
89
+ /// This character class is unaffected by the `dotMatchesNewlines()` method.
90
+ /// To match any character, including newlines, see ``any``.
91
+ ///
92
+ /// This character class is equivalent to the regex syntax "dot"
93
+ /// metacharacter with single-line mode disabled: `(?-s:.)`.
60
94
public static var anyNonNewline : CharacterClass {
61
95
. init( DSLTree . CustomCharacterClass ( members: [ . atom( . anyNonNewline) ] ) )
62
96
}
63
97
98
+ /// A character class that matches any single `Character`, or extended
99
+ /// grapheme cluster, regardless of the current semantic level.
100
+ ///
101
+ /// This character class is equivalent to `\X` in regex syntax.
64
102
public static var anyGraphemeCluster : CharacterClass {
65
103
. init( builtin: . anyGrapheme)
66
104
}
67
105
68
- public static var whitespace : CharacterClass {
69
- . init( builtin: . whitespace)
70
- }
71
-
106
+ /// A character class that matches any digit.
107
+ ///
108
+ /// This character class is equivalent to `\d` in regex syntax.
72
109
public static var digit : CharacterClass {
73
110
. init( builtin: . digit)
74
111
}
75
112
113
+ /// A character class that matches any hexadecimal digit.
114
+ ///
115
+ /// `hexDigit` matches the ASCII characters `0` through `9`, and upper- or
116
+ /// lowercase `a` through `f`. The corresponding characters in the "Halfwidth
117
+ /// and Fullwidth Forms" Unicode block are not matched by this character
118
+ /// class.
76
119
public static var hexDigit : CharacterClass {
77
120
. init( DSLTree . CustomCharacterClass ( members: [
78
121
. range( . char( " A " ) , . char( " F " ) ) ,
@@ -81,27 +124,56 @@ extension RegexComponent where Self == CharacterClass {
81
124
] ) )
82
125
}
83
126
127
+ /// A character class that matches any element that is a "word character".
128
+ ///
129
+ /// This character class is equivalent to `\w` in regex syntax.
130
+ public static var word : CharacterClass {
131
+ . init( builtin: . word)
132
+ }
133
+
134
+ /// A character class that matches any element that is classified as
135
+ /// whitespace.
136
+ ///
137
+ /// This character class is equivalent to `\s` in regex syntax.
138
+ public static var whitespace : CharacterClass {
139
+ . init( builtin: . whitespace)
140
+ }
141
+
142
+ /// A character class that matches any element that is classified as
143
+ /// horizontal whitespace.
144
+ ///
145
+ /// This character class is equivalent to `\h` in regex syntax.
84
146
public static var horizontalWhitespace : CharacterClass {
85
147
. init( builtin: . horizontalWhitespace)
86
148
}
87
149
150
+ /// A character class that matches any newline sequence.
151
+ ///
152
+ /// This character class is equivalent to `\R` or `\n` in regex syntax.
88
153
public static var newlineSequence : CharacterClass {
89
154
. init( builtin: . newlineSequence)
90
155
}
91
156
157
+ /// A character class that matches any element that is classified as
158
+ /// vertical whitespace.
159
+ ///
160
+ /// This character class is equivalent to `\v` in regex syntax.
92
161
public static var verticalWhitespace : CharacterClass {
93
162
. init( builtin: . verticalWhitespace)
94
163
}
95
-
96
- public static var word : CharacterClass {
97
- . init( builtin: . word)
98
- }
99
164
}
100
165
101
166
@available ( SwiftStdlib 5 . 7 , * )
102
167
extension RegexComponent where Self == CharacterClass {
103
168
/// Returns a character class that matches any character in the given string
104
169
/// or sequence.
170
+ ///
171
+ /// Calling this method with a group of characters is equivalent to listing
172
+ /// those characters in a custom character class in regex syntax. For example,
173
+ /// the two regexes in this example are equivalent:
174
+ ///
175
+ /// let regex1 = /[abcd]+/
176
+ /// let regex2 = OneOrMore(.anyOf("abcd"))
105
177
public static func anyOf< S: Sequence > ( _ s: S ) -> CharacterClass
106
178
where S. Element == Character
107
179
{
@@ -111,6 +183,9 @@ extension RegexComponent where Self == CharacterClass {
111
183
112
184
/// Returns a character class that matches any Unicode scalar in the given
113
185
/// sequence.
186
+ ///
187
+ /// Calling this method with a group of Unicode scalars is equivalent to
188
+ /// listing them in a custom character class in regex syntax.
114
189
public static func anyOf< S: Sequence > ( _ s: S ) -> CharacterClass
115
190
where S. Element == UnicodeScalar
116
191
{
@@ -122,6 +197,11 @@ extension RegexComponent where Self == CharacterClass {
122
197
// Unicode properties
123
198
@available ( SwiftStdlib 5 . 7 , * )
124
199
extension CharacterClass {
200
+ /// Returns a character class that matches any element with the given Unicode
201
+ /// general category.
202
+ ///
203
+ /// For example, when passed `.uppercaseLetter`, this method is equivalent to
204
+ /// `/\p{Uppercase_Letter}/` or `/\p{Lu}/`.
125
205
public static func generalCategory( _ category: Unicode . GeneralCategory ) -> CharacterClass {
126
206
return CharacterClass ( . generalCategory( category) )
127
207
}
@@ -148,6 +228,7 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass {
148
228
149
229
@available ( SwiftStdlib 5 . 7 , * )
150
230
extension RegexComponent where Self == CharacterClass {
231
+ /// Creates a character class that combines the given classes in a union.
151
232
public init ( _ first: CharacterClass , _ rest: CharacterClass ... ) {
152
233
if rest. isEmpty {
153
234
self . init ( first. ccc)
@@ -161,24 +242,29 @@ extension RegexComponent where Self == CharacterClass {
161
242
162
243
@available ( SwiftStdlib 5 . 7 , * )
163
244
extension CharacterClass {
245
+ /// Returns a character class from the union of this class and the given class.
164
246
public func union( _ other: CharacterClass ) -> CharacterClass {
165
247
CharacterClass ( . init( members: [
166
248
. custom( self . ccc) ,
167
249
. custom( other. ccc) ] ) )
168
250
}
169
251
252
+ /// Returns a character class from the intersection of this class and the given class.
170
253
public func intersection( _ other: CharacterClass ) -> CharacterClass {
171
254
CharacterClass ( . init( members: [
172
255
. intersection( self . ccc, other. ccc)
173
256
] ) )
174
257
}
175
258
259
+ /// Returns a character class by subtracting the given class from this class.
176
260
public func subtracting( _ other: CharacterClass ) -> CharacterClass {
177
261
CharacterClass ( . init( members: [
178
262
. subtraction( self . ccc, other. ccc)
179
263
] ) )
180
264
}
181
265
266
+ /// Returns a character class matching elements in one or the other, but not both,
267
+ /// of this class and the given class.
182
268
public func symmetricDifference( _ other: CharacterClass ) -> CharacterClass {
183
269
CharacterClass ( . init( members: [
184
270
. symmetricDifference( self . ccc, other. ccc)
0 commit comments