@@ -63,7 +63,7 @@ extension String {
63
63
64
64
/// The offset of this view's `_core` from an original core. This works
65
65
/// around the fact that `_StringCore` is always zero-indexed.
66
- /// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position `
66
+ /// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset`
67
67
/// before that value is used as a `_core` index.
68
68
@_versioned
69
69
internal var _coreOffset : Int
@@ -178,97 +178,66 @@ extension String.CharacterView : BidirectionalCollection {
178
178
return UnicodeScalarView ( _core, coreOffset: _coreOffset)
179
179
}
180
180
181
- /// A position in a string's `CharacterView` instance.
182
- ///
183
- /// You can convert between indices of the different string views by using
184
- /// conversion initializers and the `samePosition(in:)` method overloads.
185
- /// The following example finds the index of the first space in the string's
186
- /// character view and then converts that to the same position in the UTF-8
187
- /// view:
188
- ///
189
- /// let hearts = "Hearts <3 ♥︎ 💘"
190
- /// if let i = hearts.characters.index(of: " ") {
191
- /// let j = i.samePosition(in: hearts.utf8)
192
- /// print(Array(hearts.utf8[..<j]))
193
- /// }
194
- /// // Prints "[72, 101, 97, 114, 116, 115]"
195
- public struct Index : Comparable , CustomPlaygroundQuickLookable {
196
- public // SPI(Foundation)
197
- init ( _base: String . UnicodeScalarView . Index , in c: String . CharacterView ) {
198
- self . _base = _base
199
- self . _countUTF16 = c. _measureExtendedGraphemeClusterForward ( from: _base)
200
- }
201
-
202
- internal init ( _base: UnicodeScalarView . Index , _countUTF16: Int ) {
203
- self . _base = _base
204
- self . _countUTF16 = _countUTF16
205
- }
206
-
207
- internal let _base : UnicodeScalarView . Index
208
-
209
- /// The count of this extended grapheme cluster in UTF-16 code units.
210
- internal let _countUTF16 : Int
211
-
212
- /// The integer offset of this index in UTF-16 code units.
213
- public // SPI(Foundation)
214
- var _utf16Index : Int {
215
- return _base. _position
216
- }
217
-
218
- /// The one past end index for this extended grapheme cluster in Unicode
219
- /// scalars.
220
- internal var _endBase : UnicodeScalarView . Index {
221
- return UnicodeScalarView . Index ( _position: _utf16Index + _countUTF16)
222
- }
223
-
224
- public var customPlaygroundQuickLook : PlaygroundQuickLook {
225
- return . int( Int64 ( _utf16Index) )
226
- }
227
- }
228
-
181
+ public typealias Index = String . Index
229
182
public typealias IndexDistance = Int
230
183
231
184
/// The index of the first character when the character view is nonempty.
///
/// When the character view is empty, `startIndex` and `endIndex` are equal.
public var startIndex: Index {
  // Character indices share their representation with the unicode scalar
  // view, so its start position is returned directly.
  let first: Index = unicodeScalars.startIndex
  return first
}
237
190
238
191
/// The "past the end" position of the character view---one greater than the
/// last valid subscript argument.
///
/// When the character view is empty, `endIndex` and `startIndex` are equal.
public var endIndex: Index {
  // Character indices share their representation with the unicode scalar
  // view, so its end position is returned directly.
  let pastTheEnd: Index = unicodeScalars.endIndex
  return pastTheEnd
}
245
198
199
/// Builds the index at encoded offset `n`, recording in its cache the length
/// of the grapheme cluster that starts there.
///
/// The length is whatever `_measureExtendedGraphemeClusterForward(from:)`
/// reports for that offset; it is stored as the `.character` stride.
internal func _index(atEncodedOffset n: Int) -> Index {
  let clusterStart = Index(encodedOffset: n)
  let clusterLength = _measureExtendedGraphemeClusterForward(from: clusterStart)
  let cachedStride = UInt16(clusterLength)
  return Index(encodedOffset: n, .character(stride: cachedStride))
}
204
+
246
205
/// Returns the position immediately following `i`.
///
/// - Precondition: The next position is valid.
public func index(after i: Index) -> Index {
  _precondition(
    i < unicodeScalars.endIndex,
    " cannot increment beyond endIndex ")
  _precondition(
    i >= unicodeScalars.startIndex,
    " cannot increment invalid index ")

  var cursor = i
  while true {
    // An index without a cached character stride is rebuilt at the same
    // encoded offset (which measures the cluster) and then re-examined.
    guard case .character(let width) = cursor._cache else {
      cursor = _index(atEncodedOffset: cursor.encodedOffset)
      continue
    }
    // The successor starts right after the current grapheme cluster.
    return _index(atEncodedOffset: cursor.encodedOffset + Int(width))
  }
}
256
225
257
226
/// Returns the position immediately preceding `i`.
///
/// - Precondition: The previous position is valid.
public func index(before i: Index) -> Index {
  _precondition(
    i > unicodeScalars.startIndex,
    " cannot decrement before startIndex ")
  _precondition(
    i <= unicodeScalars.endIndex,
    " cannot decrement invalid index ")

  // Measure backward from a fresh index built from the encoded offset alone.
  let boundary = Index(encodedOffset: i.encodedOffset)
  let width = _measureExtendedGraphemeClusterBackward(from: boundary)

  // The predecessor begins `width` code units earlier; the measured width is
  // also stored as that index's cached character stride.
  let previousOffset = i.encodedOffset &- width
  return Index(
    encodedOffset: previousOffset,
    .character(stride: numericCast(width)))
}
274
243
@@ -365,8 +334,8 @@ extension String.CharacterView : BidirectionalCollection {
365
334
internal func _measureExtendedGraphemeClusterForward(
366
335
from start: UnicodeScalarView . Index
367
336
) -> Int {
368
- let startPosition = start. _position
369
- let endPosition = unicodeScalars. endIndex. _position
337
+ let startPosition = start. encodedOffset
338
+ let endPosition = unicodeScalars. endIndex. encodedOffset
370
339
371
340
// No more graphemes
372
341
if startPosition == endPosition {
@@ -379,7 +348,7 @@ extension String.CharacterView : BidirectionalCollection {
379
348
}
380
349
381
350
// Our relative offset from the _StringCore's baseAddress pointer. If our
382
- // _core is not a substring, this is the same as start._position . Otherwise,
351
+ // _core is not a substring, this is the same as start.encodedOffset. Otherwise,
383
352
// it is the code unit relative offset into the substring and not the
384
353
// absolute offset into the outer string.
385
354
let startOffset = startPosition - _coreOffset
@@ -419,7 +388,7 @@ extension String.CharacterView : BidirectionalCollection {
419
388
func _measureExtendedGraphemeClusterForwardSlow(
420
389
startOffset: Int
421
390
) -> Int {
422
- let endOffset = unicodeScalars. endIndex. _position - _coreOffset
391
+ let endOffset = unicodeScalars. endIndex. encodedOffset - _coreOffset
423
392
let numCodeUnits = endOffset - startOffset
424
393
_sanityCheck ( numCodeUnits >= 2 , " should have at least two code units " )
425
394
@@ -501,8 +470,8 @@ extension String.CharacterView : BidirectionalCollection {
501
470
internal func _measureExtendedGraphemeClusterBackward(
502
471
from end: UnicodeScalarView . Index
503
472
) -> Int {
504
- let startPosition = unicodeScalars. startIndex. _position
505
- let endPosition = end. _position
473
+ let startPosition = unicodeScalars. startIndex. encodedOffset
474
+ let endPosition = end. encodedOffset
506
475
507
476
// No more graphemes
508
477
if startPosition == endPosition {
@@ -559,7 +528,7 @@ extension String.CharacterView : BidirectionalCollection {
559
528
) -> Int {
560
529
let startOffset = 0
561
530
let numCodeUnits = endOffset - startOffset
562
- _sanityCheck ( unicodeScalars. startIndex. _position - _coreOffset == 0 ,
531
+ _sanityCheck ( unicodeScalars. startIndex. encodedOffset - _coreOffset == 0 ,
563
532
" position/offset mismatch in _StringCore as a substring " )
564
533
_sanityCheck ( numCodeUnits >= 2 ,
565
534
" should have at least two code units " )
@@ -643,31 +612,38 @@ extension String.CharacterView : BidirectionalCollection {
643
612
///
644
613
/// - Parameter position: A valid index of the character view. `position`
645
614
/// must be less than the view's end index.
646
public subscript(i_: Index) -> Character {
  var position = i_
  while true {
    // An index without a cached character stride is rebuilt at the same
    // encoded offset (which measures the cluster) and then re-examined.
    guard case .character(let width) = position._cache else {
      position = _index(atEncodedOffset: position.encodedOffset)
      continue
    }

    if _fastPath(width == 1) {
      // A grapheme made of a single code unit can be turned into a Character
      // straight from one unicode scalar (provided it is sub-surrogate).
      let relativeOffset = position.encodedOffset - _coreOffset
      if _core.isASCII {
        let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
        // UnsafeBufferPointer (asciiBuffer) only bounds-checks in Debug
        // mode, so the check is repeated here. When it fails we fall
        // through to the slower general path below.
        if asciiBuffer.startIndex <= relativeOffset,
          relativeOffset < asciiBuffer.endIndex {
          return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
        }
      } else if _core._baseAddress != nil {
        let codeUnit = _core._nthContiguous(relativeOffset)
        // Only constructible if sub-surrogate
        if codeUnit < 0xd800 {
          return Character(Unicode.Scalar(codeUnit)._unsafelyUnwrappedUnchecked)
        }
      }
    }

    // General path: slice out the whole grapheme cluster and build the
    // Character from the slice's content.
    let clusterEnd = Index(encodedOffset: position.encodedOffset + Int(width))
    let cluster = self[position..<clusterEnd]
    return Character(cluster._ephemeralContent)
  }
}
672
648
}
673
649
@@ -696,8 +672,8 @@ extension String.CharacterView : RangeReplaceableCollection {
696
672
with newElements: C
697
673
) where C : Collection , C. Element == Character {
698
674
let rawSubRange : Range < Int > =
699
- bounds. lowerBound. _base . _position - _coreOffset
700
- ..< bounds. upperBound. _base . _position - _coreOffset
675
+ bounds. lowerBound. encodedOffset - _coreOffset
676
+ ..< bounds. upperBound. encodedOffset - _coreOffset
701
677
let lazyUTF16 = newElements. lazy. flatMap { $0. utf16 }
702
678
_core. replaceSubrange ( rawSubRange, with: lazyUTF16)
703
679
}
@@ -764,9 +740,9 @@ extension String.CharacterView {
764
740
/// - Complexity: O(*n*) if the underlying string is bridged from
765
741
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
766
742
public subscript(bounds: Range<Index>) -> String.CharacterView {
  // Take the core of the matching unicode-scalar slice, and record where the
  // slice begins so later indices can be rebased onto that core.
  let slicedCore = unicodeScalars[bounds]._core
  let sliceOffset = bounds.lowerBound.encodedOffset
  return String.CharacterView(slicedCore, coreOffset: sliceOffset)
}
771
747
}
772
748
0 commit comments