@@ -63,7 +63,7 @@ extension String {
63
63
64
64
/// The offset of this view's `_core` from an original core. This works
65
65
/// around the fact that `_StringCore` is always zero-indexed.
66
- /// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset `
66
+ /// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position `
67
67
/// before that value is used as a `_core` index.
68
68
@_versioned
69
69
internal var _coreOffset : Int
@@ -178,66 +178,97 @@ extension String.CharacterView : BidirectionalCollection {
178
178
return UnicodeScalarView ( _core, coreOffset: _coreOffset)
179
179
}
180
180
181
- public typealias Index = String . Index
181
+ /// A position in a string's `CharacterView` instance.
182
+ ///
183
+ /// You can convert between indices of the different string views by using
184
+ /// conversion initializers and the `samePosition(in:)` method overloads.
185
+ /// The following example finds the index of the first space in the string's
186
+ /// character view and then converts that to the same position in the UTF-8
187
+ /// view:
188
+ ///
189
+ /// let hearts = "Hearts <3 ♥︎ 💘"
190
+ /// if let i = hearts.characters.index(of: " ") {
191
+ /// let j = i.samePosition(in: hearts.utf8)
192
+ /// print(Array(hearts.utf8[..<j]))
193
+ /// }
194
+ /// // Prints "[72, 101, 97, 114, 116, 115]"
195
+ public struct Index : Comparable , CustomPlaygroundQuickLookable {
196
+ public // SPI(Foundation)
197
+ init ( _base: String . UnicodeScalarView . Index , in c: String . CharacterView ) {
198
+ self . _base = _base
199
+ self . _countUTF16 = c. _measureExtendedGraphemeClusterForward ( from: _base)
200
+ }
201
+
202
+ internal init ( _base: UnicodeScalarView . Index , _countUTF16: Int ) {
203
+ self . _base = _base
204
+ self . _countUTF16 = _countUTF16
205
+ }
206
+
207
+ internal let _base : UnicodeScalarView . Index
208
+
209
+ /// The count of this extended grapheme cluster in UTF-16 code units.
210
+ internal let _countUTF16 : Int
211
+
212
+ /// The integer offset of this index in UTF-16 code units.
213
+ public // SPI(Foundation)
214
+ var _utf16Index : Int {
215
+ return _base. _position
216
+ }
217
+
218
+ /// The one past end index for this extended grapheme cluster in Unicode
219
+ /// scalars.
220
+ internal var _endBase : UnicodeScalarView . Index {
221
+ return UnicodeScalarView . Index ( _position: _utf16Index + _countUTF16)
222
+ }
223
+
224
+ public var customPlaygroundQuickLook : PlaygroundQuickLook {
225
+ return . int( Int64 ( _utf16Index) )
226
+ }
227
+ }
228
+
182
229
public typealias IndexDistance = Int
183
230
184
231
/// The position of the first character in a nonempty character view.
185
232
///
186
233
/// In an empty character view, `startIndex` is equal to `endIndex`.
187
234
public var startIndex : Index {
188
- return unicodeScalars. startIndex
235
+ return Index ( _base : unicodeScalars. startIndex, in : self )
189
236
}
190
237
191
238
/// A character view's "past the end" position---that is, the position one
192
239
/// greater than the last valid subscript argument.
193
240
///
194
241
/// In an empty character view, `endIndex` is equal to `startIndex`.
195
242
public var endIndex : Index {
196
- return unicodeScalars. endIndex
243
+ return Index ( _base : unicodeScalars. endIndex, in : self )
197
244
}
198
245
199
- internal func _index( atEncodedOffset n: Int ) -> Index {
200
- let stride = _measureExtendedGraphemeClusterForward (
201
- from: Index ( encodedOffset: n) )
202
- return Index ( encodedOffset: n, . character( stride: UInt16 ( stride) ) )
203
- }
204
-
205
246
/// Returns the next consecutive position after `i`.
206
247
///
207
248
/// - Precondition: The next position is valid.
208
249
public func index( after i: Index ) -> Index {
209
- _precondition (
210
- i < unicodeScalars. endIndex,
250
+ _precondition ( i. _base < unicodeScalars. endIndex,
211
251
" cannot increment beyond endIndex " )
212
-
213
- _precondition (
214
- i >= unicodeScalars. startIndex,
252
+ _precondition ( i. _base >= unicodeScalars. startIndex,
215
253
" cannot increment invalid index " )
216
-
217
- var j = i
218
- while true {
219
- if case . character( let oldStride) = j. _cache {
220
- return _index ( atEncodedOffset: j. encodedOffset + Int( oldStride) )
221
- }
222
- j = _index ( atEncodedOffset: j. encodedOffset)
223
- }
254
+ return Index ( _base: i. _endBase, in: self )
224
255
}
225
256
226
257
/// Returns the previous consecutive position before `i`.
227
258
///
228
259
/// - Precondition: The previous position is valid.
229
260
public func index( before i: Index ) -> Index {
230
- _precondition ( i > unicodeScalars. startIndex,
261
+ _precondition ( i. _base > unicodeScalars. startIndex,
231
262
" cannot decrement before startIndex " )
232
- _precondition ( i <= unicodeScalars. endIndex,
263
+ _precondition ( i. _base <= unicodeScalars. endIndex,
233
264
" cannot decrement invalid index " )
234
-
235
- let stride = _measureExtendedGraphemeClusterBackward (
236
- from: Index ( encodedOffset: i. encodedOffset) )
237
-
265
+ let predecessorLengthUTF16 =
266
+ _measureExtendedGraphemeClusterBackward ( from: i. _base)
238
267
return Index (
239
- encodedOffset: i. encodedOffset &- stride,
240
- . character( stride: numericCast ( stride) )
268
+ _base: UnicodeScalarView . Index (
269
+ _position: i. _utf16Index - predecessorLengthUTF16
270
+ ) ,
271
+ in: self
241
272
)
242
273
}
243
274
@@ -334,8 +365,8 @@ extension String.CharacterView : BidirectionalCollection {
334
365
internal func _measureExtendedGraphemeClusterForward(
335
366
from start: UnicodeScalarView . Index
336
367
) -> Int {
337
- let startPosition = start. encodedOffset
338
- let endPosition = unicodeScalars. endIndex. encodedOffset
368
+ let startPosition = start. _position
369
+ let endPosition = unicodeScalars. endIndex. _position
339
370
340
371
// No more graphemes
341
372
if startPosition == endPosition {
@@ -348,7 +379,7 @@ extension String.CharacterView : BidirectionalCollection {
348
379
}
349
380
350
381
// Our relative offset from the _StringCore's baseAddress pointer. If our
351
- // _core is not a substring, this is the same as start.encodedOffset . Otherwise,
382
+ // _core is not a substring, this is the same as start._position . Otherwise,
352
383
// it is the code unit relative offset into the substring and not the
353
384
// absolute offset into the outer string.
354
385
let startOffset = startPosition - _coreOffset
@@ -388,7 +419,7 @@ extension String.CharacterView : BidirectionalCollection {
388
419
func _measureExtendedGraphemeClusterForwardSlow(
389
420
startOffset: Int
390
421
) -> Int {
391
- let endOffset = unicodeScalars. endIndex. encodedOffset - _coreOffset
422
+ let endOffset = unicodeScalars. endIndex. _position - _coreOffset
392
423
let numCodeUnits = endOffset - startOffset
393
424
_sanityCheck ( numCodeUnits >= 2 , " should have at least two code units " )
394
425
@@ -470,8 +501,8 @@ extension String.CharacterView : BidirectionalCollection {
470
501
internal func _measureExtendedGraphemeClusterBackward(
471
502
from end: UnicodeScalarView . Index
472
503
) -> Int {
473
- let startPosition = unicodeScalars. startIndex. encodedOffset
474
- let endPosition = end. encodedOffset
504
+ let startPosition = unicodeScalars. startIndex. _position
505
+ let endPosition = end. _position
475
506
476
507
// No more graphemes
477
508
if startPosition == endPosition {
@@ -528,7 +559,7 @@ extension String.CharacterView : BidirectionalCollection {
528
559
) -> Int {
529
560
let startOffset = 0
530
561
let numCodeUnits = endOffset - startOffset
531
- _sanityCheck ( unicodeScalars. startIndex. encodedOffset - _coreOffset == 0 ,
562
+ _sanityCheck ( unicodeScalars. startIndex. _position - _coreOffset == 0 ,
532
563
" position/offset mismatch in _StringCore as a substring " )
533
564
_sanityCheck ( numCodeUnits >= 2 ,
534
565
" should have at least two code units " )
@@ -612,38 +643,31 @@ extension String.CharacterView : BidirectionalCollection {
612
643
///
613
644
/// - Parameter position: A valid index of the character view. `position`
614
645
/// must be less than the view's end index.
615
- public subscript( i_: Index ) -> Character {
616
- var i = i_
617
- while true {
618
- if case . character( let stride) = i. _cache {
619
- if _fastPath ( stride == 1 ) {
620
- // For single-code-unit graphemes, we can construct a Character directly
621
- // from a single unicode scalar (if sub-surrogate).
622
- let relativeOffset = i. encodedOffset - _coreOffset
623
- if _core. isASCII {
624
- let asciiBuffer = _core. asciiBuffer. _unsafelyUnwrappedUnchecked
625
- // Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
626
- // performed in Debug mode, so they need to be duplicated here.
627
- // Falling back to the non-optimal behavior in the case they don't
628
- // pass.
629
- if relativeOffset >= asciiBuffer. startIndex &&
630
- relativeOffset < asciiBuffer. endIndex {
631
- return Character ( Unicode . Scalar ( asciiBuffer [ relativeOffset] ) )
632
- }
633
- } else if _core. _baseAddress != nil {
634
- let cu = _core. _nthContiguous ( relativeOffset)
635
- // Only constructible if sub-surrogate
636
- if ( cu < 0xd800 ) {
637
- return Character ( Unicode . Scalar ( cu) . _unsafelyUnwrappedUnchecked)
638
- }
639
- }
646
+ public subscript( i: Index ) -> Character {
647
+ if i. _countUTF16 == 1 {
648
+ // For single-code-unit graphemes, we can construct a Character directly
649
+ // from a single unicode scalar (if sub-surrogate).
650
+ let relativeOffset = i. _base. _position - _coreOffset
651
+ if _core. isASCII {
652
+ let asciiBuffer = _core. asciiBuffer. _unsafelyUnwrappedUnchecked
653
+ // Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
654
+ // performed in Debug mode, so they need to be duplicated here.
655
+ // Falling back to the non-optimal behavior in the case they don't
656
+ // pass.
657
+ if relativeOffset >= asciiBuffer. startIndex &&
658
+ relativeOffset < asciiBuffer. endIndex {
659
+ return Character ( Unicode . Scalar ( asciiBuffer [ relativeOffset] ) )
660
+ }
661
+ } else if _core. _baseAddress != nil {
662
+ let cu = _core. _nthContiguous ( relativeOffset)
663
+ // Only constructible if sub-surrogate
664
+ if ( cu < 0xd800 ) {
665
+ return Character ( Unicode . Scalar ( cu) . _unsafelyUnwrappedUnchecked)
640
666
}
641
-
642
- let s = self [ i..< Index ( encodedOffset: i. encodedOffset + Int( stride) ) ]
643
- return Character ( s. _ephemeralContent)
644
667
}
645
- i = _index ( atEncodedOffset: i. encodedOffset)
646
668
}
669
+
670
+ return Character ( String ( unicodeScalars [ i. _base..< i. _endBase] ) )
647
671
}
648
672
}
649
673
@@ -672,8 +696,8 @@ extension String.CharacterView : RangeReplaceableCollection {
672
696
with newElements: C
673
697
) where C : Collection , C. Element == Character {
674
698
let rawSubRange : Range < Int > =
675
- bounds. lowerBound. encodedOffset - _coreOffset
676
- ..< bounds. upperBound. encodedOffset - _coreOffset
699
+ bounds. lowerBound. _base . _position - _coreOffset
700
+ ..< bounds. upperBound. _base . _position - _coreOffset
677
701
let lazyUTF16 = newElements. lazy. flatMap { $0. utf16 }
678
702
_core. replaceSubrange ( rawSubRange, with: lazyUTF16)
679
703
}
@@ -740,9 +764,9 @@ extension String.CharacterView {
740
764
/// - Complexity: O(*n*) if the underlying string is bridged from
741
765
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
742
766
public subscript( bounds: Range < Index > ) -> String . CharacterView {
743
- return String . CharacterView (
744
- unicodeScalars [ bounds ] . _core,
745
- coreOffset: bounds . lowerBound. encodedOffset )
767
+ let unicodeScalarRange = bounds . lowerBound . _base ..< bounds . upperBound . _base
768
+ return String . CharacterView ( unicodeScalars [ unicodeScalarRange ] . _core,
769
+ coreOffset: unicodeScalarRange . lowerBound. _position )
746
770
}
747
771
}
748
772
0 commit comments