Skip to content

Commit d9fb110

Browse files
authored
Revert "[stdlib] String index interchange, etc." (#10812)
rdar://33186295
1 parent 0a48318 commit d9fb110

27 files changed

+1409
-906
lines changed

stdlib/public/SDK/Foundation/ExtraStringAPIs.swift

+8-7
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,25 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
extension String.UTF16View.Index {
13+
// Random access for String.UTF16View, only when Foundation is
14+
// imported. Making this API dependent on Foundation decouples the
15+
// Swift core from a UTF16 representation.
16+
extension String.UTF16View.Index : Strideable {
1417
/// Construct from an integer offset.
15-
@available(swift, deprecated: 3.2)
16-
@available(swift, obsoleted: 4.0)
1718
public init(_ offset: Int) {
1819
_precondition(offset >= 0, "Negative UTF16 index offset not allowed")
1920
self.init(_offset: offset)
2021
}
2122

22-
@available(swift, deprecated: 3.2)
23-
@available(swift, obsoleted: 4.0)
2423
public func distance(to other: String.UTF16View.Index) -> Int {
2524
return _offset.distance(to: other._offset)
2625
}
2726

28-
@available(swift, deprecated: 3.2)
29-
@available(swift, obsoleted: 4.0)
3027
public func advanced(by n: Int) -> String.UTF16View.Index {
3128
return String.UTF16View.Index(_offset.advanced(by: n))
3229
}
3330
}
31+
32+
extension String.UTF16View : RandomAccessCollection {}
33+
extension String.UTF16View.Indices : RandomAccessCollection {}
34+

stdlib/public/SDK/Foundation/NSRange.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,8 @@ extension NSRange {
151151
where R.Bound == S.Index, S.Index == String.Index {
152152
let r = region.relative(to: target)
153153
self = NSRange(
154-
location: r.lowerBound.encodedOffset - target.startIndex.encodedOffset,
155-
length: r.upperBound.encodedOffset - r.lowerBound.encodedOffset
154+
location: r.lowerBound._utf16Index - target.startIndex._utf16Index,
155+
length: r.upperBound._utf16Index - r.lowerBound._utf16Index
156156
)
157157
}
158158

stdlib/public/SDK/Foundation/NSStringAPI.swift

+9-6
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ func _toNSArray<T, U : AnyObject>(_ a: [T], f: (T) -> U) -> NSArray {
3232

3333
func _toNSRange(_ r: Range<String.Index>) -> NSRange {
3434
return NSRange(
35-
location: r.lowerBound.encodedOffset,
36-
length: r.upperBound.encodedOffset - r.lowerBound.encodedOffset)
35+
location: r.lowerBound._utf16Index,
36+
length: r.upperBound._utf16Index - r.lowerBound._utf16Index)
3737
}
3838

3939
// We only need this for UnsafeMutablePointer, but there's not currently a way
@@ -72,7 +72,10 @@ extension String {
7272
/// Return an `Index` corresponding to the given offset in our UTF-16
7373
/// representation.
7474
func _index(_ utf16Index: Int) -> Index {
75-
return Index(encodedOffset: utf16Index)
75+
return Index(
76+
_base: String.UnicodeScalarView.Index(_position: utf16Index),
77+
in: characters
78+
)
7679
}
7780

7881
/// Return a `Range<Index>` corresponding to the given `NSRange` of
@@ -1257,7 +1260,7 @@ extension String {
12571260
public
12581261
func rangeOfComposedCharacterSequence(at anIndex: Index) -> Range<Index> {
12591262
return _range(
1260-
_ns.rangeOfComposedCharacterSequence(at: anIndex.encodedOffset))
1263+
_ns.rangeOfComposedCharacterSequence(at: anIndex._utf16Index))
12611264
}
12621265

12631266
// - (NSRange)rangeOfComposedCharacterSequencesForRange:(NSRange)range
@@ -1607,15 +1610,15 @@ extension String {
16071610
/// Returns a new string containing the characters of the
16081611
/// `String` from the one at a given index to the end.
16091612
public func substring(from index: Index) -> String {
1610-
return _ns.substring(from: index.encodedOffset)
1613+
return _ns.substring(from: index._utf16Index)
16111614
}
16121615

16131616
// - (NSString *)substringToIndex:(NSUInteger)anIndex
16141617

16151618
/// Returns a new string containing the characters of the
16161619
/// `String` up to, but not including, the one at a given index.
16171620
public func substring(to index: Index) -> String {
1618-
return _ns.substring(to: index.encodedOffset)
1621+
return _ns.substring(to: index._utf16Index)
16191622
}
16201623

16211624
// - (NSString *)substringWithRange:(NSRange)aRange

stdlib/public/SDK/Foundation/URLComponents.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,8 @@ public struct URLComponents : ReferenceConvertible, Hashable, Equatable, _Mutabl
194194
private func _toStringRange(_ r : NSRange) -> Range<String.Index>? {
195195
guard r.location != NSNotFound else { return nil }
196196

197-
let utf16Start = String.UTF16View.Index(encodedOffset: r.location)
198-
let utf16End = String.UTF16View.Index(encodedOffset: r.location + r.length)
197+
let utf16Start = String.UTF16View.Index(_offset: r.location)
198+
let utf16End = String.UTF16View.Index(_offset: r.location + r.length)
199199

200200
guard let s = self.string else { return nil }
201201
guard let start = String.Index(utf16Start, within: s) else { return nil }

stdlib/public/core/CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ set(SWIFTLIB_ESSENTIAL
126126
StringBuffer.swift
127127
StringComparable.swift
128128
StringCore.swift
129-
StringIndex.swift
130129
StringInterpolation.swift
131130
StringLegacy.swift
132131
StringRangeReplaceableCollection.swift.gyb

stdlib/public/core/GroupInfo.json

-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
"StringComparable.swift",
1818
"StringCore.swift",
1919
"StringHashable.swift",
20-
"StringIndex.swift",
2120
"StringIndexConversions.swift",
2221
"StringInterpolation.swift",
2322
"StringLegacy.swift",

stdlib/public/core/StringCharacterView.swift

+96-72
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ extension String {
6363

6464
/// The offset of this view's `_core` from an original core. This works
6565
/// around the fact that `_StringCore` is always zero-indexed.
66-
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset`
66+
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
6767
/// before that value is used as a `_core` index.
6868
@_versioned
6969
internal var _coreOffset: Int
@@ -178,66 +178,97 @@ extension String.CharacterView : BidirectionalCollection {
178178
return UnicodeScalarView(_core, coreOffset: _coreOffset)
179179
}
180180

181-
public typealias Index = String.Index
181+
/// A position in a string's `CharacterView` instance.
182+
///
183+
/// You can convert between indices of the different string views by using
184+
/// conversion initializers and the `samePosition(in:)` method overloads.
185+
/// The following example finds the index of the first space in the string's
186+
/// character view and then converts that to the same position in the UTF-8
187+
/// view:
188+
///
189+
/// let hearts = "Hearts <3 ♥︎ 💘"
190+
/// if let i = hearts.characters.index(of: " ") {
191+
/// let j = i.samePosition(in: hearts.utf8)
192+
/// print(Array(hearts.utf8[..<j]))
193+
/// }
194+
/// // Prints "[72, 101, 97, 114, 116, 115]"
195+
public struct Index : Comparable, CustomPlaygroundQuickLookable {
196+
public // SPI(Foundation)
197+
init(_base: String.UnicodeScalarView.Index, in c: String.CharacterView) {
198+
self._base = _base
199+
self._countUTF16 = c._measureExtendedGraphemeClusterForward(from: _base)
200+
}
201+
202+
internal init(_base: UnicodeScalarView.Index, _countUTF16: Int) {
203+
self._base = _base
204+
self._countUTF16 = _countUTF16
205+
}
206+
207+
internal let _base: UnicodeScalarView.Index
208+
209+
/// The count of this extended grapheme cluster in UTF-16 code units.
210+
internal let _countUTF16: Int
211+
212+
/// The integer offset of this index in UTF-16 code units.
213+
public // SPI(Foundation)
214+
var _utf16Index: Int {
215+
return _base._position
216+
}
217+
218+
/// The one-past-the-end index for this extended grapheme cluster in Unicode
219+
/// scalars.
220+
internal var _endBase: UnicodeScalarView.Index {
221+
return UnicodeScalarView.Index(_position: _utf16Index + _countUTF16)
222+
}
223+
224+
public var customPlaygroundQuickLook: PlaygroundQuickLook {
225+
return .int(Int64(_utf16Index))
226+
}
227+
}
228+
182229
public typealias IndexDistance = Int
183230

184231
/// The position of the first character in a nonempty character view.
185232
///
186233
/// In an empty character view, `startIndex` is equal to `endIndex`.
187234
public var startIndex: Index {
188-
return unicodeScalars.startIndex
235+
return Index(_base: unicodeScalars.startIndex, in: self)
189236
}
190237

191238
/// A character view's "past the end" position---that is, the position one
192239
/// greater than the last valid subscript argument.
193240
///
194241
/// In an empty character view, `endIndex` is equal to `startIndex`.
195242
public var endIndex: Index {
196-
return unicodeScalars.endIndex
243+
return Index(_base: unicodeScalars.endIndex, in: self)
197244
}
198245

199-
internal func _index(atEncodedOffset n: Int) -> Index {
200-
let stride = _measureExtendedGraphemeClusterForward(
201-
from: Index(encodedOffset: n))
202-
return Index(encodedOffset: n, .character(stride: UInt16(stride)))
203-
}
204-
205246
/// Returns the next consecutive position after `i`.
206247
///
207248
/// - Precondition: The next position is valid.
208249
public func index(after i: Index) -> Index {
209-
_precondition(
210-
i < unicodeScalars.endIndex,
250+
_precondition(i._base < unicodeScalars.endIndex,
211251
"cannot increment beyond endIndex")
212-
213-
_precondition(
214-
i >= unicodeScalars.startIndex,
252+
_precondition(i._base >= unicodeScalars.startIndex,
215253
"cannot increment invalid index")
216-
217-
var j = i
218-
while true {
219-
if case .character(let oldStride) = j._cache {
220-
return _index(atEncodedOffset: j.encodedOffset + Int(oldStride))
221-
}
222-
j = _index(atEncodedOffset: j.encodedOffset)
223-
}
254+
return Index(_base: i._endBase, in: self)
224255
}
225256

226257
/// Returns the previous consecutive position before `i`.
227258
///
228259
/// - Precondition: The previous position is valid.
229260
public func index(before i: Index) -> Index {
230-
_precondition(i > unicodeScalars.startIndex,
261+
_precondition(i._base > unicodeScalars.startIndex,
231262
"cannot decrement before startIndex")
232-
_precondition(i <= unicodeScalars.endIndex,
263+
_precondition(i._base <= unicodeScalars.endIndex,
233264
"cannot decrement invalid index")
234-
235-
let stride = _measureExtendedGraphemeClusterBackward(
236-
from: Index(encodedOffset: i.encodedOffset))
237-
265+
let predecessorLengthUTF16 =
266+
_measureExtendedGraphemeClusterBackward(from: i._base)
238267
return Index(
239-
encodedOffset: i.encodedOffset &- stride,
240-
.character(stride: numericCast(stride))
268+
_base: UnicodeScalarView.Index(
269+
_position: i._utf16Index - predecessorLengthUTF16
270+
),
271+
in: self
241272
)
242273
}
243274

@@ -334,8 +365,8 @@ extension String.CharacterView : BidirectionalCollection {
334365
internal func _measureExtendedGraphemeClusterForward(
335366
from start: UnicodeScalarView.Index
336367
) -> Int {
337-
let startPosition = start.encodedOffset
338-
let endPosition = unicodeScalars.endIndex.encodedOffset
368+
let startPosition = start._position
369+
let endPosition = unicodeScalars.endIndex._position
339370

340371
// No more graphemes
341372
if startPosition == endPosition {
@@ -348,7 +379,7 @@ extension String.CharacterView : BidirectionalCollection {
348379
}
349380

350381
// Our relative offset from the _StringCore's baseAddress pointer. If our
351-
// _core is not a substring, this is the same as start.encodedOffset. Otherwise,
382+
// _core is not a substring, this is the same as start._position. Otherwise,
352383
// it is the code unit relative offset into the substring and not the
353384
// absolute offset into the outer string.
354385
let startOffset = startPosition - _coreOffset
@@ -388,7 +419,7 @@ extension String.CharacterView : BidirectionalCollection {
388419
func _measureExtendedGraphemeClusterForwardSlow(
389420
startOffset: Int
390421
) -> Int {
391-
let endOffset = unicodeScalars.endIndex.encodedOffset - _coreOffset
422+
let endOffset = unicodeScalars.endIndex._position - _coreOffset
392423
let numCodeUnits = endOffset - startOffset
393424
_sanityCheck(numCodeUnits >= 2, "should have at least two code units")
394425

@@ -470,8 +501,8 @@ extension String.CharacterView : BidirectionalCollection {
470501
internal func _measureExtendedGraphemeClusterBackward(
471502
from end: UnicodeScalarView.Index
472503
) -> Int {
473-
let startPosition = unicodeScalars.startIndex.encodedOffset
474-
let endPosition = end.encodedOffset
504+
let startPosition = unicodeScalars.startIndex._position
505+
let endPosition = end._position
475506

476507
// No more graphemes
477508
if startPosition == endPosition {
@@ -528,7 +559,7 @@ extension String.CharacterView : BidirectionalCollection {
528559
) -> Int {
529560
let startOffset = 0
530561
let numCodeUnits = endOffset - startOffset
531-
_sanityCheck(unicodeScalars.startIndex.encodedOffset - _coreOffset == 0,
562+
_sanityCheck(unicodeScalars.startIndex._position - _coreOffset == 0,
532563
"position/offset mismatch in _StringCore as a substring")
533564
_sanityCheck(numCodeUnits >= 2,
534565
"should have at least two code units")
@@ -612,38 +643,31 @@ extension String.CharacterView : BidirectionalCollection {
612643
///
613644
/// - Parameter i: A valid index of the character view. `i`
614645
/// must be less than the view's end index.
615-
public subscript(i_: Index) -> Character {
616-
var i = i_
617-
while true {
618-
if case .character(let stride) = i._cache {
619-
if _fastPath(stride == 1) {
620-
// For single-code-unit graphemes, we can construct a Character directly
621-
// from a single unicode scalar (if sub-surrogate).
622-
let relativeOffset = i.encodedOffset - _coreOffset
623-
if _core.isASCII {
624-
let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
625-
// Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
626-
// performed in Debug mode, so they need to be duplicated here.
627-
// Falling back to the non-optimal behavior in the case they don't
628-
// pass.
629-
if relativeOffset >= asciiBuffer.startIndex &&
630-
relativeOffset < asciiBuffer.endIndex {
631-
return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
632-
}
633-
} else if _core._baseAddress != nil {
634-
let cu = _core._nthContiguous(relativeOffset)
635-
// Only constructible if sub-surrogate
636-
if (cu < 0xd800) {
637-
return Character(Unicode.Scalar(cu)._unsafelyUnwrappedUnchecked)
638-
}
639-
}
646+
public subscript(i: Index) -> Character {
647+
if i._countUTF16 == 1 {
648+
// For single-code-unit graphemes, we can construct a Character directly
649+
// from a single unicode scalar (if sub-surrogate).
650+
let relativeOffset = i._base._position - _coreOffset
651+
if _core.isASCII {
652+
let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
653+
// Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
654+
// performed in Debug mode, so they need to be duplicated here.
655+
// Falling back to the non-optimal behavior in the case they don't
656+
// pass.
657+
if relativeOffset >= asciiBuffer.startIndex &&
658+
relativeOffset < asciiBuffer.endIndex {
659+
return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
660+
}
661+
} else if _core._baseAddress != nil {
662+
let cu = _core._nthContiguous(relativeOffset)
663+
// Only constructible if sub-surrogate
664+
if (cu < 0xd800) {
665+
return Character(Unicode.Scalar(cu)._unsafelyUnwrappedUnchecked)
640666
}
641-
642-
let s = self[i..<Index(encodedOffset: i.encodedOffset + Int(stride))]
643-
return Character(s._ephemeralContent)
644667
}
645-
i = _index(atEncodedOffset: i.encodedOffset)
646668
}
669+
670+
return Character(String(unicodeScalars[i._base..<i._endBase]))
647671
}
648672
}
649673

@@ -672,8 +696,8 @@ extension String.CharacterView : RangeReplaceableCollection {
672696
with newElements: C
673697
) where C : Collection, C.Element == Character {
674698
let rawSubRange: Range<Int> =
675-
bounds.lowerBound.encodedOffset - _coreOffset
676-
..< bounds.upperBound.encodedOffset - _coreOffset
699+
bounds.lowerBound._base._position - _coreOffset
700+
..< bounds.upperBound._base._position - _coreOffset
677701
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
678702
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
679703
}
@@ -740,9 +764,9 @@ extension String.CharacterView {
740764
/// - Complexity: O(*n*) if the underlying string is bridged from
741765
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
742766
public subscript(bounds: Range<Index>) -> String.CharacterView {
743-
return String.CharacterView(
744-
unicodeScalars[bounds]._core,
745-
coreOffset: bounds.lowerBound.encodedOffset)
767+
let unicodeScalarRange = bounds.lowerBound._base..<bounds.upperBound._base
768+
return String.CharacterView(unicodeScalars[unicodeScalarRange]._core,
769+
coreOffset: unicodeScalarRange.lowerBound._position)
746770
}
747771
}
748772

stdlib/public/core/StringCore.swift

+1
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ public struct _StringCore {
428428
// In order to grow the substring in place, this _StringCore should point
429429
// at the substring at the end of a _StringBuffer. Otherwise, some other
430430
// String is using parts of the buffer beyond our last byte.
431+
let usedStart = _pointer(toElementAt:0)
431432
let usedEnd = _pointer(toElementAt:count)
432433

433434
// Attempt to claim unused capacity in the buffer

0 commit comments

Comments
 (0)