Skip to content

Commit 9159239

Browse files
author
Dave Abrahams
committed
Un-revert "[stdlib] String index interchange, etc." (#10812)
I failed to merge the upstream changes to swift-corelibs-foundation at the same time as I merged #9806, and it broke on Linux. Going to get it right this time.
1 parent d9fb110 commit 9159239

27 files changed

+906
-1409
lines changed

stdlib/public/SDK/Foundation/ExtraStringAPIs.swift

+7-8
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,24 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
// Random access for String.UTF16View, only when Foundation is
14-
// imported. Making this API dependent on Foundation decouples the
15-
// Swift core from a UTF16 representation.
16-
extension String.UTF16View.Index : Strideable {
13+
extension String.UTF16View.Index {
1714
/// Construct from an integer offset.
15+
@available(swift, deprecated: 3.2)
16+
@available(swift, obsoleted: 4.0)
1817
public init(_ offset: Int) {
1918
_precondition(offset >= 0, "Negative UTF16 index offset not allowed")
2019
self.init(_offset: offset)
2120
}
2221

22+
@available(swift, deprecated: 3.2)
23+
@available(swift, obsoleted: 4.0)
2324
public func distance(to other: String.UTF16View.Index) -> Int {
2425
return _offset.distance(to: other._offset)
2526
}
2627

28+
@available(swift, deprecated: 3.2)
29+
@available(swift, obsoleted: 4.0)
2730
public func advanced(by n: Int) -> String.UTF16View.Index {
2831
return String.UTF16View.Index(_offset.advanced(by: n))
2932
}
3033
}
31-
32-
extension String.UTF16View : RandomAccessCollection {}
33-
extension String.UTF16View.Indices : RandomAccessCollection {}
34-

stdlib/public/SDK/Foundation/NSRange.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,8 @@ extension NSRange {
151151
where R.Bound == S.Index, S.Index == String.Index {
152152
let r = region.relative(to: target)
153153
self = NSRange(
154-
location: r.lowerBound._utf16Index - target.startIndex._utf16Index,
155-
length: r.upperBound._utf16Index - r.lowerBound._utf16Index
154+
location: r.lowerBound.encodedOffset - target.startIndex.encodedOffset,
155+
length: r.upperBound.encodedOffset - r.lowerBound.encodedOffset
156156
)
157157
}
158158

stdlib/public/SDK/Foundation/NSStringAPI.swift

+6-9
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ func _toNSArray<T, U : AnyObject>(_ a: [T], f: (T) -> U) -> NSArray {
3232

3333
func _toNSRange(_ r: Range<String.Index>) -> NSRange {
3434
return NSRange(
35-
location: r.lowerBound._utf16Index,
36-
length: r.upperBound._utf16Index - r.lowerBound._utf16Index)
35+
location: r.lowerBound.encodedOffset,
36+
length: r.upperBound.encodedOffset - r.lowerBound.encodedOffset)
3737
}
3838

3939
// We only need this for UnsafeMutablePointer, but there's not currently a way
@@ -72,10 +72,7 @@ extension String {
7272
/// Return an `Index` corresponding to the given offset in our UTF-16
7373
/// representation.
7474
func _index(_ utf16Index: Int) -> Index {
75-
return Index(
76-
_base: String.UnicodeScalarView.Index(_position: utf16Index),
77-
in: characters
78-
)
75+
return Index(encodedOffset: utf16Index)
7976
}
8077

8178
/// Return a `Range<Index>` corresponding to the given `NSRange` of
@@ -1260,7 +1257,7 @@ extension String {
12601257
public
12611258
func rangeOfComposedCharacterSequence(at anIndex: Index) -> Range<Index> {
12621259
return _range(
1263-
_ns.rangeOfComposedCharacterSequence(at: anIndex._utf16Index))
1260+
_ns.rangeOfComposedCharacterSequence(at: anIndex.encodedOffset))
12641261
}
12651262

12661263
// - (NSRange)rangeOfComposedCharacterSequencesForRange:(NSRange)range
@@ -1610,15 +1607,15 @@ extension String {
16101607
/// Returns a new string containing the characters of the
16111608
/// `String` from the one at a given index to the end.
16121609
public func substring(from index: Index) -> String {
1613-
return _ns.substring(from: index._utf16Index)
1610+
return _ns.substring(from: index.encodedOffset)
16141611
}
16151612

16161613
// - (NSString *)substringToIndex:(NSUInteger)anIndex
16171614

16181615
/// Returns a new string containing the characters of the
16191616
/// `String` up to, but not including, the one at a given index.
16201617
public func substring(to index: Index) -> String {
1621-
return _ns.substring(to: index._utf16Index)
1618+
return _ns.substring(to: index.encodedOffset)
16221619
}
16231620

16241621
// - (NSString *)substringWithRange:(NSRange)aRange

stdlib/public/SDK/Foundation/URLComponents.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,8 @@ public struct URLComponents : ReferenceConvertible, Hashable, Equatable, _Mutabl
194194
private func _toStringRange(_ r : NSRange) -> Range<String.Index>? {
195195
guard r.location != NSNotFound else { return nil }
196196

197-
let utf16Start = String.UTF16View.Index(_offset: r.location)
198-
let utf16End = String.UTF16View.Index(_offset: r.location + r.length)
197+
let utf16Start = String.UTF16View.Index(encodedOffset: r.location)
198+
let utf16End = String.UTF16View.Index(encodedOffset: r.location + r.length)
199199

200200
guard let s = self.string else { return nil }
201201
guard let start = String.Index(utf16Start, within: s) else { return nil }

stdlib/public/core/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ set(SWIFTLIB_ESSENTIAL
126126
StringBuffer.swift
127127
StringComparable.swift
128128
StringCore.swift
129+
StringIndex.swift
129130
StringInterpolation.swift
130131
StringLegacy.swift
131132
StringRangeReplaceableCollection.swift.gyb

stdlib/public/core/GroupInfo.json

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"StringComparable.swift",
1818
"StringCore.swift",
1919
"StringHashable.swift",
20+
"StringIndex.swift",
2021
"StringIndexConversions.swift",
2122
"StringInterpolation.swift",
2223
"StringLegacy.swift",

stdlib/public/core/StringCharacterView.swift

+72-96
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ extension String {
6363

6464
/// The offset of this view's `_core` from an original core. This works
6565
/// around the fact that `_StringCore` is always zero-indexed.
66-
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex._position`
66+
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset`
6767
/// before that value is used as a `_core` index.
6868
@_versioned
6969
internal var _coreOffset: Int
@@ -178,97 +178,66 @@ extension String.CharacterView : BidirectionalCollection {
178178
return UnicodeScalarView(_core, coreOffset: _coreOffset)
179179
}
180180

181-
/// A position in a string's `CharacterView` instance.
182-
///
183-
/// You can convert between indices of the different string views by using
184-
/// conversion initializers and the `samePosition(in:)` method overloads.
185-
/// The following example finds the index of the first space in the string's
186-
/// character view and then converts that to the same position in the UTF-8
187-
/// view:
188-
///
189-
/// let hearts = "Hearts <3 ♥︎ 💘"
190-
/// if let i = hearts.characters.index(of: " ") {
191-
/// let j = i.samePosition(in: hearts.utf8)
192-
/// print(Array(hearts.utf8[..<j]))
193-
/// }
194-
/// // Prints "[72, 101, 97, 114, 116, 115]"
195-
public struct Index : Comparable, CustomPlaygroundQuickLookable {
196-
public // SPI(Foundation)
197-
init(_base: String.UnicodeScalarView.Index, in c: String.CharacterView) {
198-
self._base = _base
199-
self._countUTF16 = c._measureExtendedGraphemeClusterForward(from: _base)
200-
}
201-
202-
internal init(_base: UnicodeScalarView.Index, _countUTF16: Int) {
203-
self._base = _base
204-
self._countUTF16 = _countUTF16
205-
}
206-
207-
internal let _base: UnicodeScalarView.Index
208-
209-
/// The count of this extended grapheme cluster in UTF-16 code units.
210-
internal let _countUTF16: Int
211-
212-
/// The integer offset of this index in UTF-16 code units.
213-
public // SPI(Foundation)
214-
var _utf16Index: Int {
215-
return _base._position
216-
}
217-
218-
/// The one past end index for this extended grapheme cluster in Unicode
219-
/// scalars.
220-
internal var _endBase: UnicodeScalarView.Index {
221-
return UnicodeScalarView.Index(_position: _utf16Index + _countUTF16)
222-
}
223-
224-
public var customPlaygroundQuickLook: PlaygroundQuickLook {
225-
return .int(Int64(_utf16Index))
226-
}
227-
}
228-
181+
public typealias Index = String.Index
229182
public typealias IndexDistance = Int
230183

231184
/// The position of the first character in a nonempty character view.
232185
///
233186
/// In an empty character view, `startIndex` is equal to `endIndex`.
234187
public var startIndex: Index {
235-
return Index(_base: unicodeScalars.startIndex, in: self)
188+
return unicodeScalars.startIndex
236189
}
237190

238191
/// A character view's "past the end" position---that is, the position one
239192
/// greater than the last valid subscript argument.
240193
///
241194
/// In an empty character view, `endIndex` is equal to `startIndex`.
242195
public var endIndex: Index {
243-
return Index(_base: unicodeScalars.endIndex, in: self)
196+
return unicodeScalars.endIndex
244197
}
245198

199+
internal func _index(atEncodedOffset n: Int) -> Index {
200+
let stride = _measureExtendedGraphemeClusterForward(
201+
from: Index(encodedOffset: n))
202+
return Index(encodedOffset: n, .character(stride: UInt16(stride)))
203+
}
204+
246205
/// Returns the next consecutive position after `i`.
247206
///
248207
/// - Precondition: The next position is valid.
249208
public func index(after i: Index) -> Index {
250-
_precondition(i._base < unicodeScalars.endIndex,
209+
_precondition(
210+
i < unicodeScalars.endIndex,
251211
"cannot increment beyond endIndex")
252-
_precondition(i._base >= unicodeScalars.startIndex,
212+
213+
_precondition(
214+
i >= unicodeScalars.startIndex,
253215
"cannot increment invalid index")
254-
return Index(_base: i._endBase, in: self)
216+
217+
var j = i
218+
while true {
219+
if case .character(let oldStride) = j._cache {
220+
return _index(atEncodedOffset: j.encodedOffset + Int(oldStride))
221+
}
222+
j = _index(atEncodedOffset: j.encodedOffset)
223+
}
255224
}
256225

257226
/// Returns the previous consecutive position before `i`.
258227
///
259228
/// - Precondition: The previous position is valid.
260229
public func index(before i: Index) -> Index {
261-
_precondition(i._base > unicodeScalars.startIndex,
230+
_precondition(i > unicodeScalars.startIndex,
262231
"cannot decrement before startIndex")
263-
_precondition(i._base <= unicodeScalars.endIndex,
232+
_precondition(i <= unicodeScalars.endIndex,
264233
"cannot decrement invalid index")
265-
let predecessorLengthUTF16 =
266-
_measureExtendedGraphemeClusterBackward(from: i._base)
234+
235+
let stride = _measureExtendedGraphemeClusterBackward(
236+
from: Index(encodedOffset: i.encodedOffset))
237+
267238
return Index(
268-
_base: UnicodeScalarView.Index(
269-
_position: i._utf16Index - predecessorLengthUTF16
270-
),
271-
in: self
239+
encodedOffset: i.encodedOffset &- stride,
240+
.character(stride: numericCast(stride))
272241
)
273242
}
274243

@@ -365,8 +334,8 @@ extension String.CharacterView : BidirectionalCollection {
365334
internal func _measureExtendedGraphemeClusterForward(
366335
from start: UnicodeScalarView.Index
367336
) -> Int {
368-
let startPosition = start._position
369-
let endPosition = unicodeScalars.endIndex._position
337+
let startPosition = start.encodedOffset
338+
let endPosition = unicodeScalars.endIndex.encodedOffset
370339

371340
// No more graphemes
372341
if startPosition == endPosition {
@@ -379,7 +348,7 @@ extension String.CharacterView : BidirectionalCollection {
379348
}
380349

381350
// Our relative offset from the _StringCore's baseAddress pointer. If our
382-
// _core is not a substring, this is the same as start._position. Otherwise,
351+
// _core is not a substring, this is the same as start.encodedOffset. Otherwise,
383352
// it is the code unit relative offset into the substring and not the
384353
// absolute offset into the outer string.
385354
let startOffset = startPosition - _coreOffset
@@ -419,7 +388,7 @@ extension String.CharacterView : BidirectionalCollection {
419388
func _measureExtendedGraphemeClusterForwardSlow(
420389
startOffset: Int
421390
) -> Int {
422-
let endOffset = unicodeScalars.endIndex._position - _coreOffset
391+
let endOffset = unicodeScalars.endIndex.encodedOffset - _coreOffset
423392
let numCodeUnits = endOffset - startOffset
424393
_sanityCheck(numCodeUnits >= 2, "should have at least two code units")
425394

@@ -501,8 +470,8 @@ extension String.CharacterView : BidirectionalCollection {
501470
internal func _measureExtendedGraphemeClusterBackward(
502471
from end: UnicodeScalarView.Index
503472
) -> Int {
504-
let startPosition = unicodeScalars.startIndex._position
505-
let endPosition = end._position
473+
let startPosition = unicodeScalars.startIndex.encodedOffset
474+
let endPosition = end.encodedOffset
506475

507476
// No more graphemes
508477
if startPosition == endPosition {
@@ -559,7 +528,7 @@ extension String.CharacterView : BidirectionalCollection {
559528
) -> Int {
560529
let startOffset = 0
561530
let numCodeUnits = endOffset - startOffset
562-
_sanityCheck(unicodeScalars.startIndex._position - _coreOffset == 0,
531+
_sanityCheck(unicodeScalars.startIndex.encodedOffset - _coreOffset == 0,
563532
"position/offset mismatch in _StringCore as a substring")
564533
_sanityCheck(numCodeUnits >= 2,
565534
"should have at least two code units")
@@ -643,31 +612,38 @@ extension String.CharacterView : BidirectionalCollection {
643612
///
644613
/// - Parameter position: A valid index of the character view. `position`
645614
/// must be less than the view's end index.
646-
public subscript(i: Index) -> Character {
647-
if i._countUTF16 == 1 {
648-
// For single-code-unit graphemes, we can construct a Character directly
649-
// from a single unicode scalar (if sub-surrogate).
650-
let relativeOffset = i._base._position - _coreOffset
651-
if _core.isASCII {
652-
let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
653-
// Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
654-
// performed in Debug mode, so they need to be duplicated here.
655-
// Falling back to the non-optimal behavior in the case they don't
656-
// pass.
657-
if relativeOffset >= asciiBuffer.startIndex &&
658-
relativeOffset < asciiBuffer.endIndex {
659-
return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
660-
}
661-
} else if _core._baseAddress != nil {
662-
let cu = _core._nthContiguous(relativeOffset)
663-
// Only constructible if sub-surrogate
664-
if (cu < 0xd800) {
665-
return Character(Unicode.Scalar(cu)._unsafelyUnwrappedUnchecked)
615+
public subscript(i_: Index) -> Character {
616+
var i = i_
617+
while true {
618+
if case .character(let stride) = i._cache {
619+
if _fastPath(stride == 1) {
620+
// For single-code-unit graphemes, we can construct a Character directly
621+
// from a single unicode scalar (if sub-surrogate).
622+
let relativeOffset = i.encodedOffset - _coreOffset
623+
if _core.isASCII {
624+
let asciiBuffer = _core.asciiBuffer._unsafelyUnwrappedUnchecked
625+
// Bounds checks in an UnsafeBufferPointer (asciiBuffer) are only
626+
// performed in Debug mode, so they need to be duplicated here.
627+
// Falling back to the non-optimal behavior in the case they don't
628+
// pass.
629+
if relativeOffset >= asciiBuffer.startIndex &&
630+
relativeOffset < asciiBuffer.endIndex {
631+
return Character(Unicode.Scalar(asciiBuffer[relativeOffset]))
632+
}
633+
} else if _core._baseAddress != nil {
634+
let cu = _core._nthContiguous(relativeOffset)
635+
// Only constructible if sub-surrogate
636+
if (cu < 0xd800) {
637+
return Character(Unicode.Scalar(cu)._unsafelyUnwrappedUnchecked)
638+
}
639+
}
666640
}
641+
642+
let s = self[i..<Index(encodedOffset: i.encodedOffset + Int(stride))]
643+
return Character(s._ephemeralContent)
667644
}
645+
i = _index(atEncodedOffset: i.encodedOffset)
668646
}
669-
670-
return Character(String(unicodeScalars[i._base..<i._endBase]))
671647
}
672648
}
673649

@@ -696,8 +672,8 @@ extension String.CharacterView : RangeReplaceableCollection {
696672
with newElements: C
697673
) where C : Collection, C.Element == Character {
698674
let rawSubRange: Range<Int> =
699-
bounds.lowerBound._base._position - _coreOffset
700-
..< bounds.upperBound._base._position - _coreOffset
675+
bounds.lowerBound.encodedOffset - _coreOffset
676+
..< bounds.upperBound.encodedOffset - _coreOffset
701677
let lazyUTF16 = newElements.lazy.flatMap { $0.utf16 }
702678
_core.replaceSubrange(rawSubRange, with: lazyUTF16)
703679
}
@@ -764,9 +740,9 @@ extension String.CharacterView {
764740
/// - Complexity: O(*n*) if the underlying string is bridged from
765741
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
766742
public subscript(bounds: Range<Index>) -> String.CharacterView {
767-
let unicodeScalarRange = bounds.lowerBound._base..<bounds.upperBound._base
768-
return String.CharacterView(unicodeScalars[unicodeScalarRange]._core,
769-
coreOffset: unicodeScalarRange.lowerBound._position)
743+
return String.CharacterView(
744+
unicodeScalars[bounds]._core,
745+
coreOffset: bounds.lowerBound.encodedOffset)
770746
}
771747
}
772748

stdlib/public/core/StringCore.swift

-1
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,6 @@ public struct _StringCore {
428428
// In order to grow the substring in place, this _StringCore should point
429429
// at the substring at the end of a _StringBuffer. Otherwise, some other
430430
// String is using parts of the buffer beyond our last byte.
431-
let usedStart = _pointer(toElementAt:0)
432431
let usedEnd = _pointer(toElementAt:count)
433432

434433
// Attempt to claim unused capacity in the buffer

0 commit comments

Comments
 (0)