Skip to content

Commit 40aae6b

Browse files
lorentey authored and
milseman committed
[String] 32-bit platform support
Add support for 32-bit platforms for UTF-8 backed String.
1 parent e6582c3 commit 40aae6b

12 files changed

+925
-253
lines changed

stdlib/public/core/SmallString.swift

+67-58
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,10 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
//
14-
// NOTE: This is a prototype, it does not have e.g. 32-bit support yet.
15-
//
16-
1713
@_fixed_layout @usableFromInline
1814
internal struct _SmallString {
1915
@usableFromInline
20-
internal typealias RawBitPattern = _StringObject.RawBitPattern
16+
internal typealias RawBitPattern = (UInt64, UInt64)
2117

2218
// Small strings are values; store them raw
2319
@usableFromInline
@@ -29,13 +25,13 @@ internal struct _SmallString {
2925
}
3026

3127
@inlinable
32-
internal var leadingRawBits: UInt {
28+
internal var leadingRawBits: UInt64 {
3329
@inline(__always) get { return _storage.0 }
3430
@inline(__always) set { _storage.0 = newValue }
3531
}
3632

3733
@inlinable
38-
internal var trailingRawBits: UInt {
34+
internal var trailingRawBits: UInt64 {
3935
@inline(__always) get { return _storage.1 }
4036
@inline(__always) set { _storage.1 = newValue }
4137
}
@@ -66,15 +62,41 @@ internal struct _SmallString {
6662
// TODO
6763
extension _SmallString {
6864
@inlinable
69-
internal static var capacity: Int { @inline(__always) get { return 15 } }
65+
internal static var capacity: Int {
66+
@inline(__always) get {
67+
#if arch(i386) || arch(arm)
68+
return 10
69+
#else
70+
return 15
71+
#endif
72+
}
73+
}
74+
75+
@inlinable
76+
internal var discriminator: _StringObject.Discriminator {
77+
@inline(__always) get {
78+
let value = _storage.1 &>> _StringObject.Nibbles.discriminatorShift
79+
return _StringObject.Discriminator(UInt8(truncatingIfNeeded: value))
80+
}
81+
@inline(__always) set {
82+
_storage.1 &= _StringObject.Nibbles.largeAddressMask
83+
_storage.1 |= (
84+
UInt64(truncatingIfNeeded: newValue._value)
85+
&<< _StringObject.Nibbles.discriminatorShift)
86+
}
87+
}
7088

7189
@inlinable
72-
internal var capacity: Int { @inline(__always) get { return 15 } }
90+
internal var capacity: Int {
91+
@inline(__always) get {
92+
return _SmallString.capacity
93+
}
94+
}
7395

7496
@inlinable
7597
internal var count: Int {
7698
@inline(__always) get {
77-
return _StringObject(rawUncheckedValue: self.rawBits).smallCount
99+
return discriminator.smallCount
78100
}
79101
}
80102

@@ -86,7 +108,7 @@ extension _SmallString {
86108
@inlinable
87109
internal var isASCII: Bool {
88110
@inline(__always) get {
89-
return _StringObject(rawUncheckedValue: self.rawBits).smallIsASCII
111+
return discriminator.smallIsASCII
90112
}
91113
}
92114

@@ -97,25 +119,17 @@ extension _SmallString {
97119
@inline(__always) get {
98120
return (
99121
self._storage.0,
100-
_StringObject(
101-
rawUncheckedValue: self.rawBits
102-
).undiscriminatedObjectRawBits
103-
)
122+
self._storage.1 & _StringObject.Nibbles.largeAddressMask)
104123
}
105124
}
106125

107126
@inlinable
108127
internal func computeIsASCII() -> Bool {
109128
// TODO(UTF8 codegen): Either mask off the discriminator before, or don't set bit
110129
// after
111-
112-
#if arch(i386) || arch(arm)
113-
unimplemented_utf8_32bit()
114-
#else
115-
let asciiMask: UInt = 0x8080_8080_8080_8080
130+
let asciiMask: UInt64 = 0x8080_8080_8080_8080
116131
let raw = zeroTerminatedRawCodeUnits
117-
return raw.0 & asciiMask == 0 && raw.1 & asciiMask == 0
118-
#endif
132+
return (raw.0 & asciiMask == 0) && (raw.1 & asciiMask == 0)
119133
}
120134
}
121135

@@ -135,7 +149,7 @@ extension _SmallString {
135149
#if INTERNAL_CHECKS_ENABLED
136150
print("""
137151
smallUTF8: count: \(self.count), codeUnits: \(
138-
self.map { String($0, radix: 16) }.dropLast().joined()
152+
self.map { String($0, radix: 16) }.joined()
139153
)
140154
""")
141155
#endif // INTERNAL_CHECKS_ENABLED
@@ -218,9 +232,7 @@ extension _SmallString {
218232
}
219233

220234
_sanityCheck(len <= _SmallString.capacity)
221-
var obj = _StringObject(rawUncheckedValue: self.rawBits)
222-
obj.setSmallCount(len, isASCII: self.computeIsASCII())
223-
self = _SmallString(obj)
235+
discriminator = .small(withCount: len, isASCII: self.computeIsASCII())
224236
}
225237

226238
// Write to excess capacity. `f` should return the new count.
@@ -249,20 +261,15 @@ extension _SmallString {
249261

250262
// TODO(SIMD): The below can be replaced with just a masked unaligned
251263
// vector load
252-
253264
let ptr = input.baseAddress._unsafelyUnwrappedUnchecked
254-
255-
let low = _bytesToUInt(ptr, Swift.min(input.count, 8))
256-
let high = count > 8 ? _bytesToUInt(ptr + 8, count &- 8) : 0
257-
258-
let isASCII = (low | high) & 0x8080_8080_8080_8080 == 0
259-
let smallDiscriminator = _StringObject.Nibbles.small(
260-
withCount: count, isASCII: isASCII)
261-
262-
self.init(_StringObject(
263-
discriminator: smallDiscriminator,
264-
valueLeading: low,
265-
valueTrailing: high))
265+
let leading = _bytesToUInt64(ptr, Swift.min(input.count, 8))
266+
let trailing = count > 8 ? _bytesToUInt64(ptr + 8, count &- 8) : 0
267+
268+
let isASCII = (leading | trailing) & 0x8080_8080_8080_8080 == 0
269+
let discriminator = _StringObject.Discriminator.small(
270+
withCount: count,
271+
isASCII: isASCII)
272+
self.init(raw: (leading, trailing | discriminator.rawBits))
266273
}
267274

268275
@usableFromInline // @testable
@@ -281,18 +288,16 @@ extension _SmallString {
281288
_sanityCheck(writeIdx == totalCount)
282289

283290
let isASCII = base.isASCII && other.isASCII
284-
let smallDiscriminator = _StringObject.Nibbles.small(
285-
withCount: totalCount, isASCII: isASCII)
291+
let discriminator = _StringObject.Discriminator.small(
292+
withCount: totalCount,
293+
isASCII: isASCII)
286294

287295
let (leading, trailing) = result.zeroTerminatedRawCodeUnits
288-
self.init(_StringObject(
289-
discriminator: smallDiscriminator,
290-
valueLeading: leading,
291-
valueTrailing: trailing))
296+
self.init(raw: (leading, trailing | discriminator.rawBits))
292297
}
293298
}
294299

295-
#if _runtime(_ObjC)
300+
#if _runtime(_ObjC) && !(arch(i386) || arch(arm))
296301
// Cocoa interop
297302
extension _SmallString {
298303
// Resiliently create from a tagged cocoa string
@@ -312,14 +317,14 @@ extension _SmallString {
312317
}
313318
#endif
314319

315-
extension UInt {
320+
extension UInt64 {
316321
// Fetches the `i`th byte, from least-significant to most-significant
317322
//
318323
// TODO: endianness awareness day
319324
@inlinable @inline(__always)
320325
internal func _uncheckedGetByte(at i: Int) -> UInt8 {
321-
_sanityCheck(i >= 0 && i < MemoryLayout<UInt>.stride)
322-
let shift = UInt(bitPattern: i) &* 8
326+
_sanityCheck(i >= 0 && i < MemoryLayout<UInt64>.stride)
327+
let shift = UInt64(truncatingIfNeeded: i) &* 8
323328
return UInt8(truncatingIfNeeded: (self &>> shift))
324329
}
325330

@@ -328,22 +333,26 @@ extension UInt {
328333
// TODO: endianness awareness day
329334
@inlinable @inline(__always)
330335
internal mutating func _uncheckedSetByte(at i: Int, to value: UInt8) {
331-
_sanityCheck(i >= 0 && i < MemoryLayout<UInt>.stride)
332-
let shift = UInt(bitPattern: i) &* 8
333-
let valueMask = 0xFF &<< shift
334-
self = (self & ~valueMask) | (UInt(truncatingIfNeeded: value) &<< shift)
336+
_sanityCheck(i >= 0 && i < MemoryLayout<UInt64>.stride)
337+
let shift = UInt64(truncatingIfNeeded: i) &* 8
338+
let valueMask: UInt64 = 0xFF &<< shift
339+
self = (self & ~valueMask) | (UInt64(truncatingIfNeeded: value) &<< shift)
335340
}
336341
}
337342

338343
@inlinable @inline(__always)
339-
internal func _bytesToUInt(_ input: UnsafePointer<UInt8>, _ c: Int) -> UInt {
340-
var r: UInt = 0
344+
internal func _bytesToUInt64(
345+
_ input: UnsafePointer<UInt8>,
346+
_ c: Int
347+
) -> UInt64 {
348+
// FIXME: This should be unified with _loadPartialUnalignedUInt64LE.
349+
// Unfortunately that causes regressions in literal concatenation tests. (Some
350+
// owned to guaranteed specializations don't get inlined.)
351+
var r: UInt64 = 0
341352
var shift: Int = 0
342353
for idx in 0..<c {
343-
r = r | (UInt(input[idx]) &<< shift)
354+
r = r | (UInt64(input[idx]) &<< shift)
344355
shift = shift &+ 8
345356
}
346357
return r
347358
}
348-
349-

stdlib/public/core/StringBridge.swift

+8-1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ private var kCFStringEncodingUTF8 : _swift_shims_CFStringEncoding {
7979
@inline(__always) get { return 0x8000100 }
8080
}
8181

82+
#if !(arch(i386) || arch(arm))
8283
// Resiliently write a tagged cocoa string's contents into a buffer
8384
@_effects(releasenone) // @opaque
8485
internal func _bridgeTagged(
@@ -95,6 +96,7 @@ internal func _bridgeTagged(
9596
kCFStringEncodingUTF8, 0, 0, ptr, bufPtr.count, &count)
9697
return length == numCharWritten ? count : nil
9798
}
99+
#endif
98100

99101
@_effects(releasenone) // @opaque
100102
internal func _cocoaUTF8Pointer(_ str: _CocoaString) -> UnsafePointer<UInt8>? {
@@ -133,9 +135,12 @@ private func _getCocoaStringPointer(
133135
internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts {
134136
if let abstract = cocoaString as? _AbstractStringStorage {
135137
return abstract.asString._guts
136-
} else if _isObjCTaggedPointer(cocoaString) {
138+
}
139+
#if !(arch(i386) || arch(arm))
140+
if _isObjCTaggedPointer(cocoaString) {
137141
return _StringGuts(_SmallString(taggedCocoa: cocoaString))
138142
}
143+
#endif
139144

140145
// "copy" it into a value to be sure nobody will modify behind
141146
// our backs. In practice, when value is already immutable, this
@@ -150,9 +155,11 @@ internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts {
150155
let immutableCopy
151156
= _stdlib_binary_CFStringCreateCopy(cocoaString) as AnyObject
152157

158+
#if !(arch(i386) || arch(arm))
153159
if _isObjCTaggedPointer(immutableCopy) {
154160
return _StringGuts(_SmallString(taggedCocoa: immutableCopy))
155161
}
162+
#endif
156163

157164
let (fastUTF8, isASCII): (Bool, Bool)
158165
switch _getCocoaStringPointer(immutableCopy) {

stdlib/public/core/StringIndex.swift

+2-6
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,12 @@ extension String.Index {
6969

7070
@inlinable @inline(__always)
7171
internal init(encodedOffset: Int, transcodedOffset: Int) {
72-
#if arch(i386) || arch(arm)
73-
unimplemented_utf8_32bit()
74-
#else
75-
_sanityCheck(encodedOffset == encodedOffset & 0x0000_FFFF_FFFF_FFFF)
76-
_sanityCheck((0...3) ~= transcodedOffset)
7772
let pos = UInt64(truncatingIfNeeded: encodedOffset)
7873
let trans = UInt64(truncatingIfNeeded: transcodedOffset)
74+
_sanityCheck(pos == pos & 0x0000_FFFF_FFFF_FFFF)
75+
_sanityCheck(trans <= 3)
7976

8077
self.init((pos &<< 16) | (trans &<< 14))
81-
#endif
8278
}
8379

8480
/// Creates a new index at the specified code unit offset.

0 commit comments

Comments
 (0)