@@ -18,6 +18,7 @@ internal import _ForSwiftFoundation
1818@available ( FoundationPreview 0 . 4 , * )
1919extension String {
2020 public func data( using encoding: String . Encoding , allowLossyConversion: Bool = false ) -> Data ? {
21+ // allowLossyConversion is a no-op for UTF8 and UTF16. For UTF32, we fall back to NSString when lossy conversion is requested on Darwin platforms.
2122 switch encoding {
2223 case . utf8:
2324 return Data ( self . utf8)
@@ -53,57 +54,101 @@ extension String {
5354 }
5455 return allASCII ? data : nil
5556 }
56- default :
57- #if FOUNDATION_FRAMEWORK
58- // TODO: Implement data(using:allowLossyConversion:) in Swift
59- return _ns. data (
60- using: encoding. rawValue,
61- allowLossyConversion: allowLossyConversion)
57+ case . utf16BigEndian, . utf16LittleEndian, . utf16:
58+ let bom : UInt16 ?
59+ let swap : Bool
60+
61+ if encoding == . utf16 {
62+ swap = false
63+ bom = 0xFEFF
64+ } else if encoding == . utf16BigEndian {
65+ #if _endian(little)
66+ swap = true
6267#else
63- switch encoding {
64- case . utf16BigEndian, . utf16LittleEndian:
65- // This creates a contiguous storage for Data to simply memcpy, the most efficient way to give it bytes.
66- return withUnsafeTemporaryAllocation ( of: UInt8 . self, capacity: self . utf16. count * 2 ) { utf16Buffer in
67- _ = utf16Buffer. initialize ( from: UTF16ToDataAdaptor ( self . utf16, endianness: Endianness ( encoding) !) )
68- defer { utf16Buffer. deinitialize ( ) }
69- return Data ( utf16Buffer)
70- }
71- case . utf16:
68+ swap = false
69+ #endif
70+ bom = nil
71+ } else if encoding == . utf16LittleEndian {
7272#if _endian(little)
73- let data = Data ( [ 0xFF , 0xFE ] )
74- let hostEncoding : String . Encoding = . utf16LittleEndian
73+ swap = false
7574#else
76- let data = Data ( [ 0xFE , 0xFF ] )
77- let hostEncoding : String . Encoding = . utf16BigEndian
75+ swap = true
7876#endif
79- guard let swapped = self . data ( using: hostEncoding, allowLossyConversion: allowLossyConversion) else {
80- return nil
77+ bom = nil
78+ } else {
79+ fatalError ( " Unreachable " )
80+ }
81+
82+ // Grab this value once, as it requires doing a calculation over String's UTF8 storage
83+ let inputCount = self . utf16. count
84+
85+ // The output may have one additional UTF-16 code unit, if it has a BOM
86+ let outputCount = bom == nil ? inputCount : inputCount + 1
87+
88+ // Allocate enough memory to hold the UTF16 bytes after conversion. We will pass this off to Data.
89+ let utf16Pointer = calloc ( outputCount, MemoryLayout< UInt16> . size) !. assumingMemoryBound ( to: UInt16 . self)
90+ let utf16Buffer = UnsafeMutableBufferPointer < UInt16 > ( start: utf16Pointer, count: outputCount)
91+
92+ if let bom {
93+ // Put the BOM in, then copy the UTF16 bytes to the buffer after it.
94+ utf16Buffer [ 0 ] = bom
95+ let afterBOMBuffer = UnsafeMutableBufferPointer ( rebasing: utf16Buffer [ 1 ..< utf16Buffer. endIndex] )
96+ self . _copyUTF16CodeUnits ( into: afterBOMBuffer, range: 0 ..< inputCount)
97+ } else {
98+ self . _copyUTF16CodeUnits ( into: utf16Buffer, range: 0 ..< inputCount)
99+ }
100+
101+
102+ // If we need to swap endianness, we do it as a second pass over the data
103+ if swap {
104+ #if _endian(little)
105+ // Swap, including the BOM if it is there
106+ for u in utf16Buffer. enumerated ( ) {
107+ utf16Buffer [ u. 0 ] = u. 1 . bigEndian
81108 }
82-
83- return data + swapped
84- case . utf32BigEndian, . utf32LittleEndian:
85- // This creates a contiguous storage for Data to simply memcpy, the most efficient way to give it bytes.
86- return withUnsafeTemporaryAllocation ( of: UInt8 . self, capacity: self . unicodeScalars. count * 4 ) { utf32Buffer in
87- _ = utf32Buffer. initialize ( from: UnicodeScalarToDataAdaptor ( self . unicodeScalars, endianness: Endianness ( encoding) !) )
88- defer { utf32Buffer. deinitialize ( ) }
89- return Data ( utf32Buffer)
109+ #else
110+ for u in utf16Buffer. enumerated ( ) {
111+ utf16Buffer [ u. 0 ] = u. 1 . littleEndian
90112 }
91- case . utf32:
113+ #endif
114+ }
115+
116+ return Data ( bytesNoCopy: utf16Buffer. baseAddress!, count: utf16Buffer. count * 2 , deallocator: . free)
117+
118+ case . utf32BigEndian, . utf32LittleEndian:
119+ // This creates contiguous storage for Data to simply memcpy.
120+ return withUnsafeTemporaryAllocation ( of: UInt8 . self, capacity: self . unicodeScalars. count * 4 ) { utf32Buffer in
121+ _ = utf32Buffer. initialize ( from: UnicodeScalarToDataAdaptor ( self . unicodeScalars, endianness: Endianness ( encoding) !) )
122+ defer { utf32Buffer. deinitialize ( ) }
123+ return Data ( utf32Buffer)
124+ }
125+ case . utf32:
126+ #if FOUNDATION_FRAMEWORK
127+ // Only the CoreFoundation code currently handles the rare case of allowing lossy conversion for UTF32
128+ if allowLossyConversion {
129+ return _ns. data (
130+ using: encoding. rawValue,
131+ allowLossyConversion: allowLossyConversion)
132+ }
133+ #endif
92134#if _endian(little)
93- let data = Data ( [ 0xFF , 0xFE , 0x00 , 0x00 ] )
94- let hostEncoding : String . Encoding = . utf32LittleEndian
135+ let data = Data ( [ 0xFF , 0xFE , 0x00 , 0x00 ] )
136+ let hostEncoding : String . Encoding = . utf32LittleEndian
95137#else
96- let data = Data ( [ 0x00 , 0x00 , 0xFE , 0xFF ] )
97- let hostEncoding : String . Encoding = . utf32BigEndian
138+ let data = Data ( [ 0x00 , 0x00 , 0xFE , 0xFF ] )
139+ let hostEncoding : String . Encoding = . utf32BigEndian
98140#endif
99- guard let swapped = self . data ( using: hostEncoding, allowLossyConversion: allowLossyConversion) else {
100- return nil
101- }
102-
103- return data + swapped
104- default :
141+ guard let swapped = self . data ( using: hostEncoding, allowLossyConversion: allowLossyConversion) else {
105142 return nil
106143 }
144+
145+ return data + swapped
146+ default :
147+ #if FOUNDATION_FRAMEWORK
148+ // Other encodings, defer to the CoreFoundation implementation
149+ return _ns. data ( using: encoding. rawValue, allowLossyConversion: allowLossyConversion)
150+ #else
151+ return nil
107152#endif
108153 }
109154 }
0 commit comments