Skip to content

Commit af83051

Browse files
naithar authored and parkera committed
SR-3052. Incorrect string result for String methods with non latin symbols (swiftlang#706)
* added SR-3052 tests
* expanded non latin string test
* [SR-3052] utf8 encoding fix
1 parent 7ae394e commit af83051

File tree

2 files changed

+66
-10
lines changed

2 files changed

+66
-10
lines changed

Foundation/NSCFString.swift

+7-8
Original file line number | Diff line number | Diff line change
@@ -140,19 +140,18 @@ internal func _CFSwiftStringGetCharacters(_ str: AnyObject, range: CFRange, buff
140140
}
141141

142142
internal func _CFSwiftStringGetBytes(_ str: AnyObject, encoding: CFStringEncoding, range: CFRange, buffer: UnsafeMutablePointer<UInt8>?, maxBufLen: CFIndex, usedBufLen: UnsafeMutablePointer<CFIndex>?) -> CFIndex {
143+
let convertedLength: CFIndex
143144
switch encoding {
144145
// TODO: Don't treat many encodings like they are UTF8
145146
case CFStringEncoding(kCFStringEncodingUTF8), CFStringEncoding(kCFStringEncodingISOLatin1), CFStringEncoding(kCFStringEncodingMacRoman), CFStringEncoding(kCFStringEncodingASCII), CFStringEncoding(kCFStringEncodingNonLossyASCII):
146-
let encodingView = (str as! NSString)._swiftObject.utf8
147-
let start = encodingView.startIndex
147+
let encodingView = (str as! NSString).substring(with: NSRange(range)).utf8
148148
if let buffer = buffer {
149-
for idx in 0..<range.length {
150-
let characterIndex = encodingView.index(start, offsetBy: idx + range.location)
151-
let character = encodingView[characterIndex]
149+
for (idx, character) in encodingView.enumerated() {
152150
buffer.advanced(by: idx).initialize(to: character)
153151
}
154152
}
155-
usedBufLen?.pointee = range.length
153+
usedBufLen?.pointee = encodingView.count
154+
convertedLength = encodingView.count
156155

157156
case CFStringEncoding(kCFStringEncodingUTF16):
158157
let encodingView = (str as! NSString)._swiftObject.utf16
@@ -169,13 +168,13 @@ internal func _CFSwiftStringGetBytes(_ str: AnyObject, encoding: CFStringEncodin
169168
}
170169
// Every character was 2 bytes
171170
usedBufLen?.pointee = range.length * 2
172-
171+
convertedLength = range.length
173172

174173
default:
175174
fatalError("Attempted to get bytes of a Swift string using an unsupported encoding")
176175
}
177176

178-
return range.length
177+
return convertedLength
179178
}
180179

181180
internal func _CFSwiftStringCreateWithSubstring(_ str: AnyObject, range: CFRange) -> Unmanaged<AnyObject> {

TestFoundation/TestNSString.swift

+59-2
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,11 @@ class TestNSString : XCTestCase {
8181
("test_resolvingSymlinksInPath", test_resolvingSymlinksInPath),
8282
("test_expandingTildeInPath", test_expandingTildeInPath),
8383
("test_standardizingPath", test_standardizingPath),
84-
("test_removingPercentEncoding", test_removingPercentEncoding),
84+
("test_addingPercentEncoding", test_addingPercentEncoding),
85+
("test_removingPercentEncodingInLatin", test_removingPercentEncodingInLatin),
86+
("test_removingPercentEncodingInNonLatin", test_removingPercentEncodingInNonLatin),
87+
("test_removingPersentEncodingWithoutEncoding", test_removingPersentEncodingWithoutEncoding),
88+
("test_addingPercentEncodingAndBack", test_addingPercentEncodingAndBack),
8589
("test_stringByAppendingPathExtension", test_stringByAppendingPathExtension),
8690
("test_deletingPathExtension", test_deletingPathExtension),
8791
("test_ExternalRepresentation", test_ExternalRepresentation),
@@ -867,13 +871,66 @@ class TestNSString : XCTestCase {
867871
}
868872
}
869873

870-
func test_removingPercentEncoding() {
874+
func test_addingPercentEncoding() {
875+
let s1 = "a b".addingPercentEncoding(withAllowedCharacters: .alphanumerics)
876+
XCTAssertEqual(s1, "a%20b")
877+
878+
let s2 = "\u{0434}\u{043E}\u{043C}".addingPercentEncoding(withAllowedCharacters: .alphanumerics)
879+
XCTAssertEqual(s2, "%D0%B4%D0%BE%D0%BC")
880+
}
881+
882+
func test_removingPercentEncodingInLatin() {
871883
let s1 = "a%20b".removingPercentEncoding
872884
XCTAssertEqual(s1, "a b")
873885
let s2 = "a%1 b".removingPercentEncoding
874886
XCTAssertNil(s2, "returns nil for a string with an invalid percent encoding")
875887
}
876888

889+
func test_removingPercentEncodingInNonLatin() {
890+
let s1 = "\u{043C}\u{043E}\u{0439}%20\u{0434}\u{043E}\u{043C}".removingPercentEncoding
891+
XCTAssertEqual(s1, "\u{043C}\u{043E}\u{0439} \u{0434}\u{043E}\u{043C}")
892+
893+
let s2 = "%D0%B4%D0%BE%D0%BC".removingPercentEncoding
894+
XCTAssertEqual(s2, "\u{0434}\u{043E}\u{043C}")
895+
896+
let s3 = "\u{00E0}a%1 b".removingPercentEncoding
897+
XCTAssertNil(s3, "returns nil for a string with an invalid percent encoding")
898+
}
899+
900+
func test_removingPersentEncodingWithoutEncoding() {
901+
let cyrillicString = "\u{0434}\u{043E}\u{043C}"
902+
let cyrillicEscapedString = cyrillicString.removingPercentEncoding
903+
XCTAssertEqual(cyrillicString, cyrillicEscapedString)
904+
905+
let chineseString = "\u{623F}\u{5B50}"
906+
let chineseEscapedString = chineseString.removingPercentEncoding
907+
XCTAssertEqual(chineseString, chineseEscapedString)
908+
909+
let arabicString = "\u{0645}\u{0646}\u{0632}\u{0644}"
910+
let arabicEscapedString = arabicString.removingPercentEncoding
911+
XCTAssertEqual(arabicString, arabicEscapedString)
912+
913+
let randomString = "\u{00E0}\u{00E6}"
914+
let randomEscapedString = randomString.removingPercentEncoding
915+
XCTAssertEqual(randomString, randomEscapedString)
916+
917+
let latinString = "home"
918+
let latinEscapedString = latinString.removingPercentEncoding
919+
XCTAssertEqual(latinString, latinEscapedString)
920+
}
921+
922+
func test_addingPercentEncodingAndBack() {
923+
let latingString = "a b"
924+
let escapedLatingString = latingString.addingPercentEncoding(withAllowedCharacters: .alphanumerics)
925+
let returnedLatingString = escapedLatingString?.removingPercentEncoding
926+
XCTAssertEqual(returnedLatingString, latingString)
927+
928+
let cyrillicString = "\u{0434}\u{043E}\u{043C}"
929+
let escapedCyrillicString = cyrillicString.addingPercentEncoding(withAllowedCharacters: .alphanumerics)
930+
let returnedCyrillicString = escapedCyrillicString?.removingPercentEncoding
931+
XCTAssertEqual(returnedCyrillicString, cyrillicString)
932+
}
933+
877934
func test_stringByAppendingPathExtension() {
878935
let values = [
879936
NSString(string: "/tmp/scratch.old") : "/tmp/scratch.old.tiff",

0 commit comments

Comments
 (0)