From 3bc76c01f956557e4766ac466f76edd895483fa4 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Fri, 9 May 2025 15:49:34 +0900
Subject: [PATCH 01/14] Import implementation for String Encoding Names from
 other repo.

- source: https://github.com/YOCKOW/SF-StringEncodingNameImpl
---
 .../String/String+Encoding+Names.swift        | 551 ++++++++++++++++++
 1 file changed, 551 insertions(+)
 create mode 100644 Sources/FoundationEssentials/String/String+Encoding+Names.swift

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
new file mode 100644
index 000000000..07ca26c21
--- /dev/null
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -0,0 +1,551 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+
+// MARK: - Private extensions for parsing encoding names
+
+private extension Unicode.Scalar {
+  var _isASCIINumeric: Bool {
+    return ("0"..."9").contains(self)
+  }
+
+  var _asciiNumericValue: Int {
+    assert(_isASCIINumeric)
+    return Int(self.value - 0x30)
+  }
+
+  /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
+  ///
+  /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
+  var _isASCIIWhitespace: Bool {
+    switch self.value {
+    case 0x09, 0x0A, 0x0C, 0x0D, 0x20: true
+    default: false
+    }
+  }
+}
+
+private extension String {
+    var _trimmed: Substring.UnicodeScalarView {
+        let scalars = self.unicodeScalars
+        let isNonWhitespace: (Unicode.Scalar) -> Bool = { !$0._isASCIIWhitespace }
+        guard let firstIndexOfNonWhitespace = scalars.firstIndex(where: isNonWhitespace),
+              let lastIndexOfNonWhitespace = scalars.lastIndex(where: isNonWhitespace) else {
+            return Substring.UnicodeScalarView()
+        }
+        return scalars[firstIndexOfNonWhitespace...lastIndexOfNonWhitespace]
+    }
+}
+
+/// A type that holds a `Unicode.Scalar` where its value is compared case-insensitively with others'
+/// _if the value is within ASCII range_.
+private struct ASCIICaseInsensitiveUnicodeScalar: Equatable,
+                                                  ExpressibleByUnicodeScalarLiteral {
+    typealias UnicodeScalarLiteralType = Unicode.Scalar.UnicodeScalarLiteralType
+
+    let scalar: Unicode.Scalar
+
+    @inlinable
+    init(_ scalar: Unicode.Scalar) {
+        assert(scalar.isASCII)
+        self.scalar = scalar
+    }
+
+    init(unicodeScalarLiteral value: Unicode.Scalar.UnicodeScalarLiteralType) {
+        self.init(Unicode.Scalar(unicodeScalarLiteral: value))
+    }
+
+    @inlinable
+    static func ==(
+        lhs: ASCIICaseInsensitiveUnicodeScalar,
+        rhs: ASCIICaseInsensitiveUnicodeScalar
+    ) -> Bool {
+        if lhs.scalar == rhs.scalar {
+            return true
+        } else if ("A"..."Z").contains(lhs.scalar) {
+            return lhs.scalar.value + 0x20 == rhs.scalar.value
+        } else if ("a"..."z").contains(lhs.scalar) {
+            return lhs.scalar.value - 0x20 == rhs.scalar.value
+        }
+        return false
+    }
+}
+
+/// A type to tokenize string for `String.Encoding` names.
+private protocol StringEncodingNameTokenizer: ~Copyable {
+    associatedtype Token: Equatable
+    init(name: String)
+    mutating func nextToken() throws -> Token?
+}
+
+extension StringEncodingNameTokenizer where Self: ~Copyable {
+    mutating func hasEqualTokens(with other: consuming Self) throws -> Bool {
+        while let myToken = try self.nextToken() {
+            guard let otherToken = try other.nextToken(),
+                  myToken == otherToken else {
+                return false
+            }
+        }
+        return try other.nextToken() == nil
+    }
+}
+
+/// ICU-independent parser that follows [Charset Alias Matching](https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching).
+private struct UTS22Tokenizer: StringEncodingNameTokenizer, ~Copyable {
+    enum Token: Equatable {
+        case numeric(Int)
+        case alphabet(ASCIICaseInsensitiveUnicodeScalar)
+    }
+
+    enum Error: Swift.Error {
+        case tooLargeNumericValue
+    }
+
+    let scalars: String.UnicodeScalarView
+
+    private var _currentIndex: String.UnicodeScalarView.Index
+
+    init(name: String) {
+        self.scalars = name.unicodeScalars
+        self._currentIndex = scalars.startIndex
+    }
+
+    mutating func nextToken() throws -> Token? {
+        guard _currentIndex < scalars.endIndex else {
+            return nil
+        }
+
+        let scalar = scalars[_currentIndex]
+        switch scalar {
+        case "0"..."9":
+            // Parse a numeric value ignoring leading zeros.
+            //
+            // NOTE: To prevent the value from overflow, a threhold is set here.
+            //       The max number of digits to be expected is 8 as of now: i.g. `csISO42JISC62261978`.
+            //       It wouldn't matter to throw an error in practice when the value is too large.
+
+            let threshold: Int = 999_999_999
+            var value = scalar._asciiNumericValue
+            scalars.formIndex(after: &_currentIndex)
+            while _currentIndex < scalars.endIndex {
+                let currentScalar = scalars[_currentIndex]
+                guard currentScalar._isASCIINumeric else {
+                    break
+                }
+                value = value * 10 + currentScalar._asciiNumericValue
+                if value > threshold {
+                    throw Error.tooLargeNumericValue
+                }
+                scalars.formIndex(after: &_currentIndex)
+            }
+            return .numeric(value)
+        case "A"..."Z", "a"..."z":
+            scalars.formIndex(after: &_currentIndex)
+            return .alphabet(ASCIICaseInsensitiveUnicodeScalar(scalar))
+        default:
+            scalars.formIndex(after: &_currentIndex)
+            if _currentIndex < scalars.endIndex {
+                return try nextToken()
+            }
+            return nil
+        }
+    }
+}
+
+
+/// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s.
+private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable {
+    typealias Token = ASCIICaseInsensitiveUnicodeScalar
+
+      enum Error: Swift.Error {
+          case nonASCII
+      }
+
+    let scalars: Substring.UnicodeScalarView
+
+    var _currentIndex: Substring.UnicodeScalarView.Index
+
+    init(name: String) {
+        self.scalars = name._trimmed
+        self._currentIndex = scalars.startIndex
+    }
+
+    mutating func nextToken() throws -> Token? {
+        guard _currentIndex < scalars.endIndex else {
+            return nil
+        }
+        let scalar = scalars[_currentIndex]
+        guard scalar.isASCII else { throw Error.nonASCII }
+        defer {
+            scalars.formIndex(after: &_currentIndex)
+        }
+        return  ASCIICaseInsensitiveUnicodeScalar(scalar)
+    }
+}
+
+
+private extension String {
+    func isEqual<T>(
+        to other: String,
+        tokenizedBy tokenizer: T.Type
+    ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable {
+        do {
+            var myTokenizer = T(name: self)
+            let otherTokenizer = T(name: other)
+            return try myTokenizer.hasEqualTokens(with: otherTokenizer)
+        } catch {
+            // Any errors imply that `self` or `other` contains invalid characters.
+            return false
+        }
+    }
+}
+
+
+// MARK: - IANA Charset Names
+
+/// Info about a charset in the IANA registry: its names and aliases.
+private struct IANACharset {
+    /// The "Preferred MIME Name" of this charset, or `nil` if there is none.
+    let preferredMIMEName: String?
+
+    /// The primary name of this charset.
+    let name: String
+
+    /// The aliases of this charset.
+    let aliases: [String]
+
+    /// The preferred MIME name when there is one; otherwise the primary name.
+    var representativeName: String { preferredMIMEName ?? name }
+
+    init(preferredMIMEName: String?, name: String, aliases: [String]) {
+        self.preferredMIMEName = preferredMIMEName
+        self.name = name
+        self.aliases = aliases
+    }
+
+    /// Whether `string` equals the preferred MIME name, the name, or any alias under `tokenizer`.
+    func matches<T>(
+        _ string: String,
+        tokenizedBy tokenizer: T.Type
+    ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable {
+        if let preferredMIMEName = self.preferredMIMEName,
+           preferredMIMEName.isEqual(to: string, tokenizedBy: tokenizer) {
+            return true
+        }
+        if name.isEqual(to: string, tokenizedBy: tokenizer) {
+            return true
+        }
+        for alias in aliases {
+            if alias.isEqual(to: string, tokenizedBy: tokenizer) {
+                return true
+            }
+        }
+        return false
+    }
+}
+
+// Extracted only necessary charsets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
+extension IANACharset {
+    /// IANA Character Set `US-ASCII`
+    static let usASCII = IANACharset(
+        preferredMIMEName: "US-ASCII",
+        name: "US-ASCII",
+        aliases: [
+            "iso-ir-6",
+            "ANSI_X3.4-1968",
+            "ANSI_X3.4-1986",
+            "ISO_646.irv:1991",
+            "ISO646-US",
+            "US-ASCII",
+            "us",
+            "IBM367",
+            "cp367",
+            "csASCII",
+        ]
+    )
+
+    /// IANA Character Set `ISO-8859-1`
+    static let iso8859_1 = IANACharset(
+        preferredMIMEName: "ISO-8859-1",
+        name: "ISO_8859-1:1987",
+        aliases: [
+            "iso-ir-100",
+            "ISO_8859-1",
+            "ISO-8859-1",
+            "latin1",
+            "l1",
+            "IBM819",
+            "CP819",
+            "csISOLatin1",
+        ]
+    )
+
+    /// IANA Character Set `ISO-8859-2`
+    static let iso8859_2 = IANACharset(
+        preferredMIMEName: "ISO-8859-2",
+        name: "ISO_8859-2:1987",
+        aliases: [
+            "iso-ir-101",
+            "ISO_8859-2",
+            "ISO-8859-2",
+            "latin2",
+            "l2",
+            "csISOLatin2",
+        ]
+    )
+
+    /// IANA Character Set `Shift_JIS`
+    static let shiftJIS = IANACharset(
+        preferredMIMEName: "Shift_JIS",
+        name: "Shift_JIS",
+        aliases: [
+            "MS_Kanji",
+            "csShiftJIS",
+        ]
+    )
+
+    /// IANA Character Set `EUC-JP`
+    static let eucJP = IANACharset(
+        preferredMIMEName: "EUC-JP",
+        name: "Extended_UNIX_Code_Packed_Format_for_Japanese",
+        aliases: [
+            "csEUCPkdFmtJapanese",
+            "EUC-JP",
+        ]
+    )
+
+    /// IANA Character Set `ISO-2022-JP`
+    static let iso2022JP = IANACharset(
+        preferredMIMEName: "ISO-2022-JP",
+        name: "ISO-2022-JP",
+        aliases: [
+            "csISO2022JP",
+        ]
+    )
+
+    /// IANA Character Set `UTF-8`
+    static let utf8 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-8",
+        aliases: [
+            "csUTF8",
+        ]
+    )
+
+    /// IANA Character Set `UTF-16BE`
+    static let utf16BE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16BE",
+        aliases: [
+            "csUTF16BE",
+        ]
+    )
+
+    /// IANA Character Set `UTF-16LE`
+    static let utf16LE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16LE",
+        aliases: [
+            "csUTF16LE",
+        ]
+    )
+
+    /// IANA Character Set `UTF-16`
+    static let utf16 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16",
+        aliases: [
+            "csUTF16",
+        ]
+    )
+
+    /// IANA Character Set `UTF-32`
+    static let utf32 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32",
+        aliases: [
+            "csUTF32",
+        ]
+    )
+
+    /// IANA Character Set `UTF-32BE`
+    static let utf32BE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32BE",
+        aliases: [
+            "csUTF32BE",
+        ]
+    )
+
+    /// IANA Character Set `UTF-32LE`
+    static let utf32LE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32LE",
+        aliases: [
+            "csUTF32LE",
+        ]
+    )
+
+    /// IANA Character Set `macintosh`
+    static let macintosh = IANACharset(
+        preferredMIMEName: nil,
+        name: "macintosh",
+        aliases: [
+            "mac",
+            "csMacintosh",
+        ]
+    )
+
+    /// IANA Character Set `windows-1250`
+    static let windows1250 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1250",
+        aliases: [
+            "cswindows1250",
+        ]
+    )
+
+    /// IANA Character Set `windows-1251`
+    static let windows1251 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1251",
+        aliases: [
+            "cswindows1251",
+        ]
+    )
+
+    /// IANA Character Set `windows-1252`
+    static let windows1252 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1252",
+        aliases: [
+            "cswindows1252",
+        ]
+    )
+
+    /// IANA Character Set `windows-1253`
+    static let windows1253 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1253",
+        aliases: [
+            "cswindows1253",
+        ]
+    )
+
+    /// IANA Character Set `windows-1254`
+    static let windows1254 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1254",
+        aliases: [
+            "cswindows1254",
+        ]
+    )
+}
+
+// MARK: - `String.Encoding` Names
+
+extension String.Encoding {
+    private var _ianaCharset: IANACharset? {
+        switch self {
+        case .utf8: .utf8
+        case .ascii: .usASCII
+        case .japaneseEUC: .eucJP
+        case .isoLatin1: .iso8859_1
+        case .shiftJIS: .shiftJIS
+        case .isoLatin2: .iso8859_2
+        case .unicode: .utf16
+        case .windowsCP1251: .windows1251
+        case .windowsCP1252: .windows1252
+        case .windowsCP1253: .windows1253
+        case .windowsCP1254: .windows1254
+        case .windowsCP1250: .windows1250
+        case .iso2022JP: .iso2022JP
+        case .macOSRoman: .macintosh
+        case .utf16BigEndian: .utf16BE
+        case .utf16LittleEndian: .utf16LE
+        case .utf32: .utf32
+        case .utf32BigEndian: .utf32BE
+        case .utf32LittleEndian: .utf32LE
+        default: nil
+        }
+    }
+
+    /// The name of this encoding that is compatible with the one of the IANA registry "charset".
+    @available(FoundationPreview 6.2, *)
+    public var ianaName: String? {
+        return _ianaCharset?.representativeName
+    }
+
+    /// Creates an instance from the name of the IANA registry "charset".
+    @available(FoundationPreview 6.2, *)
+    public init?(ianaName charsetName: String) {
+        func __determineEncoding() -> String.Encoding? {
+            func __matches(_ charsets: IANACharset...) -> Bool {
+                assert(!charsets.isEmpty)
+                return charsets.contains {
+                    $0.matches(
+                        charsetName,
+                        tokenizedBy: ASCIICaseInsensitiveTokenizer.self
+                    )
+                }
+            }
+
+            return if __matches(.utf8) {
+                .utf8
+            } else if __matches(.usASCII) {
+                .ascii
+            } else if __matches(.eucJP) {
+                .japaneseEUC
+            } else if __matches(.iso8859_1) {
+                .isoLatin1
+            } else if __matches(.shiftJIS) {
+                .shiftJIS
+            } else if __matches(.iso8859_2) {
+                .isoLatin2
+            } else if __matches(.utf16) {
+                .utf16
+            } else if __matches(.windows1251) {
+                .windowsCP1251
+            } else if __matches(.windows1252) {
+                .windowsCP1252
+            } else if __matches(.windows1253) {
+                .windowsCP1253
+            } else if __matches(.windows1254) {
+                .windowsCP1254
+            } else if __matches(.windows1250) {
+                .windowsCP1250
+            } else if __matches(.iso2022JP) {
+                .iso2022JP
+            } else if __matches(.macintosh) {
+                .macOSRoman
+            } else if __matches(.utf16BE) {
+                .utf16BigEndian
+            } else if __matches(.utf16LE) {
+                .utf16LittleEndian
+            } else if __matches(.utf32) {
+                .utf32
+            } else if __matches(.utf32BE) {
+                .utf32BigEndian
+            } else if __matches(.utf32LE) {
+                .utf32LittleEndian
+            } else {
+                nil
+            }
+        }
+
+        guard let encoding = __determineEncoding() else {
+            return nil
+        }
+        self = encoding
+    }
+}
+

From 0ea8aff0a7b772fd8500bc7bb36d68b7379ce4db Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Fri, 9 May 2025 16:27:55 +0900
Subject: [PATCH 02/14] Import tests for String Encoding Names from other repo.

- source: https://github.com/YOCKOW/SF-StringEncodingNameImpl/blob/0.4.0/Tests/StringEncodingNameImplTests/StringEncodingNameParserTests.swift
---
 .../StringTests.swift                         | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/Tests/FoundationEssentialsTests/StringTests.swift b/Tests/FoundationEssentialsTests/StringTests.swift
index 26286be15..a2305ff82 100644
--- a/Tests/FoundationEssentialsTests/StringTests.swift
+++ b/Tests/FoundationEssentialsTests/StringTests.swift
@@ -1397,6 +1397,69 @@ private struct StringTests {
             "abcd🎺efgh"
         ])
     }
+
+    func test_Encoding_names() {
+        // Encoding to Name
+        XCTAssertEqual(String._Encoding.ascii.ianaName, "US-ASCII")
+        XCTAssertEqual(String._Encoding.nextstep.ianaName, nil)
+        XCTAssertEqual(String._Encoding.japaneseEUC.ianaName, "EUC-JP")
+        XCTAssertEqual(String._Encoding.utf8.ianaName, "UTF-8")
+        XCTAssertEqual(String._Encoding.isoLatin1.ianaName, "ISO-8859-1")
+        XCTAssertEqual(String._Encoding.symbol.ianaName, nil)
+        XCTAssertEqual(String._Encoding.nonLossyASCII.ianaName, nil)
+        XCTAssertEqual(String._Encoding.shiftJIS.ianaName, "Shift_JIS")
+        XCTAssertEqual(String._Encoding.isoLatin2.ianaName, "ISO-8859-2")
+        XCTAssertEqual(String._Encoding.unicode.ianaName, "UTF-16")
+        XCTAssertEqual(String._Encoding.windowsCP1251.ianaName, "windows-1251")
+        XCTAssertEqual(String._Encoding.windowsCP1252.ianaName, "windows-1252")
+        XCTAssertEqual(String._Encoding.windowsCP1253.ianaName, "windows-1253")
+        XCTAssertEqual(String._Encoding.windowsCP1254.ianaName, "windows-1254")
+        XCTAssertEqual(String._Encoding.windowsCP1250.ianaName, "windows-1250")
+        XCTAssertEqual(String._Encoding.iso2022JP.ianaName, "ISO-2022-JP")
+        XCTAssertEqual(String._Encoding.macOSRoman.ianaName, "macintosh")
+        XCTAssertEqual(String._Encoding.utf16BigEndian.ianaName, "UTF-16BE")
+        XCTAssertEqual(String._Encoding.utf16LittleEndian.ianaName, "UTF-16LE")
+        XCTAssertEqual(String._Encoding.utf32.ianaName, "UTF-32")
+        XCTAssertEqual(String._Encoding.utf32BigEndian.ianaName, "UTF-32BE")
+        XCTAssertEqual(String._Encoding.utf32LittleEndian.ianaName, "UTF-32LE")
+        XCTAssertEqual(String._Encoding(rawValue: .max).ianaName, nil)
+
+        // Name to Encoding
+        XCTAssertEqual(String._Encoding(ianaName: "us-ascii"), .ascii)
+        XCTAssertEqual(String._Encoding(ianaName: "iso-ir-2"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "x-nextstep"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "euc-jp"), .japaneseEUC)
+        XCTAssertEqual(String._Encoding(ianaName: "CP51932"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-8"), .utf8)
+        XCTAssertEqual(String._Encoding(ianaName: "iso_8859-1"), .isoLatin1)
+        XCTAssertEqual(String._Encoding(ianaName: "x-mac-symbol"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "Adobe-symbol-encoding"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "cp932"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "shift_jis"), .shiftJIS)
+        XCTAssertEqual(String._Encoding(ianaName: "windows-31j"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "iso_8859-2"), .isoLatin2)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-16"), .utf16)
+        XCTAssertEqual(String._Encoding(ianaName: "iso-10646-ucs-2"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "unicode-1-1"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "windows-1251"), .windowsCP1251)
+        XCTAssertEqual(String._Encoding(ianaName: "windows-1252"), .windowsCP1252)
+        XCTAssertEqual(String._Encoding(ianaName: "ISO-8859-1-Windows-3.0-Latin-1"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "ISO-8859-1-Windows-3.1-Latin-1"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "windows-1253"), .windowsCP1253)
+        XCTAssertEqual(String._Encoding(ianaName: "windows-1254"), .windowsCP1254)
+        XCTAssertEqual(String._Encoding(ianaName: "iso-8859-9-windows-Latin-5"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "windows-1250"), .windowsCP1250)
+        XCTAssertEqual(String._Encoding(ianaName: "iso-8859-2-windows-Latin-2"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "iso-2022-jp"), .iso2022JP)
+        XCTAssertEqual(String._Encoding(ianaName: "macintosh"), .macOSRoman)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-16be"), .utf16BigEndian)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-16le"), .utf16LittleEndian)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-32"), .utf32)
+        XCTAssertEqual(String._Encoding(ianaName: "iso-10646-ucs-4"), nil)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-32be"), .utf32BigEndian)
+        XCTAssertEqual(String._Encoding(ianaName: "utf-32le"), .utf32LittleEndian)
+        XCTAssertEqual(String._Encoding(ianaName: "foo-bar-baz"), nil)
+    }
 }
 
 // MARK: - Helper functions

From 7acaa40d0ed11bd088defe6cafbe47ac6126cb0a Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Tue, 17 Jun 2025 10:23:47 +0900
Subject: [PATCH 03/14] Remove dead code in terms of the current proposal.

---
 .../String/String+Encoding+Names.swift        | 71 -------------------
 1 file changed, 71 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 07ca26c21..48c1c37ce 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -14,15 +14,6 @@
 // MARK: - Private extensions for parsing encoding names
 
 private extension Unicode.Scalar {
-  var _isASCIINumeric: Bool {
-    return ("0"..."9").contains(self)
-  }
-
-  var _asciiNumericValue: Int {
-    assert(_isASCIINumeric)
-    return Int(self.value - 0x30)
-  }
-
   /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
   ///
   /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
@@ -99,68 +90,6 @@ extension StringEncodingNameTokenizer where Self: ~Copyable {
     }
 }
 
-/// ICU-independent parser that follows [Charset Alias Matching](https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching).
-private struct UTS22Tokenizer: StringEncodingNameTokenizer, ~Copyable {
-    enum Token: Equatable {
-        case numeric(Int)
-        case alphabet(ASCIICaseInsensitiveUnicodeScalar)
-    }
-
-    enum Error: Swift.Error {
-        case tooLargeNumericValue
-    }
-
-    let scalars: String.UnicodeScalarView
-
-    private var _currentIndex: String.UnicodeScalarView.Index
-
-    init(name: String) {
-        self.scalars = name.unicodeScalars
-        self._currentIndex = scalars.startIndex
-    }
-
-    mutating func nextToken() throws -> Token? {
-        guard _currentIndex < scalars.endIndex else {
-            return nil
-        }
-
-        let scalar = scalars[_currentIndex]
-        switch scalar {
-        case "0"..."9":
-            // Parse a numeric value ignoring leading zeros.
-            //
-            // NOTE: To prevent the value from overflow, a threhold is set here.
-            //       The max number of digits to be expected is 8 as of now: i.g. `csISO42JISC62261978`.
-            //       It wouldn't matter to throw an error in practice when the value is too large.
-
-            let threshold: Int = 999_999_999
-            var value = scalar._asciiNumericValue
-            scalars.formIndex(after: &_currentIndex)
-            while _currentIndex < scalars.endIndex {
-                let currentScalar = scalars[_currentIndex]
-                guard currentScalar._isASCIINumeric else {
-                    break
-                }
-                value = value * 10 + currentScalar._asciiNumericValue
-                if value > threshold {
-                    throw Error.tooLargeNumericValue
-                }
-                scalars.formIndex(after: &_currentIndex)
-            }
-            return .numeric(value)
-        case "A"..."Z", "a"..."z":
-            scalars.formIndex(after: &_currentIndex)
-            return .alphabet(ASCIICaseInsensitiveUnicodeScalar(scalar))
-        default:
-            scalars.formIndex(after: &_currentIndex)
-            if _currentIndex < scalars.endIndex {
-                return try nextToken()
-            }
-            return nil
-        }
-    }
-}
-
 
 /// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s.
 private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable {

From 556347230dc0a9042ec31c4263199dedcf634c6c Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Sun, 21 Sep 2025 15:07:46 +0900
Subject: [PATCH 04/14] Use `Testing` for String Encoding Names tests.

---
 .../StringTests.swift                         | 116 +++++++++---------
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/Tests/FoundationEssentialsTests/StringTests.swift b/Tests/FoundationEssentialsTests/StringTests.swift
index a2305ff82..6a6781874 100644
--- a/Tests/FoundationEssentialsTests/StringTests.swift
+++ b/Tests/FoundationEssentialsTests/StringTests.swift
@@ -1398,67 +1398,67 @@ private struct StringTests {
         ])
     }
 
-    func test_Encoding_names() {
+    @Test func encodingNames() {
         // Encoding to Name
-        XCTAssertEqual(String._Encoding.ascii.ianaName, "US-ASCII")
-        XCTAssertEqual(String._Encoding.nextstep.ianaName, nil)
-        XCTAssertEqual(String._Encoding.japaneseEUC.ianaName, "EUC-JP")
-        XCTAssertEqual(String._Encoding.utf8.ianaName, "UTF-8")
-        XCTAssertEqual(String._Encoding.isoLatin1.ianaName, "ISO-8859-1")
-        XCTAssertEqual(String._Encoding.symbol.ianaName, nil)
-        XCTAssertEqual(String._Encoding.nonLossyASCII.ianaName, nil)
-        XCTAssertEqual(String._Encoding.shiftJIS.ianaName, "Shift_JIS")
-        XCTAssertEqual(String._Encoding.isoLatin2.ianaName, "ISO-8859-2")
-        XCTAssertEqual(String._Encoding.unicode.ianaName, "UTF-16")
-        XCTAssertEqual(String._Encoding.windowsCP1251.ianaName, "windows-1251")
-        XCTAssertEqual(String._Encoding.windowsCP1252.ianaName, "windows-1252")
-        XCTAssertEqual(String._Encoding.windowsCP1253.ianaName, "windows-1253")
-        XCTAssertEqual(String._Encoding.windowsCP1254.ianaName, "windows-1254")
-        XCTAssertEqual(String._Encoding.windowsCP1250.ianaName, "windows-1250")
-        XCTAssertEqual(String._Encoding.iso2022JP.ianaName, "ISO-2022-JP")
-        XCTAssertEqual(String._Encoding.macOSRoman.ianaName, "macintosh")
-        XCTAssertEqual(String._Encoding.utf16BigEndian.ianaName, "UTF-16BE")
-        XCTAssertEqual(String._Encoding.utf16LittleEndian.ianaName, "UTF-16LE")
-        XCTAssertEqual(String._Encoding.utf32.ianaName, "UTF-32")
-        XCTAssertEqual(String._Encoding.utf32BigEndian.ianaName, "UTF-32BE")
-        XCTAssertEqual(String._Encoding.utf32LittleEndian.ianaName, "UTF-32LE")
-        XCTAssertEqual(String._Encoding(rawValue: .max).ianaName, nil)
+        #expect(String.Encoding.ascii.ianaName == "US-ASCII")
+        #expect(String.Encoding.nextstep.ianaName == nil)
+        #expect(String.Encoding.japaneseEUC.ianaName == "EUC-JP")
+        #expect(String.Encoding.utf8.ianaName == "UTF-8")
+        #expect(String.Encoding.isoLatin1.ianaName == "ISO-8859-1")
+        #expect(String.Encoding.symbol.ianaName == nil)
+        #expect(String.Encoding.nonLossyASCII.ianaName == nil)
+        #expect(String.Encoding.shiftJIS.ianaName == "Shift_JIS")
+        #expect(String.Encoding.isoLatin2.ianaName == "ISO-8859-2")
+        #expect(String.Encoding.unicode.ianaName == "UTF-16")
+        #expect(String.Encoding.windowsCP1251.ianaName == "windows-1251")
+        #expect(String.Encoding.windowsCP1252.ianaName == "windows-1252")
+        #expect(String.Encoding.windowsCP1253.ianaName == "windows-1253")
+        #expect(String.Encoding.windowsCP1254.ianaName == "windows-1254")
+        #expect(String.Encoding.windowsCP1250.ianaName == "windows-1250")
+        #expect(String.Encoding.iso2022JP.ianaName == "ISO-2022-JP")
+        #expect(String.Encoding.macOSRoman.ianaName == "macintosh")
+        #expect(String.Encoding.utf16BigEndian.ianaName == "UTF-16BE")
+        #expect(String.Encoding.utf16LittleEndian.ianaName == "UTF-16LE")
+        #expect(String.Encoding.utf32.ianaName == "UTF-32")
+        #expect(String.Encoding.utf32BigEndian.ianaName == "UTF-32BE")
+        #expect(String.Encoding.utf32LittleEndian.ianaName == "UTF-32LE")
+        #expect(String.Encoding(rawValue: .max).ianaName == nil)
 
         // Name to Encoding
-        XCTAssertEqual(String._Encoding(ianaName: "us-ascii"), .ascii)
-        XCTAssertEqual(String._Encoding(ianaName: "iso-ir-2"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "x-nextstep"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "euc-jp"), .japaneseEUC)
-        XCTAssertEqual(String._Encoding(ianaName: "CP51932"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-8"), .utf8)
-        XCTAssertEqual(String._Encoding(ianaName: "iso_8859-1"), .isoLatin1)
-        XCTAssertEqual(String._Encoding(ianaName: "x-mac-symbol"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "Adobe-symbol-encoding"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "cp932"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "shift_jis"), .shiftJIS)
-        XCTAssertEqual(String._Encoding(ianaName: "windows-31j"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "iso_8859-2"), .isoLatin2)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-16"), .utf16)
-        XCTAssertEqual(String._Encoding(ianaName: "iso-10646-ucs-2"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "unicode-1-1"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "windows-1251"), .windowsCP1251)
-        XCTAssertEqual(String._Encoding(ianaName: "windows-1252"), .windowsCP1252)
-        XCTAssertEqual(String._Encoding(ianaName: "ISO-8859-1-Windows-3.0-Latin-1"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "ISO-8859-1-Windows-3.1-Latin-1"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "windows-1253"), .windowsCP1253)
-        XCTAssertEqual(String._Encoding(ianaName: "windows-1254"), .windowsCP1254)
-        XCTAssertEqual(String._Encoding(ianaName: "iso-8859-9-windows-Latin-5"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "windows-1250"), .windowsCP1250)
-        XCTAssertEqual(String._Encoding(ianaName: "iso-8859-2-windows-Latin-2"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "iso-2022-jp"), .iso2022JP)
-        XCTAssertEqual(String._Encoding(ianaName: "macintosh"), .macOSRoman)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-16be"), .utf16BigEndian)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-16le"), .utf16LittleEndian)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-32"), .utf32)
-        XCTAssertEqual(String._Encoding(ianaName: "iso-10646-ucs-4"), nil)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-32be"), .utf32BigEndian)
-        XCTAssertEqual(String._Encoding(ianaName: "utf-32le"), .utf32LittleEndian)
-        XCTAssertEqual(String._Encoding(ianaName: "foo-bar-baz"), nil)
+        #expect(String.Encoding(ianaName: "us-ascii") == .ascii)
+        #expect(String.Encoding(ianaName: "iso-ir-2") == nil)
+        #expect(String.Encoding(ianaName: "x-nextstep") == nil)
+        #expect(String.Encoding(ianaName: "euc-jp") == .japaneseEUC)
+        #expect(String.Encoding(ianaName: "CP51932") == nil)
+        #expect(String.Encoding(ianaName: "utf-8") == .utf8)
+        #expect(String.Encoding(ianaName: "iso_8859-1") == .isoLatin1)
+        #expect(String.Encoding(ianaName: "x-mac-symbol") == nil)
+        #expect(String.Encoding(ianaName: "Adobe-symbol-encoding") == nil)
+        #expect(String.Encoding(ianaName: "cp932") == nil)
+        #expect(String.Encoding(ianaName: "shift_jis") == .shiftJIS)
+        #expect(String.Encoding(ianaName: "windows-31j") == nil)
+        #expect(String.Encoding(ianaName: "iso_8859-2") == .isoLatin2)
+        #expect(String.Encoding(ianaName: "utf-16") == .utf16)
+        #expect(String.Encoding(ianaName: "iso-10646-ucs-2") == nil)
+        #expect(String.Encoding(ianaName: "unicode-1-1") == nil)
+        #expect(String.Encoding(ianaName: "windows-1251") == .windowsCP1251)
+        #expect(String.Encoding(ianaName: "windows-1252") == .windowsCP1252)
+        #expect(String.Encoding(ianaName: "ISO-8859-1-Windows-3.0-Latin-1") == nil)
+        #expect(String.Encoding(ianaName: "ISO-8859-1-Windows-3.1-Latin-1") == nil)
+        #expect(String.Encoding(ianaName: "windows-1253") == .windowsCP1253)
+        #expect(String.Encoding(ianaName: "windows-1254") == .windowsCP1254)
+        #expect(String.Encoding(ianaName: "iso-8859-9-windows-Latin-5") == nil)
+        #expect(String.Encoding(ianaName: "windows-1250") == .windowsCP1250)
+        #expect(String.Encoding(ianaName: "iso-8859-2-windows-Latin-2") == nil)
+        #expect(String.Encoding(ianaName: "iso-2022-jp") == .iso2022JP)
+        #expect(String.Encoding(ianaName: "macintosh") == .macOSRoman)
+        #expect(String.Encoding(ianaName: "utf-16be") == .utf16BigEndian)
+        #expect(String.Encoding(ianaName: "utf-16le") == .utf16LittleEndian)
+        #expect(String.Encoding(ianaName: "utf-32") == .utf32)
+        #expect(String.Encoding(ianaName: "iso-10646-ucs-4") == nil)
+        #expect(String.Encoding(ianaName: "utf-32be") == .utf32BigEndian)
+        #expect(String.Encoding(ianaName: "utf-32le") == .utf32LittleEndian)
+        #expect(String.Encoding(ianaName: "foo-bar-baz") == nil)
     }
 }
 

From a783db10f9231a2f12fd923e41f2c26001e167a9 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Sun, 21 Sep 2025 15:18:15 +0900
Subject: [PATCH 05/14] NFC: Fix indentation in "String+Encoding+Names.swift".

---
 .../String/String+Encoding+Names.swift        | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 48c1c37ce..8c5b76532 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -14,15 +14,15 @@
 // MARK: - Private extensions for parsing encoding names
 
 private extension Unicode.Scalar {
-  /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
-  ///
-  /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
-  var _isASCIIWhitespace: Bool {
-    switch self.value {
-    case 0x09, 0x0A, 0x0C, 0x0D, 0x20: true
-    default: false
+    /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
+    ///
+    /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
+    var _isASCIIWhitespace: Bool {
+        switch self.value {
+        case 0x09, 0x0A, 0x0C, 0x0D, 0x20: true
+        default: false
+        }
     }
-  }
 }
 
 private extension String {
@@ -95,9 +95,9 @@ extension StringEncodingNameTokenizer where Self: ~Copyable {
 private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable {
     typealias Token = ASCIICaseInsensitiveUnicodeScalar
 
-      enum Error: Swift.Error {
-          case nonASCII
-      }
+    enum Error: Swift.Error {
+        case nonASCII
+    }
 
     let scalars: Substring.UnicodeScalarView
 

From 7515bf433ef105837c6124963ec44ee845044fb6 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Sun, 21 Sep 2025 16:19:32 +0900
Subject: [PATCH 06/14] SF-0033: Adjust comments/attributes to match the
 accepted proposal.

---
 .../String/String+Encoding+Names.swift                    | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 8c5b76532..12f6466de 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -409,13 +409,17 @@ extension String.Encoding {
     }
 
     /// The name of this encoding that is compatible with the one of the IANA registry "charset".
-    @available(FoundationPreview 6.2, *)
+    @available(FoundationPreview 6.3, *)
     public var ianaName: String? {
         return _ianaCharset?.representativeName
     }
 
     /// Creates an instance from the name of the IANA registry "charset".
-    @available(FoundationPreview 6.2, *)
+    ///
+    /// - Note: The given name is compared to each IANA "charset" name
+    ///         with ASCII case-insensitive collation
+    ///         to determine which encoding is suitable.
+    @available(FoundationPreview 6.3, *)
     public init?(ianaName charsetName: String) {
         func __determineEncoding() -> String.Encoding? {
             func __matches(_ charsets: IANACharset...) -> Bool {

From c72697106b6028003552b100c996c3bf27c88a94 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Sun, 12 Oct 2025 15:02:50 +0900
Subject: [PATCH 07/14] Auto-generate Swift source code for IANA Charset names.

---
 .../String/IANACharsetNames.swift             | 213 ++++++++++++++++++
 .../String/String+Encoding+Names.swift        | 201 +----------------
 utils/update-iana-charset-names               |  62 +++++
 utils/update-iana-charset-names-impl.py       | 174 ++++++++++++++
 4 files changed, 451 insertions(+), 199 deletions(-)
 create mode 100644 Sources/FoundationEssentials/String/IANACharsetNames.swift
 create mode 100755 utils/update-iana-charset-names
 create mode 100644 utils/update-iana-charset-names-impl.py

diff --git a/Sources/FoundationEssentials/String/IANACharsetNames.swift b/Sources/FoundationEssentials/String/IANACharsetNames.swift
new file mode 100644
index 000000000..8f3e88f09
--- /dev/null
+++ b/Sources/FoundationEssentials/String/IANACharsetNames.swift
@@ -0,0 +1,213 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+
+// WARNING: DO NOT EDIT THIS FILE DIRECTLY.
+//          This is auto-generated by `update-iana-charset-names`.
+
+
+extension IANACharset {
+    /// IANA Charset `US-ASCII`.
+    static let usASCII = IANACharset(
+        preferredMIMEName: "US-ASCII",
+        name: "US-ASCII",
+        aliases: [
+            "iso-ir-6",
+            "ANSI_X3.4-1968",
+            "ANSI_X3.4-1986",
+            "ISO_646.irv:1991",
+            "ISO646-US",
+            "US-ASCII",
+            "us",
+            "IBM367",
+            "cp367",
+            "csASCII",
+        ]
+    )
+
+    /// IANA Charset `ISO-8859-1`.
+    static let iso8859_1 = IANACharset(
+        preferredMIMEName: "ISO-8859-1",
+        name: "ISO_8859-1:1987",
+        aliases: [
+            "iso-ir-100",
+            "ISO_8859-1",
+            "ISO-8859-1",
+            "latin1",
+            "l1",
+            "IBM819",
+            "CP819",
+            "csISOLatin1",
+        ]
+    )
+
+    /// IANA Charset `ISO-8859-2`.
+    static let iso8859_2 = IANACharset(
+        preferredMIMEName: "ISO-8859-2",
+        name: "ISO_8859-2:1987",
+        aliases: [
+            "iso-ir-101",
+            "ISO_8859-2",
+            "ISO-8859-2",
+            "latin2",
+            "l2",
+            "csISOLatin2",
+        ]
+    )
+
+    /// IANA Charset `Shift_JIS`.
+    static let shiftJIS = IANACharset(
+        preferredMIMEName: "Shift_JIS",
+        name: "Shift_JIS",
+        aliases: [
+            "MS_Kanji",
+            "csShiftJIS",
+        ]
+    )
+
+    /// IANA Charset `EUC-JP`.
+    static let eucJP = IANACharset(
+        preferredMIMEName: "EUC-JP",
+        name: "Extended_UNIX_Code_Packed_Format_for_Japanese",
+        aliases: [
+            "csEUCPkdFmtJapanese",
+            "EUC-JP",
+        ]
+    )
+
+    /// IANA Charset `ISO-2022-JP`.
+    static let iso2022JP = IANACharset(
+        preferredMIMEName: "ISO-2022-JP",
+        name: "ISO-2022-JP",
+        aliases: [
+            "csISO2022JP",
+        ]
+    )
+
+    /// IANA Charset `UTF-8`.
+    static let utf8 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-8",
+        aliases: [
+            "csUTF8",
+        ]
+    )
+
+    /// IANA Charset `UTF-16BE`.
+    static let utf16BE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16BE",
+        aliases: [
+            "csUTF16BE",
+        ]
+    )
+
+    /// IANA Charset `UTF-16LE`.
+    static let utf16LE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16LE",
+        aliases: [
+            "csUTF16LE",
+        ]
+    )
+
+    /// IANA Charset `UTF-16`.
+    static let utf16 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16",
+        aliases: [
+            "csUTF16",
+        ]
+    )
+
+    /// IANA Charset `UTF-32`.
+    static let utf32 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32",
+        aliases: [
+            "csUTF32",
+        ]
+    )
+
+    /// IANA Charset `UTF-32BE`.
+    static let utf32BE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32BE",
+        aliases: [
+            "csUTF32BE",
+        ]
+    )
+
+    /// IANA Charset `UTF-32LE`.
+    static let utf32LE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32LE",
+        aliases: [
+            "csUTF32LE",
+        ]
+    )
+
+    /// IANA Charset `macintosh`.
+    static let macintosh = IANACharset(
+        preferredMIMEName: nil,
+        name: "macintosh",
+        aliases: [
+            "mac",
+            "csMacintosh",
+        ]
+    )
+
+    /// IANA Charset `windows-1250`.
+    static let windows1250 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1250",
+        aliases: [
+            "cswindows1250",
+        ]
+    )
+
+    /// IANA Charset `windows-1251`.
+    static let windows1251 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1251",
+        aliases: [
+            "cswindows1251",
+        ]
+    )
+
+    /// IANA Charset `windows-1252`.
+    static let windows1252 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1252",
+        aliases: [
+            "cswindows1252",
+        ]
+    )
+
+    /// IANA Charset `windows-1253`.
+    static let windows1253 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1253",
+        aliases: [
+            "cswindows1253",
+        ]
+    )
+
+    /// IANA Charset `windows-1254`.
+    static let windows1254 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1254",
+        aliases: [
+            "cswindows1254",
+        ]
+    )
+}
diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 12f6466de..ba2cc32ef 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -72,7 +72,7 @@ private struct ASCIICaseInsensitiveUnicodeScalar: Equatable,
 }
 
 /// A type to tokenize string for `String.Encoding` names.
-private protocol StringEncodingNameTokenizer: ~Copyable {
+internal protocol StringEncodingNameTokenizer: ~Copyable {
     associatedtype Token: Equatable
     init(name: String)
     mutating func nextToken() throws -> Token?
@@ -142,7 +142,7 @@ private extension String {
 // MARK: - IANA Charset Names
 
 /// Info about IANA Charset.
-private struct IANACharset {
+internal struct IANACharset {
     /// Preferred MIME Name
     let preferredMIMEName: String?
 
@@ -182,203 +182,6 @@ private struct IANACharset {
     }
 }
 
-// Extracted only necessary charsets from https://www.iana.org/assignments/character-sets/character-sets.xhtml
-extension IANACharset {
-    /// IANA Characater Set `US-ASCII`
-    static let usASCII = IANACharset(
-        preferredMIMEName: "US-ASCII",
-        name: "US-ASCII",
-        aliases: [
-            "iso-ir-6",
-            "ANSI_X3.4-1968",
-            "ANSI_X3.4-1986",
-            "ISO_646.irv:1991",
-            "ISO646-US",
-            "US-ASCII",
-            "us",
-            "IBM367",
-            "cp367",
-            "csASCII",
-        ]
-    )
-
-    /// IANA Characater Set `ISO-8859-1`
-    static let iso8859_1 = IANACharset(
-        preferredMIMEName: "ISO-8859-1",
-        name: "ISO_8859-1:1987",
-        aliases: [
-            "iso-ir-100",
-            "ISO_8859-1",
-            "ISO-8859-1",
-            "latin1",
-            "l1",
-            "IBM819",
-            "CP819",
-            "csISOLatin1",
-        ]
-    )
-
-    /// IANA Characater Set `ISO-8859-2`
-    static let iso8859_2 = IANACharset(
-        preferredMIMEName: "ISO-8859-2",
-        name: "ISO_8859-2:1987",
-        aliases: [
-            "iso-ir-101",
-            "ISO_8859-2",
-            "ISO-8859-2",
-            "latin2",
-            "l2",
-            "csISOLatin2",
-        ]
-    )
-
-    /// IANA Characater Set `Shift_JIS`
-    static let shiftJIS = IANACharset(
-        preferredMIMEName: "Shift_JIS",
-        name: "Shift_JIS",
-        aliases: [
-            "MS_Kanji",
-            "csShiftJIS",
-        ]
-    )
-
-    /// IANA Characater Set `EUC-JP`
-    static let eucJP = IANACharset(
-        preferredMIMEName: "EUC-JP",
-        name: "Extended_UNIX_Code_Packed_Format_for_Japanese",
-        aliases: [
-            "csEUCPkdFmtJapanese",
-            "EUC-JP",
-        ]
-    )
-
-    /// IANA Characater Set `ISO-2022-JP`
-    static let iso2022JP = IANACharset(
-        preferredMIMEName: "ISO-2022-JP",
-        name: "ISO-2022-JP",
-        aliases: [
-            "csISO2022JP",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-8`
-    static let utf8 = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-8",
-        aliases: [
-            "csUTF8",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-16BE`
-    static let utf16BE = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-16BE",
-        aliases: [
-            "csUTF16BE",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-16LE`
-    static let utf16LE = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-16LE",
-        aliases: [
-            "csUTF16LE",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-16`
-    static let utf16 = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-16",
-        aliases: [
-            "csUTF16",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-32`
-    static let utf32 = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-32",
-        aliases: [
-            "csUTF32",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-32BE`
-    static let utf32BE = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-32BE",
-        aliases: [
-            "csUTF32BE",
-        ]
-    )
-
-    /// IANA Characater Set `UTF-32LE`
-    static let utf32LE = IANACharset(
-        preferredMIMEName: nil,
-        name: "UTF-32LE",
-        aliases: [
-            "csUTF32LE",
-        ]
-    )
-
-    /// IANA Characater Set `macintosh`
-    static let macintosh = IANACharset(
-        preferredMIMEName: nil,
-        name: "macintosh",
-        aliases: [
-            "mac",
-            "csMacintosh",
-        ]
-    )
-
-    /// IANA Characater Set `windows-1250`
-    static let windows1250 = IANACharset(
-        preferredMIMEName: nil,
-        name: "windows-1250",
-        aliases: [
-            "cswindows1250",
-        ]
-    )
-
-    /// IANA Characater Set `windows-1251`
-    static let windows1251 = IANACharset(
-        preferredMIMEName: nil,
-        name: "windows-1251",
-        aliases: [
-            "cswindows1251",
-        ]
-    )
-
-    /// IANA Characater Set `windows-1252`
-    static let windows1252 = IANACharset(
-        preferredMIMEName: nil,
-        name: "windows-1252",
-        aliases: [
-            "cswindows1252",
-        ]
-    )
-
-    /// IANA Characater Set `windows-1253`
-    static let windows1253 = IANACharset(
-        preferredMIMEName: nil,
-        name: "windows-1253",
-        aliases: [
-            "cswindows1253",
-        ]
-    )
-
-    /// IANA Characater Set `windows-1254`
-    static let windows1254 = IANACharset(
-        preferredMIMEName: nil,
-        name: "windows-1254",
-        aliases: [
-            "cswindows1254",
-        ]
-    )
-}
 
 // MARK: - `String.Encoding` Names
 
diff --git a/utils/update-iana-charset-names b/utils/update-iana-charset-names
new file mode 100755
index 000000000..23d9a2ef8
--- /dev/null
+++ b/utils/update-iana-charset-names
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+##===----------------------------------------------------------------------===##
+##
+## This source file is part of the Swift.org open source project
+##
+## Copyright (c) 2025 Apple Inc. and the Swift project authors
+## Licensed under Apache License v2.0 with Runtime Library Exception
+##
+## See https://swift.org/LICENSE.txt for license information
+## See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+##
+##===----------------------------------------------------------------------===##
+
+# This is a shell script that generates a Swift source code file which contains
+# the list of IANA "Character Sets".
+
+set -eu
+
+declare -r commandName="$(basename "$0")"
+declare -r utilsDir="$(cd "$(dirname "$0")" && pwd)"
+declare -r foundationRepoDir="$(cd "${utilsDir}/.." && pwd)"
+declare -r targetSwiftFileRelativePath="Sources/FoundationEssentials/String/IANACharsetNames.swift"
+
+declare -r copyrightYear=$(
+  currentYear=$(date +%Y)
+  if [[ $currentYear -eq 2025 ]]; then
+    echo 2025
+  else
+    echo 2025-${currentYear}
+  fi
+)
+declare -r swiftLicenseHeader="
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) ${copyrightYear} Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+"
+declare -r warningComment="
+// WARNING: DO NOT EDIT THIS FILE DIRECTLY.
+//          This is auto-generated by \`${commandName}\`.
+
+"
+
+echo "Generating Swift source code..." 1>&2
+declare generatedCode
+generatedCode=$(
+  echo "${swiftLicenseHeader##$'\n'}"
+  echo "$warningComment"
+  python3 "${utilsDir}/${commandName}-impl.py"
+)
+
+echo "Writing the code to '${targetSwiftFileRelativePath}'..." 1>&2
+echo "$generatedCode" >"${foundationRepoDir}/${targetSwiftFileRelativePath}"
+
+echo "Done." 1>&2
diff --git a/utils/update-iana-charset-names-impl.py b/utils/update-iana-charset-names-impl.py
new file mode 100644
index 000000000..399fe16e4
--- /dev/null
+++ b/utils/update-iana-charset-names-impl.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+##===----------------------------------------------------------------------===##
+##
+## This source file is part of the Swift.org open source project
+##
+## Copyright (c) 2025 Apple Inc. and the Swift project authors
+## Licensed under Apache License v2.0 with Runtime Library Exception
+##
+## See https://swift.org/LICENSE.txt for license information
+## See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+##
+##===----------------------------------------------------------------------===##
+
+"""
+This is a python script that converts an XML file containing the list of IANA
+"Character Sets" to Swift source code.
+This script generates minimum code and is intended to be executed by other shell
+script.
+"""
+
+import re
+import urllib.request as request
+import xml.etree.ElementTree as ElemTree
+from typing import List, Optional
+
+REQUIRED_CHARSET_NAMES: List[str] = [
+    "UTF-8",
+    "US-ASCII",
+    "EUC-JP",
+    "ISO-8859-1",
+    "Shift_JIS",
+    "ISO-8859-2",
+    "UTF-16",
+    "windows-1251",
+    "windows-1252",
+    "windows-1253",
+    "windows-1254",
+    "windows-1250",
+    "ISO-2022-JP",
+    "macintosh",
+    "UTF-16BE",
+    "UTF-16LE",
+    "UTF-32",
+    "UTF-32BE",
+    "UTF-32LE",
+]
+CHARSETS_XML_URL = "https://www.iana.org/assignments/character-sets/character-sets.xml"
+CHARSETS_XML_NS = "http://www.iana.org/assignments"
+SWIFT_CODE_INDENT = "    "
+
+
+class IANACharsetNameRecord:
+    """Representation of <record> element in 'character-sets.xml'
+
+    The structure of <record> element is as blow:
+    <record>
+        <name>US-ASCII</name>
+        <xref type="rfc" data="rfc2046"/>
+        <value>3</value>
+        <description>ANSI X3.4-1986</description>
+        <alias>iso-ir-6</alias>
+        <alias>ANSI_X3.4-1968</alias>
+        <alias>ANSI_X3.4-1986</alias>
+        <alias>ISO_646.irv:1991</alias>
+        <alias>ISO646-US</alias>
+        <alias>US-ASCII</alias>
+        <alias>us</alias>
+        <alias>IBM367</alias>
+        <alias>cp367</alias>
+        <alias>csASCII</alias>
+        <preferred_alias>US-ASCII</preferred_alias>
+    </record>
+    """
+
+    def __init__(self, recordElem: ElemTree.Element):
+        self._name: str = recordElem.find('./{%s}name' % (CHARSETS_XML_NS)).text
+        self._preferredMIMEName: Optional[str] = getattr(
+            recordElem.find('./{%s}preferred_alias' % (CHARSETS_XML_NS)),
+            'text',
+            None
+        )
+        self._aliases: List[str] = list(map(
+            lambda aliasElem: aliasElem.text,
+            recordElem.findall('./{%s}alias' % (CHARSETS_XML_NS))
+        ))
+        self._camelCasedName = None
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def preferredMIMEName(self) -> Optional[str]:
+        return self._preferredMIMEName
+
+    @property
+    def representativeName(self) -> str:
+        return self.preferredMIMEName or self.name
+
+    @property
+    def aliases(self) -> List[str]:
+        return self._aliases
+
+    @property
+    def camelCasedName(self) -> str:
+        if (self._camelCasedName is not None):
+            return self._camelCasedName
+
+        camelCasedName = ""
+        previousWord = None
+        for ii, word in enumerate(re.split(r"[^0-9A-Za-z]", self.representativeName)):
+            if previousWord is None:
+                camelCasedName = word.lower()
+            else:
+                if re.search(r"[0-9]$", previousWord) and re.search(r"^[0-9]", word):
+                    camelCasedName += "_"
+
+                if (re.fullmatch("[0-9]*[A-Z]+", word)):
+                    camelCasedName += word
+                else:
+                    camelCasedName += word.capitalize()
+
+            previousWord = word
+
+        self._camelCasedName = camelCasedName
+        return camelCasedName
+
+    @property
+    def swiftCodeLines(self) -> List[str]:
+        def __stringLiteralOrNil(string: Optional[str]) -> str:
+            if (string is None):
+                return 'nil'
+            return f'"{string}"'
+
+        lines: List[str] = []
+        lines.append(f"/// IANA Charset `{self.representativeName}`.")
+        lines.append(f"static let {self.camelCasedName} = IANACharset(")
+        lines.append(f"{SWIFT_CODE_INDENT}preferredMIMEName: {
+            __stringLiteralOrNil(self.preferredMIMEName)
+        },")
+        lines.append(f'{SWIFT_CODE_INDENT}name: "{self.name}",')
+        lines.append(f"{SWIFT_CODE_INDENT}aliases: [")
+        for alias in self.aliases:
+            lines.append(f"{SWIFT_CODE_INDENT * 2}\"{alias}\",")
+        lines.append(f"{SWIFT_CODE_INDENT}]")
+        lines.append(")")
+        return lines
+
+
+def generateSwiftCode() -> str:
+    charsetsXMLString = request.urlopen(request.Request(CHARSETS_XML_URL)).read()
+    charsetsXMLRoot = ElemTree.fromstring(charsetsXMLString)
+    charsetsXMLRecordElements = charsetsXMLRoot.findall(
+        "./{%s}registry/{%s}record" % (CHARSETS_XML_NS, CHARSETS_XML_NS)
+    )
+    result = "extension IANACharset {"
+    for record in map(
+        lambda recordElem: IANACharsetNameRecord(recordElem),
+        charsetsXMLRecordElements
+    ):
+        if (record.representativeName not in REQUIRED_CHARSET_NAMES):
+            continue
+        result += "\n"
+        result += "\n".join(map(
+            lambda line: SWIFT_CODE_INDENT + line,
+            record.swiftCodeLines
+        ))
+        result += "\n"
+    result += "}\n"
+    return result
+
+
+if __name__ == "__main__":
+    print(generateSwiftCode())

From 5c9492c5b246af6be0f69f21dbaec004aefb444f Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Thu, 16 Oct 2025 14:33:52 +0900
Subject: [PATCH 08/14] Remove unnecessary `@inlinable`.

---
 Sources/FoundationEssentials/String/String+Encoding+Names.swift | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index ba2cc32ef..1d417f003 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -45,7 +45,6 @@ private struct ASCIICaseInsensitiveUnicodeScalar: Equatable,
 
     let scalar: Unicode.Scalar
 
-    @inlinable
     init(_ scalar: Unicode.Scalar) {
         assert(scalar.isASCII)
         self.scalar = scalar
@@ -55,7 +54,6 @@ private struct ASCIICaseInsensitiveUnicodeScalar: Equatable,
         self.init(Unicode.Scalar(unicodeScalarLiteral: value))
     }
 
-    @inlinable
     static func ==(
         lhs: ASCIICaseInsensitiveUnicodeScalar,
         rhs: ASCIICaseInsensitiveUnicodeScalar

From ac421272d276524f2aeb6fcbeac6f7714408d474 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Thu, 16 Oct 2025 15:02:42 +0900
Subject: [PATCH 09/14] Simplify `String.Encoding.init(ianaName:)`.

---
 .../String/String+Encoding+Names.swift        | 78 +++++++------------
 1 file changed, 29 insertions(+), 49 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 1d417f003..587b2eb45 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -222,58 +222,38 @@ extension String.Encoding {
     ///         to determine which encoding is suitable.
     @available(FoundationPreview 6.3, *)
     public init?(ianaName charsetName: String) {
+        let possibilities: [String.Encoding] = [
+            .utf8,
+            .ascii,
+            .japaneseEUC,
+            .isoLatin1,
+            .shiftJIS,
+            .isoLatin2,
+            .unicode, // .utf16
+            .windowsCP1251,
+            .windowsCP1252,
+            .windowsCP1253,
+            .windowsCP1254,
+            .windowsCP1250,
+            .iso2022JP,
+            .macOSRoman,
+            .utf16BigEndian,
+            .utf16LittleEndian,
+            .utf32,
+            .utf32BigEndian,
+            .utf32LittleEndian,
+        ]
+
         func __determineEncoding() -> String.Encoding? {
-            func __matches(_ charsets: IANACharset...) -> Bool {
-                assert(!charsets.isEmpty)
-                return charsets.contains {
-                    $0.matches(
-                        charsetName,
-                        tokenizedBy: ASCIICaseInsensitiveTokenizer.self
-                    )
+            for encoding in possibilities {
+                guard let ianaCharset = encoding._ianaCharset else {
+                    continue
+                }
+                if ianaCharset.matches(charsetName, tokenizedBy: ASCIICaseInsensitiveTokenizer.self) {
+                    return encoding
                 }
             }
-
-            return if __matches(.utf8) {
-                .utf8
-            } else if __matches(.usASCII) {
-                .ascii
-            } else if __matches(.eucJP) {
-                .japaneseEUC
-            } else if __matches(.iso8859_1) {
-                .isoLatin1
-            } else if __matches(.shiftJIS) {
-                .shiftJIS
-            } else if __matches(.iso8859_2) {
-                .isoLatin2
-            } else if __matches(.utf16) {
-                .utf16
-            } else if __matches(.windows1251) {
-                .windowsCP1251
-            } else if __matches(.windows1252) {
-                .windowsCP1252
-            } else if __matches(.windows1253) {
-                .windowsCP1253
-            } else if __matches(.windows1254) {
-                .windowsCP1254
-            } else if __matches(.windows1250) {
-                .windowsCP1250
-            } else if __matches(.iso2022JP) {
-                .iso2022JP
-            } else if __matches(.macintosh) {
-                .macOSRoman
-            } else if __matches(.utf16BE) {
-                .utf16BigEndian
-            } else if __matches(.utf16LE) {
-                .utf16LittleEndian
-            } else if __matches(.utf32) {
-                .utf32
-            } else if __matches(.utf32BE) {
-                .utf32BigEndian
-            } else if __matches(.utf32LE) {
-                .utf32LittleEndian
-            } else {
-                nil
-            }
+            return nil
         }
 
         guard let encoding = __determineEncoding() else {

From c7bdbef8b1998d7083d069b5482f9d245b0605a2 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Thu, 16 Oct 2025 15:17:36 +0900
Subject: [PATCH 10/14] Add new files related to SF-0033 to CMakeLists.txt.

---
 Sources/FoundationEssentials/String/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Sources/FoundationEssentials/String/CMakeLists.txt b/Sources/FoundationEssentials/String/CMakeLists.txt
index 720eb218d..6cc7994d7 100644
--- a/Sources/FoundationEssentials/String/CMakeLists.txt
+++ b/Sources/FoundationEssentials/String/CMakeLists.txt
@@ -15,10 +15,12 @@
 target_sources(FoundationEssentials PRIVATE
     BidirectionalCollection.swift
     BuiltInUnicodeScalarSet.swift
+    IANACharsetNames.swift
     RegexPatternCache.swift
     String+Bridging.swift
     String+Comparison.swift
     String+Encoding.swift
+    String+Encoding+Names.swift
     String+EndianAdaptorSequence.swift
     String+Essentials.swift
     String+IO.swift

From e674fa689f2b0b0f54ad404514418118a6be8642 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Sun, 19 Oct 2025 17:44:33 +0900
Subject: [PATCH 11/14] Rewrite script in Swift instead of Python.

In response to: https://github.com/swiftlang/swift-foundation/pull/1286#discussion_r2437753438
---
 utils/update-iana-charset-names            |   2 +-
 utils/update-iana-charset-names-impl.py    | 174 ------------------
 utils/update-iana-charset-names-impl.swift | 195 +++++++++++++++++++++
 3 files changed, 196 insertions(+), 175 deletions(-)
 delete mode 100644 utils/update-iana-charset-names-impl.py
 create mode 100755 utils/update-iana-charset-names-impl.swift

diff --git a/utils/update-iana-charset-names b/utils/update-iana-charset-names
index 23d9a2ef8..e56e972b1 100755
--- a/utils/update-iana-charset-names
+++ b/utils/update-iana-charset-names
@@ -53,7 +53,7 @@ declare generatedCode
 generatedCode=$(
   echo "${swiftLicenseHeader##$'\n'}"
   echo "$warningComment"
-  python3 "${utilsDir}/${commandName}-impl.py"
+  swift -D PRINT_CODE "${utilsDir}/${commandName}-impl.swift"
 )
 
 echo "Writing the code to '${targetSwiftFileRelativePath}'..." 1>&2
diff --git a/utils/update-iana-charset-names-impl.py b/utils/update-iana-charset-names-impl.py
deleted file mode 100644
index 399fe16e4..000000000
--- a/utils/update-iana-charset-names-impl.py
+++ /dev/null
@@ -1,174 +0,0 @@
-#!/usr/bin/env python3
-##===----------------------------------------------------------------------===##
-##
-## This source file is part of the Swift.org open source project
-##
-## Copyright (c) 2025 Apple Inc. and the Swift project authors
-## Licensed under Apache License v2.0 with Runtime Library Exception
-##
-## See https://swift.org/LICENSE.txt for license information
-## See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
-##
-##===----------------------------------------------------------------------===##
-
-"""
-This is a python script that converts an XML file containing the list of IANA
-"Character Sets" to Swift source code.
-This script generates minimum code and is intended to be executed by other shell
-script.
-"""
-
-import re
-import urllib.request as request
-import xml.etree.ElementTree as ElemTree
-from typing import List, Optional
-
-REQUIRED_CHARSET_NAMES: List[str] = [
-    "UTF-8",
-    "US-ASCII",
-    "EUC-JP",
-    "ISO-8859-1",
-    "Shift_JIS",
-    "ISO-8859-2",
-    "UTF-16",
-    "windows-1251",
-    "windows-1252",
-    "windows-1253",
-    "windows-1254",
-    "windows-1250",
-    "ISO-2022-JP",
-    "macintosh",
-    "UTF-16BE",
-    "UTF-16LE",
-    "UTF-32",
-    "UTF-32BE",
-    "UTF-32LE",
-]
-CHARSETS_XML_URL = "https://www.iana.org/assignments/character-sets/character-sets.xml"
-CHARSETS_XML_NS = "http://www.iana.org/assignments"
-SWIFT_CODE_INDENT = "    "
-
-
-class IANACharsetNameRecord:
-    """Representation of <record> element in 'character-sets.xml'
-
-    The structure of <record> element is as blow:
-    <record>
-        <name>US-ASCII</name>
-        <xref type="rfc" data="rfc2046"/>
-        <value>3</value>
-        <description>ANSI X3.4-1986</description>
-        <alias>iso-ir-6</alias>
-        <alias>ANSI_X3.4-1968</alias>
-        <alias>ANSI_X3.4-1986</alias>
-        <alias>ISO_646.irv:1991</alias>
-        <alias>ISO646-US</alias>
-        <alias>US-ASCII</alias>
-        <alias>us</alias>
-        <alias>IBM367</alias>
-        <alias>cp367</alias>
-        <alias>csASCII</alias>
-        <preferred_alias>US-ASCII</preferred_alias>
-    </record>
-    """
-
-    def __init__(self, recordElem: ElemTree.Element):
-        self._name: str = recordElem.find('./{%s}name' % (CHARSETS_XML_NS)).text
-        self._preferredMIMEName: Optional[str] = getattr(
-            recordElem.find('./{%s}preferred_alias' % (CHARSETS_XML_NS)),
-            'text',
-            None
-        )
-        self._aliases: List[str] = list(map(
-            lambda aliasElem: aliasElem.text,
-            recordElem.findall('./{%s}alias' % (CHARSETS_XML_NS))
-        ))
-        self._camelCasedName = None
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def preferredMIMEName(self) -> Optional[str]:
-        return self._preferredMIMEName
-
-    @property
-    def representativeName(self) -> str:
-        return self.preferredMIMEName or self.name
-
-    @property
-    def aliases(self) -> List[str]:
-        return self._aliases
-
-    @property
-    def camelCasedName(self) -> str:
-        if (self._camelCasedName is not None):
-            return self._camelCasedName
-
-        camelCasedName = ""
-        previousWord = None
-        for ii, word in enumerate(re.split(r"[^0-9A-Za-z]", self.representativeName)):
-            if previousWord is None:
-                camelCasedName = word.lower()
-            else:
-                if re.search(r"[0-9]$", previousWord) and re.search(r"^[0-9]", word):
-                    camelCasedName += "_"
-
-                if (re.fullmatch("[0-9]*[A-Z]+", word)):
-                    camelCasedName += word
-                else:
-                    camelCasedName += word.capitalize()
-
-            previousWord = word
-
-        self._camelCasedName = camelCasedName
-        return camelCasedName
-
-    @property
-    def swiftCodeLines(self) -> List[str]:
-        def __stringLiteralOrNil(string: Optional[str]) -> str:
-            if (string is None):
-                return 'nil'
-            return f'"{string}"'
-
-        lines: List[str] = []
-        lines.append(f"/// IANA Charset `{self.representativeName}`.")
-        lines.append(f"static let {self.camelCasedName} = IANACharset(")
-        lines.append(f"{SWIFT_CODE_INDENT}preferredMIMEName: {
-            __stringLiteralOrNil(self.preferredMIMEName)
-        },")
-        lines.append(f'{SWIFT_CODE_INDENT}name: "{self.name}",')
-        lines.append(f"{SWIFT_CODE_INDENT}aliases: [")
-        for alias in self.aliases:
-            lines.append(f"{SWIFT_CODE_INDENT * 2}\"{alias}\",")
-        lines.append(f"{SWIFT_CODE_INDENT}]")
-        lines.append(")")
-        return lines
-
-
-def generateSwiftCode() -> str:
-    charsetsXMLString = request.urlopen(request.Request(CHARSETS_XML_URL)).read()
-    charsetsXMLRoot = ElemTree.fromstring(charsetsXMLString)
-    charsetsXMLRecordElements = charsetsXMLRoot.findall(
-        "./{%s}registry/{%s}record" % (CHARSETS_XML_NS, CHARSETS_XML_NS)
-    )
-    result = "extension IANACharset {"
-    for record in map(
-        lambda recordElem: IANACharsetNameRecord(recordElem),
-        charsetsXMLRecordElements
-    ):
-        if (record.representativeName not in REQUIRED_CHARSET_NAMES):
-            continue
-        result += "\n"
-        result += "\n".join(map(
-            lambda line: SWIFT_CODE_INDENT + line,
-            record.swiftCodeLines
-        ))
-        result += "\n"
-    result += "}\n"
-    return result
-
-
-if __name__ == "__main__":
-    print(generateSwiftCode())
diff --git a/utils/update-iana-charset-names-impl.swift b/utils/update-iana-charset-names-impl.swift
new file mode 100755
index 000000000..c7e83e0d8
--- /dev/null
+++ b/utils/update-iana-charset-names-impl.swift
@@ -0,0 +1,195 @@
+#!/usr/bin/env swift -D PRINT_CODE
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+/*
+
+This is a Swift script that converts an XML file containing the list of IANA
+"Character Sets" to Swift source code.
+This script generates minimal code and is intended to be executed by another
+shell script.
+
+ */
+
+import Foundation
+#if canImport(FoundationXML)
+import FoundationXML
+#endif
+
+// MARK: - Constants
+
+let requiredCharsetNames = [
+    "UTF-8",
+    "US-ASCII",
+    "EUC-JP",
+    "ISO-8859-1",
+    "Shift_JIS",
+    "ISO-8859-2",
+    "UTF-16",
+    "windows-1251",
+    "windows-1252",
+    "windows-1253",
+    "windows-1254",
+    "windows-1250",
+    "ISO-2022-JP",
+    "macintosh",
+    "UTF-16BE",
+    "UTF-16LE",
+    "UTF-32",
+    "UTF-32BE",
+    "UTF-32LE",
+]
+let charsetsXMLURL = URL(
+    string: "https://www.iana.org/assignments/character-sets/character-sets.xml"
+)!
+let charsetsXMLNamespace = "http://www.iana.org/assignments"
+let swiftCodeIndent = "    "
+
+
+// MARK: - Implementation
+
+enum CodeGenerationError: Swift.Error {
+    case missingName
+    case missingAliasValue
+    case noRootElement
+}
+
+/// Representation of a <record> element in 'character-sets.xml'.
+///
+/// The structure of a <record> element is as below:
+/// ```xml
+/// <record>
+///     <name>US-ASCII</name>
+///     <xref type="rfc" data="rfc2046"/>
+///     <value>3</value>
+///     <description>ANSI X3.4-1986</description>
+///     <alias>iso-ir-6</alias>
+///     <alias>ANSI_X3.4-1968</alias>
+///     <alias>ANSI_X3.4-1986</alias>
+///     <alias>ISO_646.irv:1991</alias>
+///     <alias>ISO646-US</alias>
+///     <alias>US-ASCII</alias>
+///     <alias>us</alias>
+///     <alias>IBM367</alias>
+///     <alias>cp367</alias>
+///     <alias>csASCII</alias>
+///     <preferred_alias>US-ASCII</preferred_alias>
+/// </record>
+/// ```
+struct IANACharsetNameRecord {
+    /// Preferred MIME Name
+    let preferredMIMEName: String?
+
+    /// The name of this charset
+    let name: String
+
+    /// The aliases of this charset
+    let aliases: Array<String>
+
+    var representativeName: String {
+        return preferredMIMEName ?? name
+    }
+
+    var swiftCodeLines: [String] {
+        var lines: [String] = []
+        lines.append("/// IANA Charset `\(representativeName)`.")
+        lines.append("static let \(representativeName._camelcased()) = IANACharset(")
+        lines.append("\(swiftCodeIndent)preferredMIMEName: \(preferredMIMEName.map { #""\#($0)""# } ?? "nil"),")
+        lines.append("\(swiftCodeIndent)name: \"\(name)\",")
+        lines.append("\(swiftCodeIndent)aliases: [")
+        for alias in aliases {
+            lines.append("\(swiftCodeIndent)\(swiftCodeIndent)\"\(alias)\",")
+        }
+        lines.append("\(swiftCodeIndent)]")
+        lines.append(")")
+        return lines
+    }
+
+    init(_ node: XMLNode) throws {
+        guard let name = try node.nodes(forXPath: "./name").first?.stringValue else {
+            throw CodeGenerationError.missingName
+        }
+        self.name = name
+        self.preferredMIMEName = try node.nodes(forXPath: "./preferred_alias").first?.stringValue
+        self.aliases = try node.nodes(forXPath: "./alias").map {
+            guard let alias = $0.stringValue else {
+                throw CodeGenerationError.missingAliasValue
+            }
+            return alias
+        }
+    }
+}
+
+func generateSwiftCode() throws -> String {
+    let charsetsXMLDocument = try XMLDocument(contentsOf: charsetsXMLURL)
+    guard let charsetsXMLRoot = charsetsXMLDocument.rootElement() else {
+        throw CodeGenerationError.noRootElement
+    }
+    let charsetsXMLRecordElements = try charsetsXMLRoot.nodes(forXPath: "./registry/record")
+
+    var result = "extension IANACharset {"
+
+    for record in try charsetsXMLRecordElements.map({
+        try IANACharsetNameRecord($0)
+    }) where requiredCharsetNames.contains(record.representativeName) {
+        result += "\n"
+        result += record.swiftCodeLines.map({ swiftCodeIndent + $0 }).joined(separator: "\n")
+        result += "\n"
+    }
+
+    result += "}\n"
+    return result
+}
+
+#if PRINT_CODE
+print(try generateSwiftCode())
+#endif
+
+// MARK: - Extensions
+
+extension UTF8.CodeUnit {
+    var _isASCIINumeric: Bool { (0x30...0x39).contains(self) }
+    var _isASCIIUppercase: Bool { (0x41...0x5A).contains(self) }
+    var _isASCIILowercase: Bool { (0x61...0x7A).contains(self) }
+}
+
+extension String {
+    func _camelcased() -> String {
+        var result = ""
+        var previousWord: Substring.UTF8View? = nil
+        for wordUTF8 in self.utf8.split(whereSeparator: {
+            !$0._isASCIINumeric &&
+            !$0._isASCIIUppercase &&
+            !$0._isASCIILowercase
+        }) {
+            defer {
+                previousWord = wordUTF8
+            }
+            let word = String(Substring(wordUTF8))
+            guard let previousWord else {
+                result += word.lowercased()
+                continue
+            }
+            if previousWord.last!._isASCIINumeric && wordUTF8.first!._isASCIINumeric {
+                result += "_"
+            }
+            if let firstNonNumericIndex = wordUTF8.firstIndex(where: { !$0._isASCIINumeric }),
+               wordUTF8[firstNonNumericIndex...].allSatisfy({ $0._isASCIIUppercase }) {
+                result += word
+            } else {
+                result += word.capitalized(with: nil)
+            }
+
+        }
+        return result
+    }
+}

From 8f84db7ae8702407842b4325666a77b34f8a796e Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Wed, 22 Oct 2025 11:25:56 +0900
Subject: [PATCH 12/14] Simplify logic to parse IANA Charset names.

In response to:
- https://github.com/swiftlang/swift-foundation/pull/1286#discussion_r2441497400
- https://github.com/swiftlang/swift-foundation/pull/1286#discussion_r2441505001
- https://github.com/swiftlang/swift-foundation/pull/1286#discussion_r2441546727
---
 .../String/String+Encoding+Names.swift        | 135 +++---------------
 1 file changed, 17 insertions(+), 118 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 587b2eb45..2e34b3fed 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -13,126 +13,28 @@
 
 // MARK: - Private extensions for parsing encoding names
 
-private extension Unicode.Scalar {
-    /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
-    ///
-    /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
-    var _isASCIIWhitespace: Bool {
-        switch self.value {
-        case 0x09, 0x0A, 0x0C, 0x0D, 0x20: true
+private extension UTF8.CodeUnit {
+    func _isASCIICaseinsensitivelyEqual(to other: UTF8.CodeUnit) -> Bool {
+        return switch self {
+        case other, other._uppercased, other._lowercased: true
         default: false
         }
     }
 }
 
 private extension String {
-    var _trimmed: Substring.UnicodeScalarView {
-        let scalars = self.unicodeScalars
-        let isNonWhitespace: (Unicode.Scalar) -> Bool = { !$0._isASCIIWhitespace }
-        guard let firstIndexOfNonWhitespace = scalars.firstIndex(where: isNonWhitespace),
-              let lastIndexOfNonWhitespace = scalars.lastIndex(where: isNonWhitespace) else {
-            return Substring.UnicodeScalarView()
-        }
-        return scalars[firstIndexOfNonWhitespace...lastIndexOfNonWhitespace]
-    }
-}
-
-/// A type that holds a `Unicode.Scalar` where its value is compared case-insensitively with others'
-/// _if the value is within ASCII range_.
-private struct ASCIICaseInsensitiveUnicodeScalar: Equatable,
-                                                  ExpressibleByUnicodeScalarLiteral {
-    typealias UnicodeScalarLiteralType = Unicode.Scalar.UnicodeScalarLiteralType
-
-    let scalar: Unicode.Scalar
-
-    init(_ scalar: Unicode.Scalar) {
-        assert(scalar.isASCII)
-        self.scalar = scalar
-    }
-
-    init(unicodeScalarLiteral value: Unicode.Scalar.UnicodeScalarLiteralType) {
-        self.init(Unicode.Scalar(unicodeScalarLiteral: value))
-    }
-
-    static func ==(
-        lhs: ASCIICaseInsensitiveUnicodeScalar,
-        rhs: ASCIICaseInsensitiveUnicodeScalar
-    ) -> Bool {
-        if lhs.scalar == rhs.scalar {
-            return true
-        } else if ("A"..."Z").contains(lhs.scalar) {
-            return lhs.scalar.value + 0x20 == rhs.scalar.value
-        } else if ("a"..."z").contains(lhs.scalar) {
-            return lhs.scalar.value - 0x20 == rhs.scalar.value
-        }
-        return false
-    }
-}
-
-/// A type to tokenize string for `String.Encoding` names.
-internal protocol StringEncodingNameTokenizer: ~Copyable {
-    associatedtype Token: Equatable
-    init(name: String)
-    mutating func nextToken() throws -> Token?
-}
-
-extension StringEncodingNameTokenizer where Self: ~Copyable {
-    mutating func hasEqualTokens(with other: consuming Self) throws -> Bool {
-        while let myToken = try self.nextToken() {
-            guard let otherToken = try other.nextToken(),
-                  myToken == otherToken else {
+    func _isASCIICaseinsensitivelyEqual(to other: String) -> Bool {
+        let (myUTF8, otherUTF8) = (self.utf8, other.utf8)
+        var (myIndex, otherIndex) = (myUTF8.startIndex, otherUTF8.startIndex)
+        while myIndex < myUTF8.endIndex && otherIndex < otherUTF8.endIndex {
+            guard myUTF8[myIndex]._isASCIICaseinsensitivelyEqual(to: otherUTF8[otherIndex]) else {
                 return false
             }
-        }
-        return try other.nextToken() == nil
-    }
-}
-
-
-/// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s.
-private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable {
-    typealias Token = ASCIICaseInsensitiveUnicodeScalar
 
-    enum Error: Swift.Error {
-        case nonASCII
-    }
-
-    let scalars: Substring.UnicodeScalarView
-
-    var _currentIndex: Substring.UnicodeScalarView.Index
-
-    init(name: String) {
-        self.scalars = name._trimmed
-        self._currentIndex = scalars.startIndex
-    }
-
-    mutating func nextToken() throws -> Token? {
-        guard _currentIndex < scalars.endIndex else {
-            return nil
-        }
-        let scalar = scalars[_currentIndex]
-        guard scalar.isASCII else { throw Error.nonASCII }
-        defer {
-            scalars.formIndex(after: &_currentIndex)
-        }
-        return  ASCIICaseInsensitiveUnicodeScalar(scalar)
-    }
-}
-
-
-private extension String {
-    func isEqual<T>(
-        to other: String,
-        tokenizedBy tokenizer: T.Type
-    ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable {
-        do {
-            var myTokenizer = T(name: self)
-            let otherTokenizer = T(name: other)
-            return try myTokenizer.hasEqualTokens(with: otherTokenizer)
-        } catch {
-            // Any errors imply that `self` or `other` contains invalid characters.
-            return false
+            myUTF8.formIndex(after: &myIndex)
+            otherUTF8.formIndex(after: &otherIndex)
         }
+        return myIndex == myUTF8.endIndex && otherIndex == otherUTF8.endIndex
     }
 }
 
@@ -160,19 +62,16 @@ internal struct IANACharset {
         self.aliases = aliases
     }
 
-    func matches<T>(
-        _ string: String,
-        tokenizedBy tokenizer: T.Type
-    ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable {
+    func matches(_ string: String) -> Bool {
         if let preferredMIMEName = self.preferredMIMEName,
-           preferredMIMEName.isEqual(to: string, tokenizedBy: tokenizer) {
+           preferredMIMEName._isASCIICaseinsensitivelyEqual(to: string) {
             return true
         }
-        if name.isEqual(to: string, tokenizedBy: tokenizer) {
+        if name._isASCIICaseinsensitivelyEqual(to: string) {
             return true
         }
         for alias in aliases {
-            if alias.isEqual(to: string, tokenizedBy: tokenizer) {
+            if alias._isASCIICaseinsensitivelyEqual(to: string) {
                 return true
             }
         }
@@ -249,7 +148,7 @@ extension String.Encoding {
                 guard let ianaCharset = encoding._ianaCharset else {
                     continue
                 }
-                if ianaCharset.matches(charsetName, tokenizedBy: ASCIICaseInsensitiveTokenizer.self) {
+                if ianaCharset.matches(charsetName) {
                     return encoding
                 }
             }

From d4ab876d7ad2a7d54968284887bfaaeb74650445 Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Fri, 24 Oct 2025 11:27:53 +0900
Subject: [PATCH 13/14] Fix spelling of functions for "case-insensitively".

In response to:
- https://github.com/swiftlang/swift-foundation/pull/1286#discussion_r2457914338
---
 .../String/String+Encoding+Names.swift               | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 2e34b3fed..84b51ec48 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -14,7 +14,7 @@
 // MARK: - Private extensions for parsing encoding names
 
 private extension UTF8.CodeUnit {
-    func _isASCIICaseinsensitivelyEqual(to other: UTF8.CodeUnit) -> Bool {
+    func _isASCIICaseInsensitivelyEqual(to other: UTF8.CodeUnit) -> Bool {
         return switch self {
         case other, other._uppercased, other._lowercased: true
         default: false
@@ -23,11 +23,11 @@ private extension UTF8.CodeUnit {
 }
 
 private extension String {
-    func _isASCIICaseinsensitivelyEqual(to other: String) -> Bool {
+    func _isASCIICaseInsensitivelyEqual(to other: String) -> Bool {
         let (myUTF8, otherUTF8) = (self.utf8, other.utf8)
         var (myIndex, otherIndex) = (myUTF8.startIndex, otherUTF8.startIndex)
         while myIndex < myUTF8.endIndex && otherIndex < otherUTF8.endIndex {
-            guard myUTF8[myIndex]._isASCIICaseinsensitivelyEqual(to: otherUTF8[otherIndex]) else {
+            guard myUTF8[myIndex]._isASCIICaseInsensitivelyEqual(to: otherUTF8[otherIndex]) else {
                 return false
             }
 
@@ -64,14 +64,14 @@ internal struct IANACharset {
 
     func matches(_ string: String) -> Bool {
         if let preferredMIMEName = self.preferredMIMEName,
-           preferredMIMEName._isASCIICaseinsensitivelyEqual(to: string) {
+           preferredMIMEName._isASCIICaseInsensitivelyEqual(to: string) {
             return true
         }
-        if name._isASCIICaseinsensitivelyEqual(to: string) {
+        if name._isASCIICaseInsensitivelyEqual(to: string) {
             return true
         }
         for alias in aliases {
-            if alias._isASCIICaseinsensitivelyEqual(to: string) {
+            if alias._isASCIICaseInsensitivelyEqual(to: string) {
                 return true
             }
         }

From cf9ed49f0ead711ed80ba469aedf55831abcb25e Mon Sep 17 00:00:00 2001
From: YOCKOW <YOCKOW@users.noreply.github.com>
Date: Fri, 24 Oct 2025 11:45:51 +0900
Subject: [PATCH 14/14] Remove redundant nested function in
 `String.Encoding(ianaName:)`.

In response to:
- https://github.com/swiftlang/swift-foundation/pull/1286#discussion_r2457939067
---
 .../String/String+Encoding+Names.swift        | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
index 84b51ec48..1407e5ae1 100644
--- a/Sources/FoundationEssentials/String/String+Encoding+Names.swift
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -143,22 +143,13 @@ extension String.Encoding {
             .utf32LittleEndian,
         ]
 
-        func __determineEncoding() -> String.Encoding? {
-            for encoding in possibilities {
-                guard let ianaCharset = encoding._ianaCharset else {
-                    continue
-                }
-                if ianaCharset.matches(charsetName) {
-                    return encoding
-                }
+        for encoding in possibilities {
+            if encoding._ianaCharset!.matches(charsetName) {
+                self = encoding
+                return
             }
-            return nil
         }
-
-        guard let encoding = __determineEncoding() else {
-            return nil
-        }
-        self = encoding
+        return nil
     }
 }