Skip to content

Commit 5b80cf1

Browse files
committed
[stdlib]: Set,Dictionary: Fix subtle cross-collection hash collision issue
When Set/Dictionary is nested in another Set, the boundaries of the nested collections weren’t correctly delineated in commutative hashing. For example, these Sets all hashed the same: [[1, 2], [3, 4]] [[1, 3], [2, 4]] [[1, 4], [2, 3]] Hash collisions could thus be systematically generated. To fix this, remove collection-level support for one-shot hashing and revert to the previous method of generating hash values. (Set is still able to support one-shot hashing for its members, though.)
1 parent a274a0c commit 5b80cf1

File tree

4 files changed

+64
-44
lines changed

4 files changed

+64
-44
lines changed

stdlib/public/core/Dictionary.swift

+1-14
Original file line numberDiff line numberDiff line change
@@ -1453,7 +1453,7 @@ extension Dictionary: Hashable where Value: Hashable {
14531453
public func hash(into hasher: inout Hasher) {
14541454
var commutativeHash = 0
14551455
for (k, v) in self {
1456-
// Note that we use a copy of the outer hasher here. This makes collisions
1456+
// Note that we use a copy of our own hasher here. This makes hash values
14571457
// dependent on its state, eliminating static collision patterns.
14581458
var elementHasher = hasher
14591459
elementHasher.combine(k)
@@ -1462,19 +1462,6 @@ extension Dictionary: Hashable where Value: Hashable {
14621462
}
14631463
hasher.combine(commutativeHash)
14641464
}
1465-
1466-
@inlinable // FIXME(sil-serialize-all)
1467-
public func _rawHashValue(seed: (UInt64, UInt64)) -> Int {
1468-
var commutativeHash = 0
1469-
let hasher = Hasher(_seed: seed)
1470-
for (k, v) in self {
1471-
var elementHasher = hasher
1472-
elementHasher.combine(k)
1473-
elementHasher.combine(v)
1474-
commutativeHash ^= elementHasher._finalize()
1475-
}
1476-
return commutativeHash
1477-
}
14781465
}
14791466

14801467
extension Dictionary: CustomStringConvertible, CustomDebugStringConvertible {

stdlib/public/core/Set.swift

+2-6
Original file line numberDiff line numberDiff line change
@@ -487,16 +487,12 @@ extension Set: Hashable {
487487
@inlinable // FIXME(sil-serialize-all)
488488
public func hash(into hasher: inout Hasher) {
489489
// FIXME(ABI)#177: <rdar://problem/18915294> Cache Set<T> hashValue
490-
hasher.combine(_rawHashValue(seed: hasher._generateSeed()))
491-
}
492-
493-
@inlinable // FIXME(sil-serialize-all)
494-
public func _rawHashValue(seed: (UInt64, UInt64)) -> Int {
495490
var hash = 0
491+
let seed = hasher._generateSeed()
496492
for member in self {
497493
hash ^= member._rawHashValue(seed: seed)
498494
}
499-
return hash
495+
hasher.combine(hash)
500496
}
501497
}
502498

validation-test/stdlib/Dictionary4.swift

+40-24
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,47 @@ import StdlibCollectionUnittest
99
var DictionaryTestSuite = TestSuite("Dictionary4")
1010

1111
DictionaryTestSuite.test("Hashable") {
12-
let d1: Dictionary<Int, String> = [1: "meow", 2: "meow", 3: "meow"]
13-
let d2: Dictionary<Int, String> = [1: "meow", 2: "meow", 3: "mooo"]
14-
let d3: Dictionary<Int, String> = [1: "meow", 2: "meow", 4: "meow"]
15-
let d4: Dictionary<Int, String> = [1: "meow", 2: "meow", 4: "mooo"]
16-
checkHashable([d1, d2, d3, d4], equalityOracle: { $0 == $1 })
17-
18-
let dd1: Dictionary<Int, Dictionary<Int, String>> = [1: [2: "meow"]]
19-
let dd2: Dictionary<Int, Dictionary<Int, String>> = [2: [1: "meow"]]
20-
let dd3: Dictionary<Int, Dictionary<Int, String>> = [2: [2: "meow"]]
21-
let dd4: Dictionary<Int, Dictionary<Int, String>> = [1: [1: "meow"]]
22-
let dd5: Dictionary<Int, Dictionary<Int, String>> = [2: [2: "mooo"]]
23-
let dd6: Dictionary<Int, Dictionary<Int, String>> = [2: [:]]
24-
let dd7: Dictionary<Int, Dictionary<Int, String>> = [:]
25-
checkHashable(
26-
[dd1, dd2, dd3, dd4, dd5, dd6, dd7],
27-
equalityOracle: { $0 == $1 })
28-
29-
// Check that hash is equal even though dictionary is traversed differently
30-
var d5: Dictionary<Int, String> =
31-
[1: "meow", 2: "meow", 3: "mooo", 4: "woof", 5: "baah", 6: "mooo"]
32-
let expected = d5.hashValue
33-
for capacity in [4, 8, 16, 32, 64, 128, 256] {
34-
d5.reserveCapacity(capacity)
35-
expectEqual(d5.hashValue, expected)
12+
let d1: [Dictionary<Int, String>] = [
13+
[1: "meow", 2: "meow", 3: "meow"],
14+
[1: "meow", 2: "meow", 3: "mooo"],
15+
[1: "meow", 2: "meow", 4: "meow"],
16+
[1: "meow", 2: "meow", 4: "mooo"]]
17+
checkHashable(d1, equalityOracle: { $0 == $1 })
18+
19+
let d2: [Dictionary<Int, Dictionary<Int, String>>] = [
20+
[1: [2: "meow"]],
21+
[2: [1: "meow"]],
22+
[2: [2: "meow"]],
23+
[1: [1: "meow"]],
24+
[2: [2: "mooo"]],
25+
[2: [:]],
26+
[:]]
27+
checkHashable(d2, equalityOracle: { $0 == $1 })
28+
29+
// Dictionary should hash itself in a way that ensures instances get correctly
30+
// delineated even when they are nested in other commutative collections.
31+
// These are different Sets, so they should produce different hashes:
32+
let remix: [Set<Dictionary<String, Int>>] = [
33+
[["Blanche": 1, "Rose": 2], ["Dorothy": 3, "Sophia": 4]],
34+
[["Blanche": 1, "Dorothy": 3], ["Rose": 2, "Sophia": 4]],
35+
[["Blanche": 1, "Sophia": 4], ["Rose": 2, "Dorothy": 3]]
36+
]
37+
checkHashable(remix, equalityOracle: { $0 == $1 })
38+
39+
// Dictionary ordering is not guaranteed to be consistent across equal
40+
// instances. In particular, ordering is highly sensitive to the size of the
41+
// allocated storage buffer. Generate a few copies of the same dictionary with
42+
// different capacities, and verify that they compare and hash the same.
43+
var variants: [Dictionary<String, Int>] = []
44+
for i in 4 ..< 12 {
45+
var set: Dictionary<String, Int> = [
46+
"one": 1, "two": 2,
47+
"three": 3, "four": 4,
48+
"five": 5, "six": 6]
49+
set.reserveCapacity(1 << i)
50+
variants.append(set)
3651
}
52+
checkHashable(variants, equalityOracle: { _, _ in true })
3753
}
3854

3955
runAllTests()

validation-test/stdlib/Set.swift

+21
Original file line numberDiff line numberDiff line change
@@ -3470,6 +3470,27 @@ SetTestSuite.test("Hashable") {
34703470
let ss11 = Set([Set([2020] as [Int]), Set([3030] as [Int]), Set([2020] as [Int])])
34713471
let ss2 = Set([Set([9090] as [Int])])
34723472
checkHashable([ss1, ss11, ss2], equalityOracle: { $0 == $1 })
3473+
3474+
// Set should hash itself in a way that ensures instances get correctly
3475+
// delineated even when they are nested in other commutative collections.
3476+
// These are different Sets, so they should produce different hashes:
3477+
let remix: [Set<Set<Int>>] = [
3478+
[[1, 2], [3, 4]],
3479+
[[1, 3], [2, 4]],
3480+
[[1, 4], [2, 3]],
3481+
]
3482+
checkHashable(remix, equalityOracle: { $0 == $1 })
3483+
3484+
// Set ordering is not guaranteed to be consistent across equal instances. In
3485+
// particular, ordering is highly sensitive to the size of the allocated
3486+
// storage buffer.
3487+
var variants: [Set<Int>] = []
3488+
for i in 4 ..< 12 {
3489+
var set: Set<Int> = [1, 2, 3, 4, 5, 6]
3490+
set.reserveCapacity(1 << i)
3491+
variants.append(set)
3492+
}
3493+
checkHashable(variants, equalityOracle: { _, _ in true })
34733494
}
34743495

34753496
//===---

0 commit comments

Comments
 (0)