Skip to content

Commit 2408a8a

Browse files
authored
gh-121795: Improve performance of set membership testing from set arguments (#121796)
1 parent 9766819 commit 2408a8a

File tree

3 files changed

+47
-23
lines changed

3 files changed

+47
-23
lines changed

Diff for: Lib/test/test_set.py

+10
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,16 @@ def __le__(self, some_set):
635635
myset >= myobj
636636
self.assertTrue(myobj.le_called)
637637

638+
def test_set_membership(self):
639+
myfrozenset = frozenset(range(3))
640+
myset = {myfrozenset, "abc", 1}
641+
self.assertIn(set(range(3)), myset)
642+
self.assertNotIn(set(range(1)), myset)
643+
myset.discard(set(range(3)))
644+
self.assertEqual(myset, {"abc", 1})
645+
self.assertRaises(KeyError, myset.remove, set(range(1)))
646+
self.assertRaises(KeyError, myset.remove, set(range(3)))
647+
638648

639649
class SetSubclass(set):
640650
pass
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
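Improve performance of set membership testing, ``set.remove()``, and ``set.discard()`` when the argument is a set: the hash of the equivalent frozenset is now computed directly, without creating a temporary frozenset object.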

Diff for: Objects/setobject.c

+36-23
Original file line numberDiff line numberDiff line change
@@ -709,18 +709,20 @@ _shuffle_bits(Py_uhash_t h)
709709
large primes with "interesting bit patterns" and that passed tests
710710
for good collision statistics on a variety of problematic datasets
711711
including powersets and graph structures (such as David Eppstein's
712-
graph recipes in Lib/test/test_set.py) */
712+
graph recipes in Lib/test/test_set.py).
713+
714+
This hash algorithm can be used on either a frozenset or a set.
715+
When it is used on a set, it computes the hash value of the equivalent
716+
frozenset without creating a new frozenset object. */
713717

714718
static Py_hash_t
715-
frozenset_hash(PyObject *self)
719+
frozenset_hash_impl(PyObject *self)
716720
{
721+
assert(PyAnySet_Check(self));
717722
PySetObject *so = (PySetObject *)self;
718723
Py_uhash_t hash = 0;
719724
setentry *entry;
720725

721-
if (so->hash != -1)
722-
return so->hash;
723-
724726
/* Xor-in shuffled bits from every entry's hash field because xor is
725727
commutative and a frozenset hash should be independent of order.
726728
@@ -753,6 +755,20 @@ frozenset_hash(PyObject *self)
753755
if (hash == (Py_uhash_t)-1)
754756
hash = 590923713UL;
755757

758+
return (Py_hash_t)hash;
759+
}
760+
761+
static Py_hash_t
762+
frozenset_hash(PyObject *self)
763+
{
764+
PySetObject *so = (PySetObject *)self;
765+
Py_uhash_t hash;
766+
767+
if (so->hash != -1) {
768+
return so->hash;
769+
}
770+
771+
hash = frozenset_hash_impl(self);
756772
so->hash = hash;
757773
return hash;
758774
}
@@ -2137,19 +2153,18 @@ set_add_impl(PySetObject *so, PyObject *key)
21372153
static int
21382154
set_contains_lock_held(PySetObject *so, PyObject *key)
21392155
{
2140-
PyObject *tmpkey;
21412156
int rv;
21422157

21432158
rv = set_contains_key(so, key);
21442159
if (rv < 0) {
21452160
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
21462161
return -1;
21472162
PyErr_Clear();
2148-
tmpkey = make_new_set(&PyFrozenSet_Type, key);
2149-
if (tmpkey == NULL)
2150-
return -1;
2151-
rv = set_contains_key(so, tmpkey);
2152-
Py_DECREF(tmpkey);
2163+
Py_hash_t hash;
2164+
Py_BEGIN_CRITICAL_SECTION(key);
2165+
hash = frozenset_hash_impl(key);
2166+
Py_END_CRITICAL_SECTION();
2167+
rv = set_contains_entry(so, key, hash);
21532168
}
21542169
return rv;
21552170
}
@@ -2203,19 +2218,18 @@ static PyObject *
22032218
set_remove_impl(PySetObject *so, PyObject *key)
22042219
/*[clinic end generated code: output=0b9134a2a2200363 input=893e1cb1df98227a]*/
22052220
{
2206-
PyObject *tmpkey;
22072221
int rv;
22082222

22092223
rv = set_discard_key(so, key);
22102224
if (rv < 0) {
22112225
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
22122226
return NULL;
22132227
PyErr_Clear();
2214-
tmpkey = make_new_set(&PyFrozenSet_Type, key);
2215-
if (tmpkey == NULL)
2216-
return NULL;
2217-
rv = set_discard_key(so, tmpkey);
2218-
Py_DECREF(tmpkey);
2228+
Py_hash_t hash;
2229+
Py_BEGIN_CRITICAL_SECTION(key);
2230+
hash = frozenset_hash_impl(key);
2231+
Py_END_CRITICAL_SECTION();
2232+
rv = set_discard_entry(so, key, hash);
22192233
if (rv < 0)
22202234
return NULL;
22212235
}
@@ -2244,19 +2258,18 @@ static PyObject *
22442258
set_discard_impl(PySetObject *so, PyObject *key)
22452259
/*[clinic end generated code: output=eec3b687bf32759e input=861cb7fb69b4def0]*/
22462260
{
2247-
PyObject *tmpkey;
22482261
int rv;
22492262

22502263
rv = set_discard_key(so, key);
22512264
if (rv < 0) {
22522265
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
22532266
return NULL;
22542267
PyErr_Clear();
2255-
tmpkey = make_new_set(&PyFrozenSet_Type, key);
2256-
if (tmpkey == NULL)
2257-
return NULL;
2258-
rv = set_discard_key(so, tmpkey);
2259-
Py_DECREF(tmpkey);
2268+
Py_hash_t hash;
2269+
Py_BEGIN_CRITICAL_SECTION(key);
2270+
hash = frozenset_hash_impl(key);
2271+
Py_END_CRITICAL_SECTION();
2272+
rv = set_discard_entry(so, key, hash);
22602273
if (rv < 0)
22612274
return NULL;
22622275
}

0 commit comments

Comments
 (0)