Skip to content

Commit 9769b7a

Browse files
[3.13] gh-113993: Allow interned strings to be mortal, and fix related issues (GH-120520) (GH-120945)
* Add an InternalDocs file describing how interning should work and how to use it. * Add internal functions to *explicitly* request what kind of interning is done: - `_PyUnicode_InternMortal` - `_PyUnicode_InternImmortal` - `_PyUnicode_InternStatic` * Switch uses of `PyUnicode_InternInPlace` to those. * Disallow using `_Py_SetImmortal` on strings directly. You should use `_PyUnicode_InternImmortal` instead: - Strings should be interned before immortalization, otherwise you're possibly immortalizing a copy. - `_Py_SetImmortal` doesn't handle the `SSTATE_INTERNED_MORTAL` to `SSTATE_INTERNED_IMMORTAL` update, and those flags can't be changed in backports, as they are now part of the public API and version-specific ABI. * Add private `_only_immortal` argument for `sys.getunicodeinternedsize`, used in refleak test machinery. * Make sure the statically allocated string singletons are unique. This means these sets are now disjoint: - `_Py_ID` - `_Py_STR` (including the empty string) - one-character latin-1 singletons. Now, when you intern a singleton, that exact singleton will be interned. * Add a `_Py_LATIN1_CHR` macro, use it instead of `_Py_ID`/`_Py_STR` for one-character latin-1 singletons everywhere (including Clinic). * Intern `_Py_STR` singletons at startup. * For free-threaded builds, intern `_Py_LATIN1_CHR` singletons at startup. * Beef up the tests. Cover internal details (marked with `@cpython_only`). * Add lots of assertions. Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
1 parent 447e07a commit 9769b7a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+2456
-1136
lines changed

Include/internal/pycore_global_objects_fini_generated.h

+1-19
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

+5-19
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,16 @@ struct _Py_global_strings {
3737
STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
3838
STRUCT_FOR_STR(anon_string, "<string>")
3939
STRUCT_FOR_STR(anon_unknown, "<unknown>")
40-
STRUCT_FOR_STR(close_br, "}")
4140
STRUCT_FOR_STR(dbl_close_br, "}}")
4241
STRUCT_FOR_STR(dbl_open_br, "{{")
4342
STRUCT_FOR_STR(dbl_percent, "%%")
4443
STRUCT_FOR_STR(defaults, ".defaults")
45-
STRUCT_FOR_STR(dot, ".")
4644
STRUCT_FOR_STR(dot_locals, ".<locals>")
4745
STRUCT_FOR_STR(empty, "")
4846
STRUCT_FOR_STR(generic_base, ".generic_base")
4947
STRUCT_FOR_STR(json_decoder, "json.decoder")
5048
STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
5149
STRUCT_FOR_STR(list_err, "list index out of range")
52-
STRUCT_FOR_STR(newline, "\n")
53-
STRUCT_FOR_STR(open_br, "{")
54-
STRUCT_FOR_STR(percent, "%")
5550
STRUCT_FOR_STR(type_params, ".type_params")
5651
STRUCT_FOR_STR(utf_8, "utf-8")
5752
} literals;
@@ -66,7 +61,6 @@ struct _Py_global_strings {
6661
STRUCT_FOR_ID(TextIOWrapper)
6762
STRUCT_FOR_ID(True)
6863
STRUCT_FOR_ID(WarningMessage)
69-
STRUCT_FOR_ID(_)
7064
STRUCT_FOR_ID(_WindowsConsoleIO)
7165
STRUCT_FOR_ID(__IOBase_closed)
7266
STRUCT_FOR_ID(__abc_tpflags__)
@@ -260,6 +254,7 @@ struct _Py_global_strings {
260254
STRUCT_FOR_ID(_lock_unlock_module)
261255
STRUCT_FOR_ID(_loop)
262256
STRUCT_FOR_ID(_needs_com_addref_)
257+
STRUCT_FOR_ID(_only_immortal)
263258
STRUCT_FOR_ID(_pack_)
264259
STRUCT_FOR_ID(_restype_)
265260
STRUCT_FOR_ID(_showwarnmsg)
@@ -272,7 +267,6 @@ struct _Py_global_strings {
272267
STRUCT_FOR_ID(_uninitialized_submodules)
273268
STRUCT_FOR_ID(_warn_unawaited_coroutine)
274269
STRUCT_FOR_ID(_xoptions)
275-
STRUCT_FOR_ID(a)
276270
STRUCT_FOR_ID(abs_tol)
277271
STRUCT_FOR_ID(access)
278272
STRUCT_FOR_ID(aclose)
@@ -296,7 +290,6 @@ struct _Py_global_strings {
296290
STRUCT_FOR_ID(attribute)
297291
STRUCT_FOR_ID(authorizer_callback)
298292
STRUCT_FOR_ID(autocommit)
299-
STRUCT_FOR_ID(b)
300293
STRUCT_FOR_ID(backtick)
301294
STRUCT_FOR_ID(base)
302295
STRUCT_FOR_ID(before)
@@ -314,7 +307,6 @@ struct _Py_global_strings {
314307
STRUCT_FOR_ID(byteorder)
315308
STRUCT_FOR_ID(bytes)
316309
STRUCT_FOR_ID(bytes_per_sep)
317-
STRUCT_FOR_ID(c)
318310
STRUCT_FOR_ID(c_call)
319311
STRUCT_FOR_ID(c_exception)
320312
STRUCT_FOR_ID(c_return)
@@ -370,7 +362,6 @@ struct _Py_global_strings {
370362
STRUCT_FOR_ID(count)
371363
STRUCT_FOR_ID(covariant)
372364
STRUCT_FOR_ID(cwd)
373-
STRUCT_FOR_ID(d)
374365
STRUCT_FOR_ID(data)
375366
STRUCT_FOR_ID(database)
376367
STRUCT_FOR_ID(day)
@@ -399,7 +390,6 @@ struct _Py_global_strings {
399390
STRUCT_FOR_ID(dont_inherit)
400391
STRUCT_FOR_ID(dst)
401392
STRUCT_FOR_ID(dst_dir_fd)
402-
STRUCT_FOR_ID(e)
403393
STRUCT_FOR_ID(eager_start)
404394
STRUCT_FOR_ID(effective_ids)
405395
STRUCT_FOR_ID(element_factory)
@@ -423,7 +413,6 @@ struct _Py_global_strings {
423413
STRUCT_FOR_ID(exp)
424414
STRUCT_FOR_ID(extend)
425415
STRUCT_FOR_ID(extra_tokens)
426-
STRUCT_FOR_ID(f)
427416
STRUCT_FOR_ID(facility)
428417
STRUCT_FOR_ID(factory)
429418
STRUCT_FOR_ID(false)
@@ -456,7 +445,6 @@ struct _Py_global_strings {
456445
STRUCT_FOR_ID(fset)
457446
STRUCT_FOR_ID(func)
458447
STRUCT_FOR_ID(future)
459-
STRUCT_FOR_ID(g)
460448
STRUCT_FOR_ID(generation)
461449
STRUCT_FOR_ID(genexpr)
462450
STRUCT_FOR_ID(get)
@@ -470,7 +458,6 @@ struct _Py_global_strings {
470458
STRUCT_FOR_ID(globals)
471459
STRUCT_FOR_ID(groupindex)
472460
STRUCT_FOR_ID(groups)
473-
STRUCT_FOR_ID(h)
474461
STRUCT_FOR_ID(handle)
475462
STRUCT_FOR_ID(handle_seq)
476463
STRUCT_FOR_ID(has_location)
@@ -581,7 +568,6 @@ struct _Py_global_strings {
581568
STRUCT_FOR_ID(msg)
582569
STRUCT_FOR_ID(mutex)
583570
STRUCT_FOR_ID(mycmp)
584-
STRUCT_FOR_ID(n)
585571
STRUCT_FOR_ID(n_arg)
586572
STRUCT_FOR_ID(n_fields)
587573
STRUCT_FOR_ID(n_sequence_fields)
@@ -627,7 +613,6 @@ struct _Py_global_strings {
627613
STRUCT_FOR_ID(outgoing)
628614
STRUCT_FOR_ID(overlapped)
629615
STRUCT_FOR_ID(owner)
630-
STRUCT_FOR_ID(p)
631616
STRUCT_FOR_ID(pages)
632617
STRUCT_FOR_ID(parent)
633618
STRUCT_FOR_ID(password)
@@ -655,7 +640,6 @@ struct _Py_global_strings {
655640
STRUCT_FOR_ID(ps2)
656641
STRUCT_FOR_ID(query)
657642
STRUCT_FOR_ID(quotetabs)
658-
STRUCT_FOR_ID(r)
659643
STRUCT_FOR_ID(raw)
660644
STRUCT_FOR_ID(read)
661645
STRUCT_FOR_ID(read1)
@@ -679,7 +663,6 @@ struct _Py_global_strings {
679663
STRUCT_FOR_ID(return)
680664
STRUCT_FOR_ID(reverse)
681665
STRUCT_FOR_ID(reversed)
682-
STRUCT_FOR_ID(s)
683666
STRUCT_FOR_ID(salt)
684667
STRUCT_FOR_ID(sched_priority)
685668
STRUCT_FOR_ID(scheduler)
@@ -786,7 +769,6 @@ struct _Py_global_strings {
786769
STRUCT_FOR_ID(writable)
787770
STRUCT_FOR_ID(write)
788771
STRUCT_FOR_ID(write_through)
789-
STRUCT_FOR_ID(x)
790772
STRUCT_FOR_ID(year)
791773
STRUCT_FOR_ID(zdict)
792774
} identifiers;
@@ -809,6 +791,10 @@ struct _Py_global_strings {
809791
(_Py_SINGLETON(strings.identifiers._py_ ## NAME._ascii.ob_base))
810792
#define _Py_STR(NAME) \
811793
(_Py_SINGLETON(strings.literals._py_ ## NAME._ascii.ob_base))
794+
/* Evaluate to the PyObject* of the statically allocated one-character
 * string singleton for the Latin-1 code point CH.
 *
 * Values below 128 index the `ascii` table directly; every other value
 * indexes the `latin1` table at CH - 128.  No range check is performed
 * here, so the caller presumably must pass a value in 0..255 -- TODO
 * confirm against the table sizes, which are declared elsewhere.
 *
 * CH is expanded twice (once in the comparison, once in the selected
 * branch), so do not pass an argument with side effects.
 */
#define _Py_LATIN1_CHR(CH) \
795+
((CH) < 128 \
796+
? (PyObject*)&_Py_SINGLETON(strings).ascii[(CH)] \
797+
: (PyObject*)&_Py_SINGLETON(strings).latin1[(CH) - 128])
812798

813799
/* _Py_DECLARE_STR() should precede all uses of _Py_STR() in a function.
814800

0 commit comments

Comments
 (0)