Skip to content

Commit 1a0b828

Browse files
gh-122561: Clean up and microoptimize str.translate and charmap codec (GH-122932)
* Replace PyLong_AS_LONG() with PyLong_AsLong().
* Call PyLong_AsLong() only once per replacement code.
* Use PyMapping_GetOptionalItem() instead of PyObject_GetItem().
1 parent 6f563e3 commit 1a0b828

File tree

1 file changed

+43
-27
lines changed

1 file changed

+43
-27
lines changed

Diff for: Objects/unicodeobject.c

+43-27
Original file line numberDiff line numberDiff line change
@@ -8208,8 +8208,12 @@ charmap_decode_mapping(const char *s,
82088208
if (key == NULL)
82098209
goto onError;
82108210

8211-
item = PyObject_GetItem(mapping, key);
8211+
int rc = PyMapping_GetOptionalItem(mapping, key, &item);
82128212
Py_DECREF(key);
8213+
if (rc == 0) {
8214+
/* No mapping found means: mapping is undefined. */
8215+
goto Undefined;
8216+
}
82138217
if (item == NULL) {
82148218
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
82158219
/* No mapping found means: mapping is undefined. */
@@ -8223,7 +8227,7 @@ charmap_decode_mapping(const char *s,
82238227
if (item == Py_None)
82248228
goto Undefined;
82258229
if (PyLong_Check(item)) {
8226-
long value = PyLong_AS_LONG(item);
8230+
long value = PyLong_AsLong(item);
82278231
if (value == 0xFFFE)
82288232
goto Undefined;
82298233
if (value < 0 || value > MAX_UNICODE) {
@@ -8507,19 +8511,25 @@ encoding_map_lookup(Py_UCS4 c, PyObject *mapping)
85078511
return i;
85088512
}
85098513

8510-
/* Lookup the character ch in the mapping. If the character
8511-
can't be found, Py_None is returned (or NULL, if another
8512-
error occurred). */
8514+
/* Lookup the character in the mapping.
8515+
On success, return PyLong, PyBytes or None (if the character can't be found).
8516+
If the result is PyLong, put its value in replace.
8517+
On error, return NULL.
8518+
*/
85138519
static PyObject *
8514-
charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
8520+
charmapencode_lookup(Py_UCS4 c, PyObject *mapping, unsigned char *replace)
85158521
{
85168522
PyObject *w = PyLong_FromLong((long)c);
85178523
PyObject *x;
85188524

85198525
if (w == NULL)
85208526
return NULL;
8521-
x = PyObject_GetItem(mapping, w);
8527+
int rc = PyMapping_GetOptionalItem(mapping, w, &x);
85228528
Py_DECREF(w);
8529+
if (rc == 0) {
8530+
/* No mapping found means: mapping is undefined. */
8531+
Py_RETURN_NONE;
8532+
}
85238533
if (x == NULL) {
85248534
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
85258535
/* No mapping found means: mapping is undefined. */
@@ -8531,13 +8541,14 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
85318541
else if (x == Py_None)
85328542
return x;
85338543
else if (PyLong_Check(x)) {
8534-
long value = PyLong_AS_LONG(x);
8544+
long value = PyLong_AsLong(x);
85358545
if (value < 0 || value > 255) {
85368546
PyErr_SetString(PyExc_TypeError,
85378547
"character mapping must be in range(256)");
85388548
Py_DECREF(x);
85398549
return NULL;
85408550
}
8551+
*replace = (unsigned char)value;
85418552
return x;
85428553
}
85438554
else if (PyBytes_Check(x))
@@ -8578,6 +8589,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
85788589
PyObject **outobj, Py_ssize_t *outpos)
85798590
{
85808591
PyObject *rep;
8592+
unsigned char replace;
85818593
char *outstart;
85828594
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
85838595

@@ -8594,7 +8606,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
85948606
return enc_SUCCESS;
85958607
}
85968608

8597-
rep = charmapencode_lookup(c, mapping);
8609+
rep = charmapencode_lookup(c, mapping, &replace);
85988610
if (rep==NULL)
85998611
return enc_EXCEPTION;
86008612
else if (rep==Py_None) {
@@ -8609,7 +8621,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
86098621
return enc_EXCEPTION;
86108622
}
86118623
outstart = PyBytes_AS_STRING(*outobj);
8612-
outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep);
8624+
outstart[(*outpos)++] = (char)replace;
86138625
}
86148626
else {
86158627
const char *repchars = PyBytes_AS_STRING(rep);
@@ -8658,6 +8670,7 @@ charmap_encoding_error(
86588670
/* find all unencodable characters */
86598671
while (collendpos < size) {
86608672
PyObject *rep;
8673+
unsigned char replace;
86618674
if (Py_IS_TYPE(mapping, &EncodingMapType)) {
86628675
ch = PyUnicode_READ_CHAR(unicode, collendpos);
86638676
val = encoding_map_lookup(ch, mapping);
@@ -8668,7 +8681,7 @@ charmap_encoding_error(
86688681
}
86698682

86708683
ch = PyUnicode_READ_CHAR(unicode, collendpos);
8671-
rep = charmapencode_lookup(ch, mapping);
8684+
rep = charmapencode_lookup(ch, mapping, &replace);
86728685
if (rep==NULL)
86738686
return -1;
86748687
else if (rep!=Py_None) {
@@ -8933,17 +8946,24 @@ unicode_translate_call_errorhandler(const char *errors,
89338946

89348947
/* Lookup the character ch in the mapping and put the result in result,
89358948
which must be decrefed by the caller.
8949+
The result can be PyLong, PyUnicode, None or NULL.
8950+
If the result is PyLong, put its value in replace.
89368951
Return 0 on success, -1 on error */
89378952
static int
8938-
charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
8953+
charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result, Py_UCS4 *replace)
89398954
{
89408955
PyObject *w = PyLong_FromLong((long)c);
89418956
PyObject *x;
89428957

89438958
if (w == NULL)
89448959
return -1;
8945-
x = PyObject_GetItem(mapping, w);
8960+
int rc = PyMapping_GetOptionalItem(mapping, w, &x);
89468961
Py_DECREF(w);
8962+
if (rc == 0) {
8963+
/* No mapping found means: use 1:1 mapping. */
8964+
*result = NULL;
8965+
return 0;
8966+
}
89478967
if (x == NULL) {
89488968
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
89498969
/* No mapping found means: use 1:1 mapping. */
@@ -8958,7 +8978,7 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
89588978
return 0;
89598979
}
89608980
else if (PyLong_Check(x)) {
8961-
long value = PyLong_AS_LONG(x);
8981+
long value = PyLong_AsLong(x);
89628982
if (value < 0 || value > MAX_UNICODE) {
89638983
PyErr_Format(PyExc_ValueError,
89648984
"character mapping must be in range(0x%x)",
@@ -8967,6 +8987,7 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
89678987
return -1;
89688988
}
89698989
*result = x;
8990+
*replace = (Py_UCS4)value;
89708991
return 0;
89718992
}
89728993
else if (PyUnicode_Check(x)) {
@@ -8990,8 +9011,9 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
89909011
_PyUnicodeWriter *writer)
89919012
{
89929013
PyObject *item;
9014+
Py_UCS4 replace;
89939015

8994-
if (charmaptranslate_lookup(ch, mapping, &item))
9016+
if (charmaptranslate_lookup(ch, mapping, &item, &replace))
89959017
return -1;
89969018

89979019
if (item == NULL) {
@@ -9008,10 +9030,7 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
90089030
}
90099031

90109032
if (PyLong_Check(item)) {
9011-
long ch = (Py_UCS4)PyLong_AS_LONG(item);
9012-
/* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
9013-
used it */
9014-
if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
9033+
if (_PyUnicodeWriter_WriteCharInline(writer, replace) < 0) {
90159034
Py_DECREF(item);
90169035
return -1;
90179036
}
@@ -9038,9 +9057,10 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
90389057
Py_UCS1 *translate)
90399058
{
90409059
PyObject *item = NULL;
9060+
Py_UCS4 replace;
90419061
int ret = 0;
90429062

9043-
if (charmaptranslate_lookup(ch, mapping, &item)) {
9063+
if (charmaptranslate_lookup(ch, mapping, &item, &replace)) {
90449064
return -1;
90459065
}
90469066

@@ -9054,19 +9074,14 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
90549074
return 1;
90559075
}
90569076
else if (PyLong_Check(item)) {
9057-
long replace = PyLong_AS_LONG(item);
9058-
/* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
9059-
used it */
9060-
if (127 < replace) {
9077+
if (replace > 127) {
90619078
/* invalid character or character outside ASCII:
90629079
skip the fast translate */
90639080
goto exit;
90649081
}
90659082
translate[ch] = (Py_UCS1)replace;
90669083
}
90679084
else if (PyUnicode_Check(item)) {
9068-
Py_UCS4 replace;
9069-
90709085
if (PyUnicode_GET_LENGTH(item) != 1)
90719086
goto exit;
90729087

@@ -9219,8 +9234,9 @@ _PyUnicode_TranslateCharmap(PyObject *input,
92199234
/* find all untranslatable characters */
92209235
while (collend < size) {
92219236
PyObject *x;
9237+
Py_UCS4 replace;
92229238
ch = PyUnicode_READ(kind, data, collend);
9223-
if (charmaptranslate_lookup(ch, mapping, &x))
9239+
if (charmaptranslate_lookup(ch, mapping, &x, &replace))
92249240
goto onError;
92259241
Py_XDECREF(x);
92269242
if (x != Py_None)

0 commit comments

Comments
 (0)