Skip to content

Commit 24bdb04

Browse files
committed
Marc-Andre Lemburg:
The attached patch set includes a workaround to get Python with Unicode support to compile on BSDI 4.x (courtesy of Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found.
1 parent 66d4513 commit 24bdb04

File tree

9 files changed

+116
-56
lines changed

9 files changed

+116
-56
lines changed

Include/unicodeobject.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
8282
#endif
8383

8484
#ifdef HAVE_WCHAR_H
85+
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
86+
# ifdef _HAVE_BSDI
87+
# include <time.h>
88+
# endif
8589
# include "wchar.h"
8690
#endif
8791

@@ -562,7 +566,9 @@ extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
562566
);
563567

564568
#ifdef MS_WIN32
569+
565570
/* --- MBCS codecs for Windows -------------------------------------------- */
571+
566572
extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
567573
const char *string, /* MBCS encoded string */
568574
int length, /* size of string */
@@ -579,8 +585,8 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
579585
const char *errors /* error handling */
580586
);
581587

582-
583588
#endif /* MS_WIN32 */
589+
584590
/* --- Methods & Slots ----------------------------------------------------
585591
586592
These are capable of handling Unicode objects and strings on input

Lib/encodings/mbcs.py

-1
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader):
3434
def getregentry():
3535

3636
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
37-

Lib/test/output/test_unicode

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
test_unicode
22
Testing Unicode comparisons... done.
3+
Testing Unicode contains method... done.
34
Testing Unicode formatting strings... done.
4-
Testing unicodedata module... done.
5+
Testing builtin codecs... done.

Lib/test/output/test_unicodedata

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
test_unicodedata
2+
Testing unicodedata module... done.

Lib/test/test_unicode.py

-45
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
""" Test script for the Unicode implementation.
22
3-
43
Written by Marc-Andre Lemburg (mal@lemburg.com).
54
65
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
@@ -250,50 +249,6 @@ def __init__(self): self.seq = [7, u'hello', 123L]
250249
assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
251250
print 'done.'
252251

253-
# Test Unicode database APIs
254-
try:
255-
import unicodedata
256-
except ImportError:
257-
pass
258-
else:
259-
print 'Testing unicodedata module...',
260-
261-
assert unicodedata.digit(u'A',None) is None
262-
assert unicodedata.digit(u'9') == 9
263-
assert unicodedata.digit(u'\u215b',None) is None
264-
assert unicodedata.digit(u'\u2468') == 9
265-
266-
assert unicodedata.numeric(u'A',None) is None
267-
assert unicodedata.numeric(u'9') == 9
268-
assert unicodedata.numeric(u'\u215b') == 0.125
269-
assert unicodedata.numeric(u'\u2468') == 9.0
270-
271-
assert unicodedata.decimal(u'A',None) is None
272-
assert unicodedata.decimal(u'9') == 9
273-
assert unicodedata.decimal(u'\u215b',None) is None
274-
assert unicodedata.decimal(u'\u2468',None) is None
275-
276-
assert unicodedata.category(u'\uFFFE') == 'Cn'
277-
assert unicodedata.category(u'a') == 'Ll'
278-
assert unicodedata.category(u'A') == 'Lu'
279-
280-
assert unicodedata.bidirectional(u'\uFFFE') == ''
281-
assert unicodedata.bidirectional(u' ') == 'WS'
282-
assert unicodedata.bidirectional(u'A') == 'L'
283-
284-
assert unicodedata.decomposition(u'\uFFFE') == ''
285-
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
286-
287-
assert unicodedata.mirrored(u'\uFFFE') == 0
288-
assert unicodedata.mirrored(u'a') == 0
289-
assert unicodedata.mirrored(u'\u2201') == 1
290-
291-
assert unicodedata.combining(u'\uFFFE') == 0
292-
assert unicodedata.combining(u'a') == 0
293-
assert unicodedata.combining(u'\u20e1') == 230
294-
295-
print 'done.'
296-
297252
# Test builtin codecs
298253
print 'Testing builtin codecs...',
299254

Lib/test/test_unicodedata.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
""" Test script for the unicodedata module.
2+
3+
Written by Marc-Andre Lemburg (mal@lemburg.com).
4+
5+
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6+
7+
"""#"
8+
from test_support import verbose
9+
import sys
10+
11+
# Test Unicode database APIs
12+
import unicodedata
13+
14+
print 'Testing unicodedata module...',
15+
16+
assert unicodedata.digit(u'A',None) is None
17+
assert unicodedata.digit(u'9') == 9
18+
assert unicodedata.digit(u'\u215b',None) is None
19+
assert unicodedata.digit(u'\u2468') == 9
20+
21+
assert unicodedata.numeric(u'A',None) is None
22+
assert unicodedata.numeric(u'9') == 9
23+
assert unicodedata.numeric(u'\u215b') == 0.125
24+
assert unicodedata.numeric(u'\u2468') == 9.0
25+
26+
assert unicodedata.decimal(u'A',None) is None
27+
assert unicodedata.decimal(u'9') == 9
28+
assert unicodedata.decimal(u'\u215b',None) is None
29+
assert unicodedata.decimal(u'\u2468',None) is None
30+
31+
assert unicodedata.category(u'\uFFFE') == 'Cn'
32+
assert unicodedata.category(u'a') == 'Ll'
33+
assert unicodedata.category(u'A') == 'Lu'
34+
35+
assert unicodedata.bidirectional(u'\uFFFE') == ''
36+
assert unicodedata.bidirectional(u' ') == 'WS'
37+
assert unicodedata.bidirectional(u'A') == 'L'
38+
39+
assert unicodedata.decomposition(u'\uFFFE') == ''
40+
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
41+
42+
assert unicodedata.mirrored(u'\uFFFE') == 0
43+
assert unicodedata.mirrored(u'a') == 0
44+
assert unicodedata.mirrored(u'\u2201') == 1
45+
46+
assert unicodedata.combining(u'\uFFFE') == 0
47+
assert unicodedata.combining(u'a') == 0
48+
assert unicodedata.combining(u'\u20e1') == 230
49+
50+
print 'done.'

Misc/unicode.txt

+7-6
Original file line numberDiff line numberDiff line change
@@ -740,8 +740,8 @@ These markers are used by the PyArg_ParseTuple() APIs:
740740
On output, a buffer of the needed size is allocated and
741741
returned through *buffer as NULL-terminated string.
742742
The encoded string may not contain embedded NULL characters.
743-
The caller is responsible for free()ing the allocated *buffer
744-
after usage.
743+
The caller is responsible for calling PyMem_Free()
744+
to free the allocated *buffer after usage.
745745

746746
"es#":
747747
Takes three parameters: encoding (const char *),
@@ -755,8 +755,9 @@ These markers are used by the PyArg_ParseTuple() APIs:
755755

756756
If *buffer is NULL, a buffer of the needed size is
757757
allocated and output copied into it. *buffer is then
758-
updated to point to the allocated memory area. The caller
759-
is responsible for free()ing *buffer after usage.
758+
updated to point to the allocated memory area.
759+
The caller is responsible for calling PyMem_Free()
760+
to free the allocated *buffer after usage.
760761

761762
In both cases *buffer_len is updated to the number of
762763
characters written (excluding the trailing NULL-byte).
@@ -784,7 +785,7 @@ Using "es#" with auto-allocation:
784785
return NULL;
785786
}
786787
str = PyString_FromStringAndSize(buffer, buffer_len);
787-
free(buffer);
788+
PyMem_Free(buffer);
788789
return str;
789790
}
790791

@@ -807,7 +808,7 @@ Using "es" with auto-allocation returning a NULL-terminated string:
807808
return NULL;
808809
}
809810
str = PyString_FromString(buffer);
810-
free(buffer);
811+
PyMem_Free(buffer);
811812
return str;
812813
}
813814

Modules/_codecsmodule.c

+46
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,26 @@ charmap_decode(PyObject *self,
286286
size);
287287
}
288288

289+
#ifdef MS_WIN32
290+
291+
static PyObject *
292+
mbcs_decode(PyObject *self,
293+
PyObject *args)
294+
{
295+
const char *data;
296+
int size;
297+
const char *errors = NULL;
298+
299+
if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
300+
&data, &size, &errors))
301+
return NULL;
302+
303+
return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
304+
size);
305+
}
306+
307+
#endif /* MS_WIN32 */
308+
289309
/* --- Encoder ------------------------------------------------------------ */
290310

291311
static PyObject *
@@ -491,6 +511,28 @@ charmap_encode(PyObject *self,
491511
PyUnicode_GET_SIZE(str));
492512
}
493513

514+
#ifdef MS_WIN32
515+
516+
static PyObject *
517+
mbcs_encode(PyObject *self,
518+
PyObject *args)
519+
{
520+
PyObject *str;
521+
const char *errors = NULL;
522+
523+
if (!PyArg_ParseTuple(args, "U|z:mbcs_encode",
524+
&str, &errors))
525+
return NULL;
526+
527+
return codec_tuple(PyUnicode_EncodeMBCS(
528+
PyUnicode_AS_UNICODE(str),
529+
PyUnicode_GET_SIZE(str),
530+
errors),
531+
PyUnicode_GET_SIZE(str));
532+
}
533+
534+
#endif /* MS_WIN32 */
535+
494536
/* --- Module API --------------------------------------------------------- */
495537

496538
static PyMethodDef _codecs_functions[] = {
@@ -519,6 +561,10 @@ static PyMethodDef _codecs_functions[] = {
519561
{"charmap_decode", charmap_decode, 1},
520562
{"readbuffer_encode", readbuffer_encode, 1},
521563
{"charbuffer_encode", charbuffer_encode, 1},
564+
#ifdef MS_WIN32
565+
{"mbcs_encode", mbcs_encode, 1},
566+
{"mbcs_decode", mbcs_decode, 1},
567+
#endif
522568
{NULL, NULL} /* sentinel */
523569
};
524570

Python/getargs.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ convertsimple1(arg, p_format, p_va)
704704
the data copied into it; *buffer is
705705
updated to point to the new buffer;
706706
the caller is responsible for
707-
free()ing it after usage
707+
PyMem_Free()ing it after usage
708708
709709
- if *buffer is not NULL, the data
710710
is copied to *buffer; *buffer_len
@@ -752,7 +752,7 @@ convertsimple1(arg, p_format, p_va)
752752
is allocated and the data copied
753753
into it; *buffer is updated to
754754
point to the new buffer; the caller
755-
is responsible for free()ing it
755+
is responsible for PyMem_Free()ing it
756756
after usage
757757
758758
*/

0 commit comments

Comments
 (0)