Skip to content

Commit 40d20bc

Browse files
committed
Issue 1267, continued.
Additional patch by Christian Heimes to deal more cleanly with the FILE* vs file-descriptor issues. I cleaned up his code a bit, and moved the lseek() call into import.c.
1 parent c2954e5 commit 40d20bc

File tree

12 files changed

+93
-63
lines changed

12 files changed

+93
-63
lines changed

Doc/c-api/concrete.rst

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2410,31 +2410,23 @@ change in future releases of Python.
24102410
:ctype:`PyFileObject`.
24112411

24122412

2413-
.. cfunction:: PyObject* PyFile_FromString(char *filename, char *mode)
2413+
.. cfunction:: PyObject* PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding, char *newline)
24142414

2415-
.. index:: single: fopen()
2416-
2417-
On success, return a new file object that is opened on the file given by
2418-
*filename*, with a file mode given by *mode*, where *mode* has the same
2419-
semantics as the standard C routine :cfunc:`fopen`. On failure, return *NULL*.
2420-
2421-
2422-
.. cfunction:: PyObject* PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE*))
2415+
Create a new :ctype:`PyFileObject` from the file descriptor of an already
2416+
opened file *fd*. The arguments *name*, *encoding* and *newline* can be
2417+
*NULL*, and *buffering* can be *-1*, to use the defaults. Return *NULL* on
2418+
failure.
24232419

2424-
Create a new :ctype:`PyFileObject` from the already-open standard C file
2425-
pointer, *fp*. The function *close* will be called when the file should be
2426-
closed. Return *NULL* on failure.
2420+
.. warning::
24272421

2428-
.. cfunction:: PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *), int buffering, char *encoding, char *newline)
2422+
Take care when you are mixing streams and descriptors! For more
2423+
information, see `GNU C Library
2424+
<http://www.gnu.org/software/libc/manual/html_node/Stream_002fDescriptor-Precautions.html#Stream_002fDescriptor-Precautions>`_.
24292425

2430-
Create a new :ctype:`PyFileObject` from the already-open standard C file
2431-
pointer, *fp*. The functions works similar to *PyFile_FromFile* but takes
2432-
optional arguments for *buffering*, *encoding* and *newline*. Use -1 resp.
2433-
*NULL* for default values.
24342426

2435-
.. cfunction:: FILE* PyFile_AsFile(PyObject *p)
2427+
.. cfunction:: int PyObject_AsFileDescriptor(PyObject *p)
24362428

2437-
Return the file object associated with *p* as a :ctype:`FILE\*`.
2429+
Return the file descriptor associated with *p* as an :ctype:`int`.
24382430

24392431

24402432
.. cfunction:: PyObject* PyFile_GetLine(PyObject *p, int n)

Doc/reference/introduction.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Python for .NET
6060
This implementation actually uses the CPython implementation, but is a managed
6161
.NET application and makes .NET libraries available. This was created by Brian
6262
Lloyd. For more information, see the `Python for .NET home page
63-
<http://www.zope.org/Members/Brian/PythonNet>`_.
63+
<http://pythonnet.sourceforge.net>`_.
6464

6565
IronPython
6666
An alternate Python for .NET. Unlike Python.NET, this is a complete Python

Include/fileobject.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ extern "C" {
88

99
#define PY_STDIOTEXTMODE "b"
1010

11-
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, int (*)(FILE*));
12-
PyAPI_FUNC(PyObject *) PyFile_FromFileEx(FILE *, char *, char *,
13-
int (*)(FILE *), int, char *,
14-
char *);
11+
PyAPI_FUNC(PyObject *) PyFile_FromFd(int, char *, char *, int, char *, char *);
1512
PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int);
1613
PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int);
1714
PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *);

Lib/test/test_imp.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,23 @@ def test_find_module_encoding(self):
4444
fd = imp.find_module("heapq")[0]
4545
self.assertEqual(fd.encoding, "iso-8859-1")
4646

47+
def test_issue1267(self):
48+
fp, filename, info = imp.find_module("pydoc")
49+
self.assertNotEqual(fp, None)
50+
self.assertEqual(fp.encoding, "iso-8859-1")
51+
self.assertEqual(fp.tell(), 0)
52+
self.assertEqual(fp.readline(), '#!/usr/bin/env python\n')
53+
fp.close()
54+
55+
fp, filename, info = imp.find_module("tokenize")
56+
self.assertNotEqual(fp, None)
57+
self.assertEqual(fp.encoding, "utf-8")
58+
self.assertEqual(fp.tell(), 0)
59+
self.assertEqual(fp.readline(),
60+
'"""Tokenization help for Python programs.\n')
61+
fp.close()
62+
63+
4764
def test_main():
4865
test_support.run_unittest(
4966
LockTests,

Misc/NEWS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,18 @@ What's New in Python 3.0a2?
88

99
*Unreleased*
1010

11+
Core and Builtins
12+
-----------------
13+
14+
- Replaced `PyFile_FromFile()` with `PyFile_FromFd(fd, name, mode, buffer,
15+
encoding, newline)`
16+
17+
- Fixed `imp.find_module()` to obey the -*- coding: -*- header.
18+
19+
- Changed `__file__` and `co_filename` to unicode. The path names are decoded
20+
with `Py_FileSystemDefaultEncoding` and a new API method
21+
`PyUnicode_DecodeFSDefault(char*)` was added.
22+
1123
Extension Modules
1224
-----------------
1325

Modules/posixmodule.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5386,11 +5386,18 @@ static PyObject *
53865386
posix_tmpfile(PyObject *self, PyObject *noargs)
53875387
{
53885388
FILE *fp;
5389+
int fd;
53895390

53905391
fp = tmpfile();
53915392
if (fp == NULL)
53925393
return posix_error();
5393-
return PyFile_FromFile(fp, "<tmpfile>", "w+b", fclose);
5394+
fd = fileno(fp);
5395+
if (fd != -1)
5396+
fd = dup(fd);
5397+
fclose(fp);
5398+
if (fd == -1)
5399+
return posix_error();
5400+
return PyFile_FromFd(fd, "<tmpfile>", "w+b", -1, NULL, NULL);
53945401
}
53955402
#endif
53965403

Objects/bytesobject.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,7 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
12141214
Py_ssize_t len = PyBytes_GET_SIZE(self);
12151215
const char* str;
12161216
Py_buffer vsubstr;
1217-
int rv;
1217+
int rv = 0;
12181218

12191219
str = PyBytes_AS_STRING(self);
12201220

@@ -1226,13 +1226,11 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
12261226
if (direction < 0) {
12271227
/* startswith */
12281228
if (start+vsubstr.len > len) {
1229-
rv = 0;
12301229
goto done;
12311230
}
12321231
} else {
12331232
/* endswith */
12341233
if (end-start < vsubstr.len || start > len) {
1235-
rv = 0;
12361234
goto done;
12371235
}
12381236

Objects/fileobject.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,16 @@ extern "C" {
2626
/* External C interface */
2727

2828
PyObject *
29-
PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
29+
PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding,
30+
char *newline)
3031
{
31-
return PyFile_FromFileEx(fp, name, mode, close, -1, NULL, NULL);
32-
}
33-
34-
PyObject *
35-
PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *),
36-
int buffering, char *encoding, char *newline)
37-
{
38-
PyObject *io, *stream, *nameobj=NULL;
32+
PyObject *io, *stream, *nameobj = NULL;
3933

4034
io = PyImport_ImportModule("io");
4135
if (io == NULL)
4236
return NULL;
43-
stream = PyObject_CallMethod(io, "open", "isiss", fileno(fp), mode,
44-
buffering, encoding, newline);
37+
stream = PyObject_CallMethod(io, "open", "isiss", fd, mode,
38+
buffering, encoding, newline);
4539
Py_DECREF(io);
4640
if (stream == NULL)
4741
return NULL;

Parser/tokenizer.c

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,40 +1602,44 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
16021602
}
16031603
#endif
16041604

1605-
/* Get -*- encoding -*- from a Python file
1605+
/* Get -*- encoding -*- from a Python file.
16061606
16071607
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
16081608
the first or second line of the file (in which case the encoding
16091609
should be assumed to be PyUnicode_GetDefaultEncoding()).
16101610
1611-
The char * returned was malloc'ed from PyMem_MALLOC() and thus must be freed
1612-
when no longer needed.
1611+
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
1612+
by the caller.
16131613
*/
16141614
char *
1615-
PyTokenizer_FindEncoding(FILE *fp) {
1615+
PyTokenizer_FindEncoding(int fd)
1616+
{
16161617
struct tok_state *tok;
1617-
char *p_start=NULL, *p_end=NULL, *encoding=NULL;
1618+
FILE *fp;
1619+
char *p_start =NULL , *p_end =NULL , *encoding = NULL;
16181620

1619-
if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) {
1620-
/* lseek() usage is on purpose; see note later in code. */
1621-
lseek(fileno(fp), 0, 0);
1621+
fd = dup(fd);
1622+
if (fd < 0) {
1623+
return NULL;
1624+
}
1625+
fp = fdopen(fd, "r");
1626+
if (fp == NULL) {
1627+
return NULL;
1628+
}
1629+
tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
1630+
if (tok == NULL) {
1631+
fclose(fp);
16221632
return NULL;
16231633
}
1624-
while(((tok->lineno < 2) && (tok->done == E_OK))) {
1634+
while (tok->lineno < 2 && tok->done == E_OK) {
16251635
PyTokenizer_Get(tok, &p_start, &p_end);
16261636
}
1627-
1628-
/* lseek() must be used instead of fseek()/rewind() as those fail on
1629-
OS X 10.4 to properly seek back to the beginning when reading from
1630-
the file descriptor instead of the file pointer. */
1631-
lseek(fileno(fp), 0, 0);
1632-
1637+
fclose(fp);
16331638
if (tok->encoding) {
16341639
encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
16351640
strcpy(encoding, tok->encoding);
16361641
}
16371642
PyTokenizer_Free(tok);
1638-
16391643
return encoding;
16401644
}
16411645

Parser/tokenizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ extern void PyTokenizer_Free(struct tok_state *);
6767
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
6868
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
6969
int len, int *offset);
70-
extern char * PyTokenizer_FindEncoding(FILE *fp);
70+
extern char * PyTokenizer_FindEncoding(int);
7171

7272
#ifdef __cplusplus
7373
}

Python/import.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ static PyObject *extensions = NULL;
9292
extern struct _inittab _PyImport_Inittab[];
9393

9494
/* Method from Parser/tokenizer.c */
95-
extern char * PyTokenizer_FindEncoding(FILE *fp);
95+
extern char * PyTokenizer_FindEncoding(int);
9696

9797
struct _inittab *PyImport_Inittab = _PyImport_Inittab;
9898

@@ -2561,6 +2561,7 @@ call_find_module(char *name, PyObject *path)
25612561
struct filedescr *fdp;
25622562
char pathname[MAXPATHLEN+1];
25632563
FILE *fp = NULL;
2564+
int fd = -1;
25642565
char *found_encoding = NULL;
25652566
char *encoding = NULL;
25662567

@@ -2571,17 +2572,25 @@ call_find_module(char *name, PyObject *path)
25712572
if (fdp == NULL)
25722573
return NULL;
25732574
if (fp != NULL) {
2575+
fd = fileno(fp);
2576+
if (fd != -1)
2577+
fd = dup(fd);
2578+
fclose(fp);
2579+
fp = NULL;
2580+
}
2581+
if (fd != -1) {
25742582
if (strchr(fdp->mode, 'b') == NULL) {
25752583
/* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed
25762584
memory. */
2577-
found_encoding = PyTokenizer_FindEncoding(fp);
2585+
found_encoding = PyTokenizer_FindEncoding(fd);
2586+
lseek(fd, 0, 0); /* Reset position */
25782587
encoding = (found_encoding != NULL) ? found_encoding :
25792588
(char*)PyUnicode_GetDefaultEncoding();
25802589
}
2581-
fob = PyFile_FromFileEx(fp, pathname, fdp->mode, fclose, -1,
2590+
fob = PyFile_FromFd(fd, pathname, fdp->mode, -1,
25822591
(char*)encoding, NULL);
25832592
if (fob == NULL) {
2584-
fclose(fp);
2593+
close(fd);
25852594
PyMem_FREE(found_encoding);
25862595
return NULL;
25872596
}

Python/pythonrun.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -719,7 +719,7 @@ initstdio(void)
719719
}
720720

721721
/* Set sys.stdin */
722-
if (!(std = PyFile_FromFileEx(stdin, "<stdin>", "r", fclose, -1,
722+
if (!(std = PyFile_FromFd(fileno(stdin), "<stdin>", "r", -1,
723723
NULL, "\n"))) {
724724
goto error;
725725
}
@@ -728,7 +728,7 @@ initstdio(void)
728728
Py_DECREF(std);
729729

730730
/* Set sys.stdout */
731-
if (!(std = PyFile_FromFileEx(stdout, "<stdout>", "w", fclose, -1,
731+
if (!(std = PyFile_FromFd(fileno(stdout), "<stdout>", "w", -1,
732732
NULL, "\n"))) {
733733
goto error;
734734
}
@@ -737,7 +737,7 @@ initstdio(void)
737737
Py_DECREF(std);
738738

739739
/* Set sys.stderr */
740-
if (!(std = PyFile_FromFileEx(stderr, "<stderr>", "w", fclose, -1,
740+
if (!(std = PyFile_FromFd(fileno(stderr), "<stderr>", "w", -1,
741741
NULL, "\n"))) {
742742
goto error;
743743
}

0 commit comments

Comments
 (0)