Skip to content

Commit ff1b032

Browse files
committed
Fix sloppy handling of corner-case errors in fd.c.
Several places in fd.c had badly-thought-through handling of error returns from lseek() and close(). The fact that those would seldom fail on valid FDs is probably the reason we've not noticed this up to now; but if they did fail, we'd get quite confused. LruDelete and LruInsert actually just Assert'd that lseek never fails, which is pretty awful on its face. In LruDelete, we indeed can't throw an error, because that's likely to get called during error abort and so throwing an error would probably just lead to an infinite loop. But by the same token, throwing an error from the close() right after that was ill-advised, not to mention that it would've left the LRU state corrupted since we'd already unlinked the VFD from the list. I also noticed that really, most of the time, we should know the current seek position and it shouldn't be necessary to do an lseek here at all. As patched, if we don't have a seek position and an lseek attempt doesn't give us one, we'll close the file but then subsequent re-open attempts will fail (except in the somewhat-unlikely case that a FileSeek(SEEK_SET) call comes between and allows us to re-establish a known target seek position). This isn't great but it won't result in any state corruption. Meanwhile, having an Assert instead of an honest test in LruInsert is really dangerous: if that lseek failed, a subsequent read or write would read or write from the start of the file, not where the caller expected, leading to data corruption. In both LruDelete and FileClose, if close() fails, just LOG that and mark the VFD closed anyway. Possibly leaking an FD is preferable to getting into an infinite loop or corrupting the VFD list. Besides, as far as I can tell from the POSIX spec, it's unspecified whether or not the file has been closed, so treating it as still open could be the wrong thing anyhow. I also fixed a number of other places that were being sloppy about behaving correctly when the seekPos is unknown. Also, I changed FileSeek to return -1 with EINVAL for the cases where it detects a bad offset, rather than throwing a hard elog(ERROR). It seemed pretty inconsistent that some bad-offset cases would get a failure return while others got elog(ERROR). It was missing an offset validity check for the SEEK_CUR case on a closed file, too. Back-patch to all supported branches, since all this code is fundamentally identical in all of them. Discussion: https://postgr.es/m/2982.1487617365@sss.pgh.pa.us
1 parent 931182f commit ff1b032

File tree

1 file changed

+136
-59
lines changed
  • src/backend/storage/file

1 file changed

+136
-59
lines changed

src/backend/storage/file/fd.c

Lines changed: 136 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,14 @@ int max_safe_fds = 32; /* default if not changed */
153153

154154
#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)
155155

156+
/*
157+
* Note: a VFD's seekPos is normally always valid, but if for some reason
158+
* an lseek() fails, it might become set to FileUnknownPos. We can struggle
159+
* along without knowing the seek position in many cases, but in some places
160+
* we have to fail if we don't have it.
161+
*/
156162
#define FileUnknownPos ((off_t) -1)
163+
#define FilePosIsUnknown(pos) ((pos) < 0)
157164

158165
/* these are the assigned bits in fdstate below: */
159166
#define FD_TEMPORARY (1 << 0) /* T = delete when closed */
@@ -167,7 +174,7 @@ typedef struct vfd
167174
File nextFree; /* link to next free VFD, if in freelist */
168175
File lruMoreRecently; /* doubly linked recency-of-use list */
169176
File lruLessRecently;
170-
off_t seekPos; /* current logical file position */
177+
off_t seekPos; /* current logical file position, or -1 */
171178
off_t fileSize; /* current size of file (0 if not temporary) */
172179
char *fileName; /* name of file, or NULL for unused VFD */
173180
/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
@@ -826,19 +833,33 @@ LruDelete(File file)
826833

827834
vfdP = &VfdCache[file];
828835

829-
/* delete the vfd record from the LRU ring */
830-
Delete(file);
831-
832-
/* save the seek position */
833-
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
834-
Assert(vfdP->seekPos != (off_t) -1);
836+
/*
837+
* Normally we should know the seek position, but if for some reason we
838+
* have lost track of it, try again to get it. If we still can't get it,
839+
* we have a problem: we will be unable to restore the file seek position
840+
* when and if the file is re-opened. But we can't really throw an error
841+
* and refuse to close the file, or activities such as transaction cleanup
842+
* will be broken.
843+
*/
844+
if (FilePosIsUnknown(vfdP->seekPos))
845+
{
846+
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
847+
if (FilePosIsUnknown(vfdP->seekPos))
848+
elog(LOG, "could not seek file \"%s\" before closing: %m",
849+
vfdP->fileName);
850+
}
835851

836-
/* close the file */
852+
/*
853+
* Close the file. We aren't expecting this to fail; if it does, better
854+
* to leak the FD than to mess up our internal state.
855+
*/
837856
if (close(vfdP->fd))
838-
elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
839-
840-
--nfile;
857+
elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
841858
vfdP->fd = VFD_CLOSED;
859+
--nfile;
860+
861+
/* delete the vfd record from the LRU ring */
862+
Delete(file);
842863
}
843864

844865
static void
@@ -889,22 +910,39 @@ LruInsert(File file)
889910
vfdP->fileMode);
890911
if (vfdP->fd < 0)
891912
{
892-
DO_DB(elog(LOG, "RE_OPEN FAILED: %d", errno));
913+
DO_DB(elog(LOG, "re-open failed: %m"));
893914
return -1;
894915
}
895916
else
896917
{
897-
DO_DB(elog(LOG, "RE_OPEN SUCCESS"));
898918
++nfile;
899919
}
900920

901-
/* seek to the right position */
921+
/*
922+
* Seek to the right position. We need no special case for seekPos
923+
* equal to FileUnknownPos, as lseek() will certainly reject that
924+
* (thus completing the logic noted in LruDelete() that we will fail
925+
* to re-open a file if we couldn't get its seek position before
926+
* closing).
927+
*/
902928
if (vfdP->seekPos != (off_t) 0)
903929
{
904-
off_t returnValue PG_USED_FOR_ASSERTS_ONLY;
905-
906-
returnValue = lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
907-
Assert(returnValue != (off_t) -1);
930+
if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0)
931+
{
932+
/*
933+
* If we fail to restore the seek position, treat it like an
934+
* open() failure.
935+
*/
936+
int save_errno = errno;
937+
938+
elog(LOG, "could not seek file \"%s\" after re-opening: %m",
939+
vfdP->fileName);
940+
(void) close(vfdP->fd);
941+
vfdP->fd = VFD_CLOSED;
942+
--nfile;
943+
errno = save_errno;
944+
return -1;
945+
}
908946
}
909947
}
910948

@@ -1287,15 +1325,15 @@ FileClose(File file)
12871325

12881326
if (!FileIsNotOpen(file))
12891327
{
1290-
/* remove the file from the lru ring */
1291-
Delete(file);
1292-
12931328
/* close the file */
12941329
if (close(vfdP->fd))
1295-
elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
1330+
elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
12961331

12971332
--nfile;
12981333
vfdP->fd = VFD_CLOSED;
1334+
1335+
/* remove the file from the lru ring */
1336+
Delete(file);
12991337
}
13001338

13011339
/*
@@ -1400,6 +1438,7 @@ int
14001438
FileRead(File file, char *buffer, int amount)
14011439
{
14021440
int returnCode;
1441+
Vfd *vfdP;
14031442

14041443
Assert(FileIsValid(file));
14051444

@@ -1412,11 +1451,17 @@ FileRead(File file, char *buffer, int amount)
14121451
if (returnCode < 0)
14131452
return returnCode;
14141453

1454+
vfdP = &VfdCache[file];
1455+
14151456
retry:
1416-
returnCode = read(VfdCache[file].fd, buffer, amount);
1457+
returnCode = read(vfdP->fd, buffer, amount);
14171458

14181459
if (returnCode >= 0)
1419-
VfdCache[file].seekPos += returnCode;
1460+
{
1461+
/* if seekPos is unknown, leave it that way */
1462+
if (!FilePosIsUnknown(vfdP->seekPos))
1463+
vfdP->seekPos += returnCode;
1464+
}
14201465
else
14211466
{
14221467
/*
@@ -1445,7 +1490,7 @@ FileRead(File file, char *buffer, int amount)
14451490
goto retry;
14461491

14471492
/* Trouble, so assume we don't know the file position anymore */
1448-
VfdCache[file].seekPos = FileUnknownPos;
1493+
vfdP->seekPos = FileUnknownPos;
14491494
}
14501495

14511496
return returnCode;
@@ -1455,6 +1500,7 @@ int
14551500
FileWrite(File file, char *buffer, int amount)
14561501
{
14571502
int returnCode;
1503+
Vfd *vfdP;
14581504

14591505
Assert(FileIsValid(file));
14601506

@@ -1467,6 +1513,8 @@ FileWrite(File file, char *buffer, int amount)
14671513
if (returnCode < 0)
14681514
return returnCode;
14691515

1516+
vfdP = &VfdCache[file];
1517+
14701518
/*
14711519
* If enforcing temp_file_limit and it's a temp file, check to see if the
14721520
* write would overrun temp_file_limit, and throw error if so. Note: it's
@@ -1475,15 +1523,28 @@ FileWrite(File file, char *buffer, int amount)
14751523
* message if we do that. All current callers would just throw error
14761524
* immediately anyway, so this is safe at present.
14771525
*/
1478-
if (temp_file_limit >= 0 && (VfdCache[file].fdstate & FD_TEMPORARY))
1526+
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMPORARY))
14791527
{
1480-
off_t newPos = VfdCache[file].seekPos + amount;
1528+
off_t newPos;
14811529

1482-
if (newPos > VfdCache[file].fileSize)
1530+
/*
1531+
* Normally we should know the seek position, but if for some reason
1532+
* we have lost track of it, try again to get it. Here, it's fine to
1533+
* throw an error if we still can't get it.
1534+
*/
1535+
if (FilePosIsUnknown(vfdP->seekPos))
1536+
{
1537+
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
1538+
if (FilePosIsUnknown(vfdP->seekPos))
1539+
elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName);
1540+
}
1541+
1542+
newPos = vfdP->seekPos + amount;
1543+
if (newPos > vfdP->fileSize)
14831544
{
14841545
uint64 newTotal = temporary_files_size;
14851546

1486-
newTotal += newPos - VfdCache[file].fileSize;
1547+
newTotal += newPos - vfdP->fileSize;
14871548
if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
14881549
ereport(ERROR,
14891550
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
@@ -1494,25 +1555,33 @@ FileWrite(File file, char *buffer, int amount)
14941555

14951556
retry:
14961557
errno = 0;
1497-
returnCode = write(VfdCache[file].fd, buffer, amount);
1558+
returnCode = write(vfdP->fd, buffer, amount);
14981559

14991560
/* if write didn't set errno, assume problem is no disk space */
15001561
if (returnCode != amount && errno == 0)
15011562
errno = ENOSPC;
15021563

15031564
if (returnCode >= 0)
15041565
{
1505-
VfdCache[file].seekPos += returnCode;
1566+
/* if seekPos is unknown, leave it that way */
1567+
if (!FilePosIsUnknown(vfdP->seekPos))
1568+
vfdP->seekPos += returnCode;
15061569

1507-
/* maintain fileSize and temporary_files_size if it's a temp file */
1508-
if (VfdCache[file].fdstate & FD_TEMPORARY)
1570+
/*
1571+
* Maintain fileSize and temporary_files_size if it's a temp file.
1572+
*
1573+
* If seekPos is -1 (unknown), this will do nothing; but we could only
1574+
* get here in that state if we're not enforcing temporary_files_size,
1575+
* so we don't care.
1576+
*/
1577+
if (vfdP->fdstate & FD_TEMPORARY)
15091578
{
1510-
off_t newPos = VfdCache[file].seekPos;
1579+
off_t newPos = vfdP->seekPos;
15111580

1512-
if (newPos > VfdCache[file].fileSize)
1581+
if (newPos > vfdP->fileSize)
15131582
{
1514-
temporary_files_size += newPos - VfdCache[file].fileSize;
1515-
VfdCache[file].fileSize = newPos;
1583+
temporary_files_size += newPos - vfdP->fileSize;
1584+
vfdP->fileSize = newPos;
15161585
}
15171586
}
15181587
}
@@ -1540,7 +1609,7 @@ FileWrite(File file, char *buffer, int amount)
15401609
goto retry;
15411610

15421611
/* Trouble, so assume we don't know the file position anymore */
1543-
VfdCache[file].seekPos = FileUnknownPos;
1612+
vfdP->seekPos = FileUnknownPos;
15441613
}
15451614

15461615
return returnCode;
@@ -1566,7 +1635,7 @@ FileSync(File file)
15661635
off_t
15671636
FileSeek(File file, off_t offset, int whence)
15681637
{
1569-
int returnCode;
1638+
Vfd *vfdP;
15701639

15711640
Assert(FileIsValid(file));
15721641

@@ -1575,25 +1644,33 @@ FileSeek(File file, off_t offset, int whence)
15751644
(int64) VfdCache[file].seekPos,
15761645
(int64) offset, whence));
15771646

1647+
vfdP = &VfdCache[file];
1648+
15781649
if (FileIsNotOpen(file))
15791650
{
15801651
switch (whence)
15811652
{
15821653
case SEEK_SET:
15831654
if (offset < 0)
1584-
elog(ERROR, "invalid seek offset: " INT64_FORMAT,
1585-
(int64) offset);
1586-
VfdCache[file].seekPos = offset;
1655+
{
1656+
errno = EINVAL;
1657+
return (off_t) -1;
1658+
}
1659+
vfdP->seekPos = offset;
15871660
break;
15881661
case SEEK_CUR:
1589-
VfdCache[file].seekPos += offset;
1662+
if (FilePosIsUnknown(vfdP->seekPos) ||
1663+
vfdP->seekPos + offset < 0)
1664+
{
1665+
errno = EINVAL;
1666+
return (off_t) -1;
1667+
}
1668+
vfdP->seekPos += offset;
15901669
break;
15911670
case SEEK_END:
1592-
returnCode = FileAccess(file);
1593-
if (returnCode < 0)
1594-
return returnCode;
1595-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1596-
offset, whence);
1671+
if (FileAccess(file) < 0)
1672+
return (off_t) -1;
1673+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
15971674
break;
15981675
default:
15991676
elog(ERROR, "invalid whence: %d", whence);
@@ -1606,27 +1683,27 @@ FileSeek(File file, off_t offset, int whence)
16061683
{
16071684
case SEEK_SET:
16081685
if (offset < 0)
1609-
elog(ERROR, "invalid seek offset: " INT64_FORMAT,
1610-
(int64) offset);
1611-
if (VfdCache[file].seekPos != offset)
1612-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1613-
offset, whence);
1686+
{
1687+
errno = EINVAL;
1688+
return (off_t) -1;
1689+
}
1690+
if (vfdP->seekPos != offset)
1691+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
16141692
break;
16151693
case SEEK_CUR:
1616-
if (offset != 0 || VfdCache[file].seekPos == FileUnknownPos)
1617-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1618-
offset, whence);
1694+
if (offset != 0 || FilePosIsUnknown(vfdP->seekPos))
1695+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
16191696
break;
16201697
case SEEK_END:
1621-
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
1622-
offset, whence);
1698+
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
16231699
break;
16241700
default:
16251701
elog(ERROR, "invalid whence: %d", whence);
16261702
break;
16271703
}
16281704
}
1629-
return VfdCache[file].seekPos;
1705+
1706+
return vfdP->seekPos;
16301707
}
16311708

16321709
/*

0 commit comments

Comments
 (0)