Daniel Gustafsson <dan...@yesql.se> writes:
> On 16 Jun 2025, at 16:20, Tom Lane <t...@sss.pgh.pa.us> wrote:
>> This API is actually quite bizarrely defined, if you ask me.
>> Returning the byte count is optional, but if you don't pay
>> attention to the byte count you cannot know if you got any
>> data.  At best, that's bug-encouraging.
> Agreed.  Given that many of the implementations in the gzip support
> code directly return the gzXXX function I suspect it was modeled
> around GZip, but that's an unresearched guess.  The fact that this
> returns Z_NULL where the API defines NULL is unlikely to ever be an
> issue, but it also doesn't seem entirely neat...

After looking around a bit more, I realized that this API is a complete
disaster: it's not only bug-prone, but there are actual bugs in multiple
callers, e.g. _ReadBuf() totally fails to detect early EOF as it intends
to.  We need to fix it, not slap some band-aids on.  Draft patch
attached.

The write_func side is a bit of a mess too.  I fixed some obvious API
violations in the attached, but I think we need to do more, because it
seems that zero thought was given to reporting errors sanely.  The
callers all assume that strerror() is the way to report an incomplete
write, but that is not appropriate in every case; for instance, we need
to pay attention to gzerror() in the gzip cases.  I'm inclined to think
that the best answer is to move error reporting into the write_funcs,
and just make the API spec be "write or die".  I've not tackled that
here, though.

I was depressed to find that Zstd_getc reads one byte into an
otherwise-uninitialized int and then returns the whole int.  Even if
we're lucky enough for the variable to start off zero, this would fail
utterly on big-endian machines.  The only reason we've not noticed is
that the regression tests do not reach Zstd_getc, nor most of the other
getc_func implementations.

Now I'm afraid to look at the rest of the compress_io.h API.  If it's as
badly written as these parts, there are more bugs to be found.
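To make the "write or die" idea concrete, the gzip write_func could end
up looking roughly like the sketch below.  This is illustration only,
not part of the attached patch: it reuses the gzerror()/Z_ERRNO
reporting pattern that Gzip_read already has, and the change of the
return type from bool to void is hypothetical.

static void
Gzip_write(const void *ptr, size_t size, CompressFileHandle *CFH)
{
	gzFile		gzfp = (gzFile) CFH->private_data;

	/* Report any failure here, so that callers need not check a result */
	if (gzwrite(gzfp, ptr, size) != size)
	{
		int			errnum;
		const char *errmsg = gzerror(gzfp, &errnum);

		pg_fatal("could not write to output file: %s",
				 errnum == Z_ERRNO ? strerror(errno) : errmsg);
	}
}

Callers would then just call write_func and move on, instead of each
inventing its own (frequently wrong) error reporting.

			regards, tom lane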
diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c
index 5a30ebf9bf5..8db121b94a3 100644
--- a/src/bin/pg_dump/compress_gzip.c
+++ b/src/bin/pg_dump/compress_gzip.c
@@ -251,14 +251,14 @@ InitCompressorGzip(CompressorState *cs,
  *----------------------
  */
 
-static bool
-Gzip_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
+static size_t
+Gzip_read(void *ptr, size_t size, CompressFileHandle *CFH)
 {
 	gzFile		gzfp = (gzFile) CFH->private_data;
 	int			gzret;
 
 	gzret = gzread(gzfp, ptr, size);
-	if (gzret <= 0 && !gzeof(gzfp))
+	if (gzret < 0)
 	{
 		int			errnum;
 		const char *errmsg = gzerror(gzfp, &errnum);
@@ -267,10 +267,7 @@ Gzip_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
 				 errnum == Z_ERRNO ? strerror(errno) : errmsg);
 	}
 
-	if (rsize)
-		*rsize = (size_t) gzret;
-
-	return true;
+	return (size_t) gzret;
 }
 
 static bool
@@ -278,7 +275,7 @@ Gzip_write(const void *ptr, size_t size, CompressFileHandle *CFH)
 {
 	gzFile		gzfp = (gzFile) CFH->private_data;
 
-	return gzwrite(gzfp, ptr, size) > 0;
+	return gzwrite(gzfp, ptr, size) == size;
 }
 
 static int
diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h
index db9b38744c8..dad0c91fec5 100644
--- a/src/bin/pg_dump/compress_io.h
+++ b/src/bin/pg_dump/compress_io.h
@@ -123,21 +123,23 @@ struct CompressFileHandle
 								CompressFileHandle *CFH);
 
 	/*
-	 * Read 'size' bytes of data from the file and store them into 'ptr'.
-	 * Optionally it will store the number of bytes read in 'rsize'.
+	 * Read up to 'size' bytes of data from the file and store them into
+	 * 'ptr'.
 	 *
-	 * Returns true on success and throws an internal error otherwise.
+	 * Returns number of bytes read (this might be less than 'size' if EOF was
+	 * reached).  Throws error for error conditions other than EOF.
 	 */
-	bool		(*read_func) (void *ptr, size_t size, size_t *rsize,
+	size_t		(*read_func) (void *ptr, size_t size,
 							  CompressFileHandle *CFH);
 
 	/*
 	 * Write 'size' bytes of data into the file from 'ptr'.
 	 *
-	 * Returns true on success and false on error.
+	 * Returns true on success and false on error (it is caller's
+	 * responsibility to deal with error conditions).
 	 */
 	bool		(*write_func) (const void *ptr, size_t size,
-								struct CompressFileHandle *CFH);
+								CompressFileHandle *CFH);
 
 	/*
 	 * Read at most size - 1 characters from the compress file handle into
diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c
index e99f0cad71f..c94f9dcd4cf 100644
--- a/src/bin/pg_dump/compress_lz4.c
+++ b/src/bin/pg_dump/compress_lz4.c
@@ -583,6 +583,7 @@ LZ4Stream_write(const void *ptr, size_t size, CompressFileHandle *CFH)
 			return false;
 		}
 
+		errno = 0;
 		if (fwrite(state->buffer, 1, status, state->fp) != status)
 		{
 			errno = (errno) ? errno : ENOSPC;
@@ -598,8 +599,8 @@ LZ4Stream_write(const void *ptr, size_t size, CompressFileHandle *CFH)
 /*
  * fread() equivalent implementation for LZ4 compressed files.
  */
-static bool
-LZ4Stream_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
+static size_t
+LZ4Stream_read(void *ptr, size_t size, CompressFileHandle *CFH)
 {
 	LZ4State   *state = (LZ4State *) CFH->private_data;
 	int			ret;
@@ -607,10 +608,7 @@ LZ4Stream_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
 	if ((ret = LZ4Stream_read_internal(state, ptr, size, false)) < 0)
 		pg_fatal("could not read from input file: %s", LZ4Stream_get_error(CFH));
 
-	if (rsize)
-		*rsize = (size_t) ret;
-
-	return true;
+	return (size_t) ret;
 }
 
 /*
diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c
index 3fc89c99854..4924935a5bd 100644
--- a/src/bin/pg_dump/compress_none.c
+++ b/src/bin/pg_dump/compress_none.c
@@ -83,23 +83,17 @@ InitCompressorNone(CompressorState *cs,
  * Private routines
 */
 
-static bool
-read_none(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH)
+static size_t
+read_none(void *ptr, size_t size, CompressFileHandle *CFH)
 {
 	FILE	   *fp = (FILE *) CFH->private_data;
 	size_t		ret;
 
-	if (size == 0)
-		return true;
-
 	ret = fread(ptr, 1, size, fp);
-	if (ret != size && !feof(fp))
+	if (ferror(fp))
 		pg_fatal("could not read from input file: %m");
 
-	if (rsize)
-		*rsize = ret;
-
-	return true;
+	return ret;
 }
 
 static bool
diff --git a/src/bin/pg_dump/compress_zstd.c b/src/bin/pg_dump/compress_zstd.c
index cb595b10c2d..8f718212d9f 100644
--- a/src/bin/pg_dump/compress_zstd.c
+++ b/src/bin/pg_dump/compress_zstd.c
@@ -258,8 +258,8 @@ InitCompressorZstd(CompressorState *cs,
  * Compressed stream API
  */
 
-static bool
-Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH)
+static size_t
+Zstd_read(void *ptr, size_t size, CompressFileHandle *CFH)
 {
 	ZstdCompressorState *zstdcs = (ZstdCompressorState *) CFH->private_data;
 	ZSTD_inBuffer *input = &zstdcs->input;
@@ -292,6 +292,9 @@ Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH)
 		if (input->pos == input->size)
 		{
 			cnt = fread(unconstify(void *, input->src), 1, input_allocated_size, zstdcs->fp);
+			if (ferror(zstdcs->fp))
+				pg_fatal("could not read from input file: %m");
+
 			input->size = cnt;
 
 			Assert(cnt <= input_allocated_size);
@@ -320,10 +323,7 @@
 			break;				/* We read all the data that fits */
 	}
 
-	if (rdsize != NULL)
-		*rdsize = output->pos;
-
-	return true;
+	return output->pos;
 }
 
 static bool
@@ -358,22 +358,16 @@ Zstd_write(const void *ptr, size_t size, CompressFileHandle *CFH)
 		}
 	}
 
-	return size;
+	return true;
 }
 
 static int
 Zstd_getc(CompressFileHandle *CFH)
 {
-	ZstdCompressorState *zstdcs = (ZstdCompressorState *) CFH->private_data;
-	int			ret;
+	unsigned char ret;
 
-	if (CFH->read_func(&ret, 1, NULL, CFH) != 1)
-	{
-		if (feof(zstdcs->fp))
-			pg_fatal("could not read from input file: end of file");
-		else
-			pg_fatal("could not read from input file: %m");
-	}
+	if (CFH->read_func(&ret, 1, CFH) != 1)
+		pg_fatal("could not read from input file: end of file");
 
 	return ret;
 }
@@ -390,11 +384,7 @@ Zstd_gets(char *buf, int len, CompressFileHandle *CFH)
 	 */
 	for (i = 0; i < len - 1; ++i)
 	{
-		size_t		readsz;
-
-		if (!CFH->read_func(&buf[i], 1, &readsz, CFH))
-			break;
-		if (readsz != 1)
+		if (CFH->read_func(&buf[i], 1, CFH) != 1)
 			break;
 		if (buf[i] == '\n')
 		{
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index bc2a2fb4797..eb2ce8d5a6c 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -351,7 +351,7 @@ _EndData(ArchiveHandle *AH, TocEntry *te)
 static void
 _PrintFileData(ArchiveHandle *AH, char *filename)
 {
-	size_t		cnt = 0;
+	size_t		cnt;
 	char	   *buf;
 	size_t		buflen;
 	CompressFileHandle *CFH;
@@ -366,7 +366,7 @@ _PrintFileData(ArchiveHandle *AH, char *filename)
 	buflen = DEFAULT_IO_BUFFER_SIZE;
 	buf = pg_malloc(buflen);
 
-	while (CFH->read_func(buf, buflen, &cnt, CFH) && cnt > 0)
+	while ((cnt = CFH->read_func(buf, buflen, CFH)) > 0)
 	{
 		ahwrite(buf, 1, cnt, AH);
 	}
@@ -531,10 +531,10 @@ _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
 	CompressFileHandle *CFH = ctx->dataFH;
 
 	/*
-	 * If there was an I/O error, we already exited in readF(), so here we
-	 * exit on short reads.
+	 * We do not expect a short read, so fail if we get one.  The read_func
+	 * already dealt with any outright I/O error.
 	 */
-	if (!CFH->read_func(buf, len, NULL, CFH))
+	if (CFH->read_func(buf, len, CFH) != len)
 		pg_fatal("could not read from input file: end of file");
 }
 
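To spell out the Zstd_getc hazard mentioned above: storing a single byte
into an int overwrites only one byte of its representation, and on a
big-endian machine that byte is the most significant one, so the
returned value is wrong even if the leftover stack garbage happens to be
zero.  A tiny standalone illustration (not part of the patch; the
variable names are invented):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	int			val;
	unsigned char c = 'A';		/* stand-in for the byte from read_func */

	/* fill val with nonzero junk, standing in for uninitialized stack */
	memset(&val, 0x7f, sizeof(val));

	/* effectively what the old Zstd_getc did: one byte into an int */
	memcpy(&val, &c, 1);

	/*
	 * Only on a little-endian machine, and only if the junk had been zero,
	 * would this print 65; here (and on any big-endian machine) it prints
	 * something else entirely.
	 */
	printf("got %d, wanted %d\n", val, (int) c);
	return 0;
}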