After a complete tar header was buffered it would only be processed during the next iteration of the read loop. A zero-length file such as a directory had no data to read, so the loop would exit without ever having processed the file. --- src/bin/pg_basebackup/pg_basebackup.c | 238 +++++++++++++++++++--------------- 1 file changed, 134 insertions(+), 104 deletions(-)
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index f73dd38..ccd0890 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -60,6 +60,15 @@ typedef struct TarStream { bool keepopen; } TarStream; +typedef struct TarParser { + char tarhdr[512]; + TarStream *stream; + bool in_tarhdr; + bool skip_file; + size_t tarhdrsz; + size_t filesz; +} TarParser; + /* Global options */ static char *basedir = NULL; static TablespaceList tablespace_dirs = {NULL, NULL}; @@ -112,6 +121,10 @@ static void progress_report(int tablespacenum, const char *filename, bool force) static void OpenTarFile(TarStream *tarfile, const char *path); static void CloseTarFile(TarStream *tarfile); static void TarInsertRecoveryConf(TarStream *stream); +static void IterateAndWriteTar(TarParser *tp, char *inbuf, int buflen, + bool (*callback)(char *, void *), void *cbarg); +static bool TarIterSkipRecoveryConf(char *h, void *arg); + static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum); static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum); static void GenerateRecoveryConf(PGconn *conn); @@ -899,6 +912,120 @@ TarInsertRecoveryConf(TarStream *stream) /* + * Process the individual files inside the TAR stream and pass their headers + * to a callback which can modify or chose to skip them. The stream consists + * of a header and zero or more chunks, all 512 bytes long. The stream from + * the server is broken up into smaller pieces, so we have to track the size + * of the files to find the next header structure. + */ +static void +IterateAndWriteTar(TarParser *tp, char *inbuf, int buflen, + bool (*callback)(char *, void *), void *cbarg) +{ + int rr = buflen; + int pos = 0; + + while (rr > 0) + { + if (tp->in_tarhdr) + { + /* + * We're currently reading a header structure inside the + * TAR stream, i.e. the file metadata. + */ + if (tp->tarhdrsz < 512) + { + /* + * Copy the header structure into tarhdr in case the + * header is not aligned to 512 bytes or it's not + * returned in whole by the last PQgetCopyData call. + */ + int hdrleft; + int bytes2copy; + + hdrleft = 512 - tp->tarhdrsz; + bytes2copy = (rr > hdrleft ? hdrleft : rr); + + memcpy(&tp->tarhdr[tp->tarhdrsz], inbuf + pos, bytes2copy); + + rr -= bytes2copy; + pos += bytes2copy; + tp->tarhdrsz += bytes2copy; + } + + if (tp->tarhdrsz == 512) + { + /* + * We have the complete header structure in tarhdr, let the + * callback possibly modify it or chose to skip the file. Find + * out the size of the file padded to the next multiple of 512 + */ + int padding; + + tp->skip_file = callback(tp->tarhdr, cbarg); + + sscanf(&tp->tarhdr[124], "%11o", (unsigned int *) &tp->filesz); + + padding = ((tp->filesz + 511) & ~511) - tp->filesz; + tp->filesz += padding; + + /* Next part is the file, not the header */ + tp->in_tarhdr = false; + + /* + * If we're not skipping the file, write the tar + * header unmodified. + */ + if (!tp->skip_file) + writeTarData(tp->stream, tp->tarhdr, 512); + } + } + else + { + /* + * We're processing a file's contents. + */ + if (tp->filesz > 0) + { + /* + * We still have data to read (and possibly write). + */ + int bytes2write; + + bytes2write = (tp->filesz > rr ? rr : tp->filesz); + + if (!tp->skip_file) + writeTarData(tp->stream, inbuf + pos, bytes2write); + + rr -= bytes2write; + pos += bytes2write; + tp->filesz -= bytes2write; + } + else + { + /* + * No more data in the current file, the next piece of + * data (if any) will be a new file header structure. + */ + tp->in_tarhdr = true; + tp->skip_file = false; + tp->tarhdrsz = 0; + tp->filesz = 0; + } + } + } +} + + +static bool +TarIterSkipRecoveryConf(char *h, void *arg) +{ + /* Skip the file if the name is recovery.conf */ + return (strcmp(&h[0], "recovery.conf") == 0); +} + + +/* * Open a (possibly zlib-compressed) tar file for writing. The filebase * argument should be the desired filename relative to basedir, without a .tar * or .tar.gz file extension. If the user specified a basedir of - then stdout @@ -936,12 +1063,12 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) { TarStream stream; char *copybuf = NULL; - char tarhdr[512]; bool basetablespace = PQgetisnull(res, rownum, 0); - bool in_tarhdr = true; - bool skip_file = false; - size_t tarhdrsz = 0; - size_t filesz = 0; + TarParser parser; + + MemSet(&parser, 0, sizeof(parser)); + parser.stream = &stream; + parser.in_tarhdr = true; if (basetablespace) /* Base tablespace */ @@ -1010,106 +1137,9 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) * Look for a recovery.conf in the existing tar stream. If it's * there, we must skip it so we can later overwrite it with our * own version of the file. - * - * To do this, we have to process the individual files inside the - * TAR stream. The stream consists of a header and zero or more - * chunks, all 512 bytes long. The stream from the server is - * broken up into smaller pieces, so we have to track the size of - * the files to find the next header structure. */ - int rr = r; - int pos = 0; - - while (rr > 0) - { - if (in_tarhdr) - { - /* - * We're currently reading a header structure inside the - * TAR stream, i.e. the file metadata. - */ - if (tarhdrsz < 512) - { - /* - * Copy the header structure into tarhdr in case the - * header is not aligned to 512 bytes or it's not - * returned in whole by the last PQgetCopyData call. - */ - int hdrleft; - int bytes2copy; - - hdrleft = 512 - tarhdrsz; - bytes2copy = (rr > hdrleft ? hdrleft : rr); - - memcpy(&tarhdr[tarhdrsz], copybuf + pos, bytes2copy); - - rr -= bytes2copy; - pos += bytes2copy; - tarhdrsz += bytes2copy; - } - else - { - /* - * We have the complete header structure in tarhdr, - * look at the file metadata: - the subsequent file - * contents have to be skipped if the filename is - * recovery.conf - find out the size of the file - * padded to the next multiple of 512 - */ - int padding; - - skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0); - - sscanf(&tarhdr[124], "%11o", (unsigned int *) &filesz); - - padding = ((filesz + 511) & ~511) - filesz; - filesz += padding; - - /* Next part is the file, not the header */ - in_tarhdr = false; - - /* - * If we're not skipping the file, write the tar - * header unmodified. - */ - if (!skip_file) - writeTarData(&stream, tarhdr, 512); - } - } - else - { - /* - * We're processing a file's contents. - */ - if (filesz > 0) - { - /* - * We still have data to read (and possibly write). - */ - int bytes2write; - - bytes2write = (filesz > rr ? rr : filesz); - - if (!skip_file) - writeTarData(&stream, copybuf + pos, bytes2write); - - rr -= bytes2write; - pos += bytes2write; - filesz -= bytes2write; - } - else - { - /* - * No more data in the current file, the next piece of - * data (if any) will be a new file header structure. - */ - in_tarhdr = true; - skip_file = false; - tarhdrsz = 0; - filesz = 0; - } - } - } + IterateAndWriteTar(&parser, copybuf, r, + TarIterSkipRecoveryConf, NULL); } totaldone += r; progress_report(rownum, stream.path, false); -- 2.3.0 -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers