On Thu, Sep 3, 2020 at 3:52 AM Juan José Santamaría Flecha <juanjo.santama...@gmail.com> wrote: > On Wed, Sep 2, 2020 at 4:35 PM Tom Lane <t...@sss.pgh.pa.us> wrote: >> Thomas Munro <thomas.mu...@gmail.com> writes: >> > You don't need to call stat() just to find out if a dirent is a file >> > or directory, most of the time. Please see attached. >> >> Hm. If we do this, I can see wanting to apply the knowledge in more >> places than walkdir().
Good idea. Here's a new version that defines a new function get_dirent_type() in src/common/file_utils_febe.c and uses it for both frontend and backend walkdir(). > Win32 could also benefit from this micro-optimisation if we expanded the > dirent port to include d_type. Please find attached a patch that does so. Is GetFileAttributes() actually faster than stat()? Oh, I see that our pgwin32_safestat() makes extra system calls. Huh. Ok then. Thanks!
From 19a5c980370f89d1340cdec7d74659715d8b4a17 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Thu, 3 Sep 2020 13:58:17 +1200 Subject: [PATCH v2 1/4] Skip unnecessary stat() calls in walkdir(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some kernels can tell us the type of a "dirent", so we can avoid a call to stat() or lstat() in many cases. In order to be able to apply this change to both frontend and backend versions of walkdir(), define a new function get_dirent_type() in a new translation unit file_utils_febe.c. Reviewed-by: Tom Lane <t...@sss.pgh.pa.us> Reviewed-by: Juan José Santamaría Flecha <juanjo.santama...@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com --- src/backend/storage/file/fd.c | 26 +++++------ src/common/Makefile | 1 + src/common/file_utils.c | 28 +++++------- src/common/file_utils_febe.c | 81 +++++++++++++++++++++++++++++++++ src/include/common/file_utils.h | 21 ++++++++- src/tools/msvc/Mkvcbuild.pm | 2 +- 6 files changed, 126 insertions(+), 33 deletions(-) create mode 100644 src/common/file_utils_febe.c diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index f376a97ed6..b9acd38d23 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -89,6 +89,7 @@ #include "access/xlog.h" #include "catalog/pg_tablespace.h" #include "common/file_perm.h" +#include "common/file_utils.h" #include "miscadmin.h" #include "pgstat.h" #include "portability/mem.h" @@ -3340,8 +3341,6 @@ walkdir(const char *path, while ((de = ReadDirExtended(dir, path, elevel)) != NULL) { char subpath[MAXPGPATH * 2]; - struct stat fst; - int sret; CHECK_FOR_INTERRUPTS(); @@ -3351,23 +3350,22 @@ walkdir(const char *path, snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); - if (process_symlinks) - sret = stat(subpath, &fst); - else - sret = lstat(subpath, &fst); - - if (sret < 0) + switch (get_dirent_type(subpath, de, process_symlinks)) { + case PGFILETYPE_REG: + (*action) (subpath, false, elevel); + break; + case PGFILETYPE_DIR: + walkdir(subpath, action, false, elevel); + break; + case PGFILETYPE_ERROR: ereport(elevel, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", subpath))); - continue; + break; + default: + break; } - - if (S_ISREG(fst.st_mode)) - (*action) (subpath, false, elevel); - else if (S_ISDIR(fst.st_mode)) - walkdir(subpath, action, false, elevel); } FreeDir(dir); /* we ignore any error here */ diff --git a/src/common/Makefile b/src/common/Makefile index 16619e4ba8..aac92aabe1 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -56,6 +56,7 @@ OBJS_COMMON = \ exec.o \ f2s.o \ file_perm.o \ + file_utils_febe.o \ hashfn.o \ ip.o \ jsonapi.o \ diff --git a/src/common/file_utils.c b/src/common/file_utils.c index a2faafdf13..5a427e246c 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -2,7 +2,7 @@ * * File-processing utility routines. * - * Assorted utility functions to work on files. + * Assorted utility functions to work on files, frontend only. * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group @@ -27,7 +27,6 @@ #include "common/file_utils.h" #include "common/logging.h" - /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ #if defined(HAVE_SYNC_FILE_RANGE) #define PG_FLUSH_DATA_WORKS 1 @@ -167,8 +166,6 @@ walkdir(const char *path, while (errno = 0, (de = readdir(dir)) != NULL) { char subpath[MAXPGPATH * 2]; - struct stat fst; - int sret; if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) @@ -176,21 +173,20 @@ walkdir(const char *path, snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); - if (process_symlinks) - sret = stat(subpath, &fst); - else - sret = lstat(subpath, &fst); - - if (sret < 0) + switch (get_dirent_type(subpath, de, process_symlinks)) { - pg_log_error("could not stat file \"%s\": %m", subpath); - continue; - } - - if (S_ISREG(fst.st_mode)) + case PGFILETYPE_REG: (*action) (subpath, false); - else if (S_ISDIR(fst.st_mode)) + break; + case PGFILETYPE_DIR: walkdir(subpath, action, false); + break; + case PGFILETYPE_ERROR: + pg_log_error("could not stat file \"%s\": %m", subpath); + break; + default: + break; + } } if (errno) diff --git a/src/common/file_utils_febe.c b/src/common/file_utils_febe.c new file mode 100644 index 0000000000..f4c84b8a64 --- /dev/null +++ b/src/common/file_utils_febe.c @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * File-processing utility routines. + * + * Assorted utility functions to work on files, frontend and backend. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/file_utils_febe.c + * + *------------------------------------------------------------------------- + */ + +#ifdef FRONTEND +#include "postgres_fe.h" +#else +#include "postgres.h" +#endif + +#include <dirent.h> +#include <sys/stat.h> + +#include "common/file_utils.h" + +/* + * Return the type of a directory entry. + */ +PGFileType +get_dirent_type(const char *path, + const struct dirent *de, + bool look_through_symlinks) +{ + PGFileType result; + + /* + * We want to know the type of a directory entry. Some systems tell us + * that directly in the dirent struct, but that's a BSD/GNU extension. + * Even when the interface is present, sometimes the type is unknown, + * depending on the filesystem in use or in some cases options used at + * filesystem creation time. + */ +#if defined(DT_UNKNOWN) && defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK) + if (de->d_type == DT_REG) + result = PGFILETYPE_REG; + else if (de->d_type == DT_DIR) + result = PGFILETYPE_DIR; + else if (de->d_type == DT_LNK && !look_through_symlinks) + result = PGFILETYPE_LNK; + else + result = PGFILETYPE_UNKNOWN; +#else + result = PGFILETYPE_UNKNOWN; +#endif + + if (result == PGFILETYPE_UNKNOWN) + { + struct stat fst; + int sret; + + + if (look_through_symlinks) + sret = stat(path, &fst); + else + sret = lstat(path, &fst); + + if (sret < 0) + result = PGFILETYPE_ERROR; + else if (S_ISREG(fst.st_mode)) + result = PGFILETYPE_REG; + else if (S_ISDIR(fst.st_mode)) + result = PGFILETYPE_DIR; +#ifdef S_ISLNK + else if (S_ISLNK(fst.st_mode)) + result = PGFILETYPE_LNK; +#endif + } + + return result; +} diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index a7add75efa..d723c483d2 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -1,6 +1,4 @@ /*------------------------------------------------------------------------- - * - * File-processing utility routines for frontend code * * Assorted utility functions to work on files. * @@ -15,10 +13,29 @@ #ifndef FILE_UTILS_H #define FILE_UTILS_H +#include <dirent.h> + +typedef enum PGFileType +{ + PGFILETYPE_ERROR, + PGFILETYPE_UNKNOWN, + PGFILETYPE_REG, + PGFILETYPE_DIR, + PGFILETYPE_LNK +} PGFileType; + +/* Functions defined in file_utils_febe.c for both FE and BE code. */ +extern PGFileType get_dirent_type(const char *path, + const struct dirent *de, + bool look_through_symlinks); + +/* Functions defined in file_utils.c only for FE code. */ +#ifdef FRONTEND extern int fsync_fname(const char *fname, bool isdir); extern void fsync_pgdata(const char *pg_data, int serverVersion); extern void fsync_dir_recurse(const char *dir); extern int durable_rename(const char *oldfile, const char *newfile); extern int fsync_parent_path(const char *fname); +#endif #endif /* FILE_UTILS_H */ diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 20da7985c1..a64127a196 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -121,7 +121,7 @@ sub mkvcbuild our @pgcommonallfiles = qw( archive.c base64.c checksum_helper.c config_info.c controldata_utils.c d2s.c encnames.c exec.c - f2s.c file_perm.c hashfn.c ip.c jsonapi.c + f2s.c file_perm.c file_utils_febe.c hashfn.c ip.c jsonapi.c keywords.c kwlookup.c link-canary.c md5.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c -- 2.20.1
From 1c4ea1bc1a23a9db1b48c8b83cfc44c376c5c811 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Thu, 3 Sep 2020 13:09:52 +1200 Subject: [PATCH v2 2/4] Add d_type to Win32 dirent port. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The member d_type is a BSD/GNU extension to struct dirent, but Windows has the information required to populate it so let's add it to our emulation so that get_dirent_type() can use it. Author: Juan José Santamaría Flecha <juanjo.santama...@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com --- src/include/port/win32_msvc/dirent.h | 23 +++++++++++++++++++++++ src/port/dirent.c | 15 +++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/include/port/win32_msvc/dirent.h b/src/include/port/win32_msvc/dirent.h index 9fabdf332b..6a54f964f5 100644 --- a/src/include/port/win32_msvc/dirent.h +++ b/src/include/port/win32_msvc/dirent.h @@ -10,6 +10,7 @@ struct dirent { long d_ino; unsigned short d_reclen; + unsigned char d_type; unsigned short d_namlen; char d_name[MAX_PATH]; }; @@ -20,4 +21,26 @@ DIR *opendir(const char *); struct dirent *readdir(DIR *); int closedir(DIR *); +/* File types for 'd_type'. */ +enum + { + DT_UNKNOWN = 0, +# define DT_UNKNOWN DT_UNKNOWN + DT_FIFO = 1, +# define DT_FIFO DT_FIFO + DT_CHR = 2, +# define DT_CHR DT_CHR + DT_DIR = 4, +# define DT_DIR DT_DIR + DT_BLK = 6, +# define DT_BLK DT_BLK + DT_REG = 8, +# define DT_REG DT_REG + DT_LNK = 10, +# define DT_LNK DT_LNK + DT_SOCK = 12, +# define DT_SOCK DT_SOCK + DT_WHT = 14 +# define DT_WHT DT_WHT + }; #endif diff --git a/src/port/dirent.c b/src/port/dirent.c index b264484fca..519dd82101 100644 --- a/src/port/dirent.c +++ b/src/port/dirent.c @@ -69,6 +69,7 @@ opendir(const char *dirname) d->handle = INVALID_HANDLE_VALUE; d->ret.d_ino = 0; /* no inodes on win32 */ d->ret.d_reclen = 0; /* not used on win32 */ + d->ret.d_type = DT_UNKNOWN; return d; } @@ -77,6 +78,7 @@ struct dirent * readdir(DIR *d) { WIN32_FIND_DATA fd; + DWORD attrib; if (d->handle == INVALID_HANDLE_VALUE) { @@ -105,6 +107,19 @@ readdir(DIR *d) } strcpy(d->ret.d_name, fd.cFileName); /* Both strings are MAX_PATH long */ d->ret.d_namlen = strlen(d->ret.d_name); + /* + * The only identifed types are: directory, regular file or symbolic link. + * Errors are treated as a file type that could not be determined. + */ + attrib = GetFileAttributes(d->ret.d_name); + if (attrib == INVALID_FILE_ATTRIBUTES) + d->ret.d_type = DT_UNKNOWN; + else if ((attrib & FILE_ATTRIBUTE_DIRECTORY) != 0) + d->ret.d_type = DT_DIR; + else if ((attrib & FILE_ATTRIBUTE_REPARSE_POINT) != 0) + d->ret.d_type = DT_LNK; + else + d->ret.d_type = DT_REG; return &d->ret; } -- 2.20.1