On Thu, Sep 3, 2020 at 3:52 AM Juan José Santamaría Flecha
<juanjo.santama...@gmail.com> wrote:
> On Wed, Sep 2, 2020 at 4:35 PM Tom Lane <t...@sss.pgh.pa.us> wrote:
>> Thomas Munro <thomas.mu...@gmail.com> writes:
>> > You don't need to call stat() just to find out if a dirent is a file
>> > or directory, most of the time.  Please see attached.
>>
>> Hm.  If we do this, I can see wanting to apply the knowledge in more
>> places than walkdir().

Good idea.  Here's a new version that defines a new function
get_dirent_type() in src/common/file_utils_febe.c and uses it for both
frontend and backend walkdir().

> Win32 could also benefit from this micro-optimisation if we expanded the 
> dirent port to include d_type. Please find attached a patch that does so.

Is GetFileAttributes() actually faster than stat()?  Oh, I see that
our pgwin32_safestat() makes extra system calls.  Huh.  Ok then.
Thanks!
From 19a5c980370f89d1340cdec7d74659715d8b4a17 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 3 Sep 2020 13:58:17 +1200
Subject: [PATCH v2 1/4] Skip unnecessary stat() calls in walkdir().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some kernels can tell us the type of a "dirent", so we can avoid a call
to stat() or lstat() in many cases.  In order to be able to apply this
change to both frontend and backend versions of walkdir(), define a new
function get_dirent_type() in a new translation unit file_utils_febe.c.

Reviewed-by: Tom Lane <t...@sss.pgh.pa.us>
Reviewed-by: Juan José Santamaría Flecha <juanjo.santama...@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com
---
 src/backend/storage/file/fd.c   | 26 +++++------
 src/common/Makefile             |  1 +
 src/common/file_utils.c         | 28 +++++-------
 src/common/file_utils_febe.c    | 81 +++++++++++++++++++++++++++++++++
 src/include/common/file_utils.h | 21 ++++++++-
 src/tools/msvc/Mkvcbuild.pm     |  2 +-
 6 files changed, 126 insertions(+), 33 deletions(-)
 create mode 100644 src/common/file_utils_febe.c

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f376a97ed6..b9acd38d23 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -89,6 +89,7 @@
 #include "access/xlog.h"
 #include "catalog/pg_tablespace.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "portability/mem.h"
@@ -3340,8 +3341,6 @@ walkdir(const char *path,
 	while ((de = ReadDirExtended(dir, path, elevel)) != NULL)
 	{
 		char		subpath[MAXPGPATH * 2];
-		struct stat fst;
-		int			sret;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -3351,23 +3350,22 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-		if (process_symlinks)
-			sret = stat(subpath, &fst);
-		else
-			sret = lstat(subpath, &fst);
-
-		if (sret < 0)
+		switch (get_dirent_type(subpath, de, process_symlinks))
 		{
+		case PGFILETYPE_REG:
+			(*action) (subpath, false, elevel);
+			break;
+		case PGFILETYPE_DIR:
+			walkdir(subpath, action, false, elevel);
+			break;
+		case PGFILETYPE_ERROR:
 			ereport(elevel,
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", subpath)));
-			continue;
+			break;
+		default:
+			break;
 		}
-
-		if (S_ISREG(fst.st_mode))
-			(*action) (subpath, false, elevel);
-		else if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action, false, elevel);
 	}
 
 	FreeDir(dir);				/* we ignore any error here */
diff --git a/src/common/Makefile b/src/common/Makefile
index 16619e4ba8..aac92aabe1 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -56,6 +56,7 @@ OBJS_COMMON = \
 	exec.o \
 	f2s.o \
 	file_perm.o \
+	file_utils_febe.o \
 	hashfn.o \
 	ip.o \
 	jsonapi.o \
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index a2faafdf13..5a427e246c 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -2,7 +2,7 @@
  *
  * File-processing utility routines.
  *
- * Assorted utility functions to work on files.
+ * Assorted utility functions to work on files, frontend only.
  *
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
@@ -27,7 +27,6 @@
 #include "common/file_utils.h"
 #include "common/logging.h"
 
-
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
 #define PG_FLUSH_DATA_WORKS 1
@@ -167,8 +166,6 @@ walkdir(const char *path,
 	while (errno = 0, (de = readdir(dir)) != NULL)
 	{
 		char		subpath[MAXPGPATH * 2];
-		struct stat fst;
-		int			sret;
 
 		if (strcmp(de->d_name, ".") == 0 ||
 			strcmp(de->d_name, "..") == 0)
@@ -176,21 +173,20 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-		if (process_symlinks)
-			sret = stat(subpath, &fst);
-		else
-			sret = lstat(subpath, &fst);
-
-		if (sret < 0)
+		switch (get_dirent_type(subpath, de, process_symlinks))
 		{
-			pg_log_error("could not stat file \"%s\": %m", subpath);
-			continue;
-		}
-
-		if (S_ISREG(fst.st_mode))
+		case PGFILETYPE_REG:
 			(*action) (subpath, false);
-		else if (S_ISDIR(fst.st_mode))
+			break;
+		case PGFILETYPE_DIR:
 			walkdir(subpath, action, false);
+			break;
+		case PGFILETYPE_ERROR:
+			pg_log_error("could not stat file \"%s\": %m", subpath);
+			break;
+		default:
+			break;
+		}
 	}
 
 	if (errno)
diff --git a/src/common/file_utils_febe.c b/src/common/file_utils_febe.c
new file mode 100644
index 0000000000..f4c84b8a64
--- /dev/null
+++ b/src/common/file_utils_febe.c
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * File-processing utility routines.
+ *
+ * Assorted utility functions to work on files, frontend and backend.
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_utils_febe.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "common/file_utils.h"
+
+/*
+ * Return the type of a directory entry.
+ */
+PGFileType
+get_dirent_type(const char *path,
+				const struct dirent *de,
+				bool look_through_symlinks)
+{
+	PGFileType		result;
+
+	/*
+	 * We want to know the type of a directory entry.  Some systems tell us
+	 * that directly in the dirent struct, but that's a BSD/GNU extension.
+	 * Even when the interface is present, sometimes the type is unknown,
+	 * depending on the filesystem in use or in some cases options used at
+	 * filesystem creation time.
+	 */
+#if defined(DT_UNKNOWN) && defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
+	if (de->d_type == DT_REG)
+		result = PGFILETYPE_REG;
+	else if (de->d_type == DT_DIR)
+		result = PGFILETYPE_DIR;
+	else if (de->d_type == DT_LNK && !look_through_symlinks)
+		result = PGFILETYPE_LNK;
+	else
+		result = PGFILETYPE_UNKNOWN;
+#else
+	result = PGFILETYPE_UNKNOWN;
+#endif
+
+	if (result == PGFILETYPE_UNKNOWN)
+	{
+		struct stat fst;
+		int			sret;
+
+
+		if (look_through_symlinks)
+			sret = stat(path, &fst);
+		else
+			sret = lstat(path, &fst);
+
+		if (sret < 0)
+			result = PGFILETYPE_ERROR;
+		else if (S_ISREG(fst.st_mode))
+			result = PGFILETYPE_REG;
+		else if (S_ISDIR(fst.st_mode))
+			result = PGFILETYPE_DIR;
+#ifdef S_ISLNK
+		else if (S_ISLNK(fst.st_mode))
+			result = PGFILETYPE_LNK;
+#endif
+	}
+
+	return result;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index a7add75efa..d723c483d2 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -1,6 +1,4 @@
 /*-------------------------------------------------------------------------
- *
- * File-processing utility routines for frontend code
  *
  * Assorted utility functions to work on files.
  *
@@ -15,10 +13,29 @@
 #ifndef FILE_UTILS_H
 #define FILE_UTILS_H
 
+#include <dirent.h>
+
+typedef enum PGFileType
+{
+	PGFILETYPE_ERROR,
+	PGFILETYPE_UNKNOWN,
+	PGFILETYPE_REG,
+	PGFILETYPE_DIR,
+	PGFILETYPE_LNK
+} PGFileType;
+
+/* Functions defined in file_utils_febe.c for both FE and BE code. */
+extern PGFileType get_dirent_type(const char *path,
+								  const struct dirent *de,
+								  bool look_through_symlinks);
+
+/* Functions defined in file_utils.c only for FE code. */
+#ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
 extern void fsync_pgdata(const char *pg_data, int serverVersion);
 extern void fsync_dir_recurse(const char *dir);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
+#endif
 
 #endif							/* FILE_UTILS_H */
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 20da7985c1..a64127a196 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -121,7 +121,7 @@ sub mkvcbuild
 	our @pgcommonallfiles = qw(
 	  archive.c base64.c checksum_helper.c
 	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
-	  f2s.c file_perm.c hashfn.c ip.c jsonapi.c
+	  f2s.c file_perm.c file_utils_febe.c hashfn.c ip.c jsonapi.c
 	  keywords.c kwlookup.c link-canary.c md5.c
 	  pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
-- 
2.20.1

From 1c4ea1bc1a23a9db1b48c8b83cfc44c376c5c811 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 3 Sep 2020 13:09:52 +1200
Subject: [PATCH v2 2/4] Add d_type to Win32 dirent port.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The member d_type is a BSD/GNU extension to struct dirent, but Windows
has the information required to populate it so let's add it to our
emulation so that get_dirent_type() can use it.

Author: Juan José Santamaría Flecha <juanjo.santama...@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com
---
 src/include/port/win32_msvc/dirent.h | 23 +++++++++++++++++++++++
 src/port/dirent.c                    | 15 +++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/src/include/port/win32_msvc/dirent.h b/src/include/port/win32_msvc/dirent.h
index 9fabdf332b..6a54f964f5 100644
--- a/src/include/port/win32_msvc/dirent.h
+++ b/src/include/port/win32_msvc/dirent.h
@@ -10,6 +10,7 @@ struct dirent
 {
 	long		d_ino;
 	unsigned short d_reclen;
+	unsigned char d_type;
 	unsigned short d_namlen;
 	char		d_name[MAX_PATH];
 };
@@ -20,4 +21,26 @@ DIR		   *opendir(const char *);
 struct dirent *readdir(DIR *);
 int			closedir(DIR *);
 
+/* File types for 'd_type'. */
+enum
+  {
+	DT_UNKNOWN = 0,
+# define DT_UNKNOWN		DT_UNKNOWN
+	DT_FIFO = 1,
+# define DT_FIFO		DT_FIFO
+	DT_CHR = 2,
+# define DT_CHR			DT_CHR
+	DT_DIR = 4,
+# define DT_DIR			DT_DIR
+	DT_BLK = 6,
+# define DT_BLK			DT_BLK
+	DT_REG = 8,
+# define DT_REG			DT_REG
+	DT_LNK = 10,
+# define DT_LNK			DT_LNK
+	DT_SOCK = 12,
+# define DT_SOCK		DT_SOCK
+	DT_WHT = 14
+# define DT_WHT			DT_WHT
+  };
 #endif
diff --git a/src/port/dirent.c b/src/port/dirent.c
index b264484fca..519dd82101 100644
--- a/src/port/dirent.c
+++ b/src/port/dirent.c
@@ -69,6 +69,7 @@ opendir(const char *dirname)
 	d->handle = INVALID_HANDLE_VALUE;
 	d->ret.d_ino = 0;			/* no inodes on win32 */
 	d->ret.d_reclen = 0;		/* not used on win32 */
+	d->ret.d_type = DT_UNKNOWN;
 
 	return d;
 }
@@ -77,6 +78,7 @@ struct dirent *
 readdir(DIR *d)
 {
 	WIN32_FIND_DATA fd;
+	DWORD			attrib;
 
 	if (d->handle == INVALID_HANDLE_VALUE)
 	{
@@ -105,6 +107,19 @@ readdir(DIR *d)
 	}
 	strcpy(d->ret.d_name, fd.cFileName);	/* Both strings are MAX_PATH long */
 	d->ret.d_namlen = strlen(d->ret.d_name);
+	/*
+	 * The only identifed types are: directory, regular file or symbolic link.
+	 * Errors are treated as a file type that could not be determined.
+	 */
+	attrib = GetFileAttributes(d->ret.d_name);
+	if (attrib == INVALID_FILE_ATTRIBUTES)
+		d->ret.d_type = DT_UNKNOWN;
+	else if ((attrib & FILE_ATTRIBUTE_DIRECTORY) != 0)
+		d->ret.d_type = DT_DIR;
+	else if ((attrib & FILE_ATTRIBUTE_REPARSE_POINT) != 0)
+		d->ret.d_type = DT_LNK;
+	else
+		d->ret.d_type = DT_REG;
 
 	return &d->ret;
 }
-- 
2.20.1

Reply via email to