rebased -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 1bb28cfe5d40670386ae663e14c8854dc1b5486d Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Mon, 21 Aug 2023 19:05:14 -0700 Subject: [PATCH v6 1/2] move PG_TEMP_FILE* macros to file_utils.h
--- src/backend/backup/basebackup.c | 2 +- src/backend/postmaster/postmaster.c | 1 + src/backend/storage/file/fileset.c | 1 + src/bin/pg_checksums/pg_checksums.c | 10 ---------- src/bin/pg_rewind/filemap.c | 2 +- src/include/common/file_utils.h | 4 ++++ src/include/storage/fd.h | 4 ---- 7 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 45be21131c..5d66014499 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -25,6 +25,7 @@ #include "commands/defrem.h" #include "common/compression.h" #include "common/file_perm.h" +#include "common/file_utils.h" #include "lib/stringinfo.h" #include "miscadmin.h" #include "nodes/pg_list.h" @@ -37,7 +38,6 @@ #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/dsm_impl.h" -#include "storage/fd.h" #include "storage/ipc.h" #include "storage/reinit.h" #include "utils/builtins.h" diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index d7bfb28ff3..54e9bfb8c4 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -94,6 +94,7 @@ #include "access/xlogrecovery.h" #include "catalog/pg_control.h" #include "common/file_perm.h" +#include "common/file_utils.h" #include "common/ip.h" #include "common/pg_prng.h" #include "common/string.h" diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c index e9951b0269..84cae32548 100644 --- a/src/backend/storage/file/fileset.c +++ b/src/backend/storage/file/fileset.c @@ -25,6 +25,7 @@ #include "catalog/pg_tablespace.h" #include "commands/tablespace.h" +#include "common/file_utils.h" #include "common/hashfn.h" #include "miscadmin.h" #include "storage/ipc.h" diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 19eb67e485..9011a19b4e 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -52,16 +52,6 @@ typedef enum PG_MODE_ENABLE } PgChecksumMode; -/* - * Filename components. - * - * XXX: fd.h is not declared here as frontend side code is not able to - * interact with the backend-side definitions for the various fsync - * wrappers. - */ -#define PG_TEMP_FILES_DIR "pgsql_tmp" -#define PG_TEMP_FILE_PREFIX "pgsql_tmp" - static PgChecksumMode mode = PG_MODE_CHECK; static const char *progname; diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index bd5c598e20..58280d9abc 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -27,12 +27,12 @@ #include <unistd.h> #include "catalog/pg_tablespace_d.h" +#include "common/file_utils.h" #include "common/hashfn.h" #include "common/string.h" #include "datapagemap.h" #include "filemap.h" #include "pg_rewind.h" -#include "storage/fd.h" /* * Define a hash table which we can use to store information about the files diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index b7efa1226d..dd1532bcb0 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd, extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset); +/* Filename components */ +#define PG_TEMP_FILES_DIR "pgsql_tmp" +#define PG_TEMP_FILE_PREFIX "pgsql_tmp" + #endif /* FILE_UTILS_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 6791a406fc..aea30c0622 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -195,8 +195,4 @@ extern int durable_unlink(const char *fname, int elevel); extern void SyncDataDirectory(void); extern int data_sync_elevel(int elevel); -/* Filename components */ -#define PG_TEMP_FILES_DIR "pgsql_tmp" -#define PG_TEMP_FILE_PREFIX "pgsql_tmp" - #endif /* FD_H */ -- 2.25.1
>From 81e87db711bb90f4641b41b4f863f1f5064eb05d Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Fri, 28 Jul 2023 15:56:24 -0700 Subject: [PATCH v6 2/2] allow syncfs in frontend utilities --- doc/src/sgml/config.sgml | 12 +--- doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/postgres.sgml | 1 + doc/src/sgml/ref/initdb.sgml | 22 +++++++ doc/src/sgml/ref/pg_basebackup.sgml | 25 ++++++++ doc/src/sgml/ref/pg_checksums.sgml | 22 +++++++ doc/src/sgml/ref/pg_dump.sgml | 21 +++++++ doc/src/sgml/ref/pg_rewind.sgml | 22 +++++++ doc/src/sgml/ref/pgupgrade.sgml | 23 +++++++ doc/src/sgml/syncfs.sgml | 36 +++++++++++ src/backend/storage/file/fd.c | 4 +- src/backend/utils/misc/guc_tables.c | 7 ++- src/bin/initdb/initdb.c | 11 +++- src/bin/pg_basebackup/pg_basebackup.c | 12 +++- src/bin/pg_checksums/pg_checksums.c | 9 ++- src/bin/pg_dump/pg_backup.h | 4 +- src/bin/pg_dump/pg_backup_archiver.c | 14 +++-- src/bin/pg_dump/pg_backup_archiver.h | 1 + src/bin/pg_dump/pg_backup_directory.c | 2 +- src/bin/pg_dump/pg_dump.c | 10 ++- src/bin/pg_rewind/file_ops.c | 2 +- src/bin/pg_rewind/pg_rewind.c | 9 +++ src/bin/pg_rewind/pg_rewind.h | 2 + src/bin/pg_upgrade/option.c | 13 ++++ src/bin/pg_upgrade/pg_upgrade.c | 6 +- src/bin/pg_upgrade/pg_upgrade.h | 1 + src/common/file_utils.c | 91 ++++++++++++++++++++++++++- src/fe_utils/option_utils.c | 24 +++++++ src/include/common/file_utils.h | 11 +++- src/include/fe_utils/option_utils.h | 4 ++ src/include/storage/fd.h | 6 -- src/tools/pgindent/typedefs.list | 1 + 32 files changed, 389 insertions(+), 40 deletions(-) create mode 100644 doc/src/sgml/syncfs.sgml diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 694d667bf9..2c7d9f1262 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -10580,15 +10580,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' On Linux, <literal>syncfs</literal> may be used instead, to ask the operating system to synchronize the whole file systems that contain the data directory, the WAL files and each tablespace (but not any other - file systems that may be reachable through symbolic links). This may - be a lot faster than the <literal>fsync</literal> setting, because it - doesn't need to open each file one by one. On the other hand, it may - be slower if a file system is shared by other applications that - modify a lot of files, since those files will also be written to disk. - Furthermore, on versions of Linux before 5.8, I/O errors encountered - while writing data to disk may not be reported to - <productname>PostgreSQL</productname>, and relevant error messages may - appear only in kernel logs. + file systems that may be reachable through symbolic links). See + <xref linkend="syncfs"/> for more information about using + <function>syncfs()</function>. </para> <para> This parameter can only be set in the diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 63b0fc2a46..df5d7c3025 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -184,6 +184,7 @@ <!ENTITY acronyms SYSTEM "acronyms.sgml"> <!ENTITY glossary SYSTEM "glossary.sgml"> <!ENTITY color SYSTEM "color.sgml"> +<!ENTITY syncfs SYSTEM "syncfs.sgml"> <!ENTITY features-supported SYSTEM "features-supported.sgml"> <!ENTITY features-unsupported SYSTEM "features-unsupported.sgml"> diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 2e271862fc..f629524be0 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -294,6 +294,7 @@ break is not needed in a wider output rendering. &acronyms; &glossary; &color; + &syncfs; &obsolete; </part> diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 22f1011781..8a09c5c438 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -365,6 +365,28 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry id="app-initdb-option-sync-method"> + <term><option>--sync-method</option></term> + <listitem> + <para> + When set to <literal>fsync</literal>, which is the default, + <command>initdb</command> will recursively open and synchronize all + files in the data directory. The search for files will follow symbolic + links for the WAL directory and each configured tablespace. + </para> + <para> + On Linux, <literal>syncfs</literal> may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + <xref linkend="syncfs"/> for more information about using + <function>syncfs()</function>. + </para> + <para> + This option has no effect when <option>--no-sync</option> is used. + </para> + </listitem> + </varlistentry> + <varlistentry id="app-initdb-option-sync-only"> <term><option>-S</option></term> <term><option>--sync-only</option></term> diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 79d3e657c3..d2b8ddd200 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -594,6 +594,31 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>--sync-method</option></term> + <listitem> + <para> + When set to <literal>fsync</literal>, which is the default, + <command>pg_basebackup</command> will recursively open and synchronize + all files in the backup directory. When the plain format is used, the + search for files will follow symbolic links for the WAL directory and + each configured tablespace. + </para> + <para> + On Linux, <literal>syncfs</literal> may be used instead to ask the + operating system to synchronize the whole file system that contains the + backup directory. When the plain format is used, + <command>pg_basebackup</command> will also synchronize the file systems + that contain the WAL files and each tablespace. See + <xref linkend="syncfs"/> for more information about using + <function>syncfs()</function>. + </para> + <para> + This option has no effect when <option>--no-sync</option> is used. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-v</option></term> <term><option>--verbose</option></term> diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml index a3d0b0f0a3..7b44ba71cf 100644 --- a/doc/src/sgml/ref/pg_checksums.sgml +++ b/doc/src/sgml/ref/pg_checksums.sgml @@ -139,6 +139,28 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>--sync-method</option></term> + <listitem> + <para> + When set to <literal>fsync</literal>, which is the default, + <command>pg_checksums</command> will recursively open and synchronize + all files in the data directory. The search for files will follow + symbolic links for the WAL directory and each configured tablespace. + </para> + <para> + On Linux, <literal>syncfs</literal> may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + <xref linkend="syncfs"/> for more information about using + <function>syncfs()</function>. + </para> + <para> + This option has no effect when <option>--no-sync</option> is used. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-v</option></term> <term><option>--verbose</option></term> diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index a3cf0608f5..c1e2220b3c 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1179,6 +1179,27 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>--sync-method</option></term> + <listitem> + <para> + When set to <literal>fsync</literal>, which is the default, + <command>pg_dump --format=directory</command> will recursively open and + synchronize all files in the archive directory. + </para> + <para> + On Linux, <literal>syncfs</literal> may be used instead to ask the + operating system to synchronize the whole file system that contains the + archive directory. See <xref linkend="syncfs"/> for more information + about using <function>syncfs()</function>. + </para> + <para> + This option has no effect when <option>--no-sync</option> is used or + <option>--format</option> is not set to <literal>directory</literal>. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term> <listitem> diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml index 15cddd086b..80dff16168 100644 --- a/doc/src/sgml/ref/pg_rewind.sgml +++ b/doc/src/sgml/ref/pg_rewind.sgml @@ -284,6 +284,28 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>--sync-method</option></term> + <listitem> + <para> + When set to <literal>fsync</literal>, which is the default, + <command>pg_rewind</command> will recursively open and synchronize all + files in the data directory. The search for files will follow symbolic + links for the WAL directory and each configured tablespace. + </para> + <para> + On Linux, <literal>syncfs</literal> may be used instead to ask the + operating system to synchronize the whole file systems that contain the + data directory, the WAL files, and each tablespace. See + <xref linkend="syncfs"/> for more information about using + <function>syncfs()</function>. + </para> + <para> + This option has no effect when <option>--no-sync</option> is used. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-V</option></term> <term><option>--version</option></term> diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 7816b4c685..6c033485ea 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -190,6 +190,29 @@ PostgreSQL documentation variable <envar>PGSOCKETDIR</envar></para></listitem> </varlistentry> + <varlistentry> + <term><option>--sync-method</option></term> + <listitem> + <para> + When set to <literal>fsync</literal>, which is the default, + <command>pg_upgrade</command> will recursively open and synchronize all + files in the upgraded cluster's data directory. The search for files + will follow symbolic links for the WAL directory and each configured + tablespace. + </para> + <para> + On Linux, <literal>syncfs</literal> may be used instead to ask the + operating system to synchronize the whole file systems that contain the + upgrade cluster's data directory, its WAL files, and each tablespace. + See <xref linkend="syncfs"/> for more information about using + <function>syncfs()</function>. + </para> + <para> + This option has no effect when <option>--no-sync</option> is used. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-U</option> <replaceable>username</replaceable></term> <term><option>--username=</option><replaceable>username</replaceable></term> diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml new file mode 100644 index 0000000000..00457d2457 --- /dev/null +++ b/doc/src/sgml/syncfs.sgml @@ -0,0 +1,36 @@ +<!-- doc/src/sgml/syncfs.sgml --> + +<appendix id="syncfs"> + <title><function>syncfs()</function> Caveats</title> + + <indexterm zone="syncfs"> + <primary>syncfs</primary> + </indexterm> + + <para> + On Linux <function>syncfs()</function> may be specified for some + configuration parameters (e.g., + <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g., + <application>pg_upgrade</application>), and client applications (e.g., + <application>pg_basebackup</application>) that involve synchronizing many + files to disk. <function>syncfs()</function> is advantageous in many cases, + but there are some trade-offs to keep in mind. + </para> + + <para> + Since <function>syncfs()</function> instructs the operating system to + synchronize a whole file system, it typically requires many fewer system + calls than using <function>fsync()</function> to synchronize each file one by + one. Therefore, using <function>syncfs()</function> may be a lot faster than + using <function>fsync()</function>. However, it may be slower if a file + system is shared by other applications that modify a lot of files, since + those files will also be written to disk. + </para> + + <para> + Furthermore, on versions of Linux before 5.8, I/O errors encountered while + writing data to disk may not be reported to the calling program, and relevant + error messages may appear only in kernel logs. + </para> + +</appendix> diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index b490a76ba7..3fed475c38 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -162,7 +162,7 @@ int max_safe_fds = FD_MINFREE; /* default if not changed */ bool data_sync_retry = false; /* How SyncDataDirectory() should do its job. */ -int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC; +int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC; /* Which kinds of files should be opened with PG_O_DIRECT. */ int io_direct_flags; @@ -3513,7 +3513,7 @@ SyncDataDirectory(void) } #ifdef HAVE_SYNCFS - if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS) + if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS) { DIR *dir; struct dirent *de; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index e565a3092f..5abebe9a9c 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -41,6 +41,7 @@ #include "commands/trigger.h" #include "commands/user.h" #include "commands/vacuum.h" +#include "common/file_utils.h" #include "common/scram-common.h" #include "jit/jit.h" #include "libpq/auth.h" @@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2) "array length mismatch"); static const struct config_enum_entry recovery_init_sync_method_options[] = { - {"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false}, + {"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false}, #ifdef HAVE_SYNCFS - {"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false}, + {"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false}, #endif {NULL, 0, false} }; @@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] = gettext_noop("Sets the method for synchronizing the data directory before crash recovery."), }, &recovery_init_sync_method, - RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options, + DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options, NULL, NULL, NULL }, diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 905b979947..543927b8b4 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -165,6 +165,7 @@ static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); +static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; /* internal vars */ @@ -2466,6 +2467,7 @@ usage(const char *progname) printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -s, --show show internal settings\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -S, --sync-only only sync database files to disk, then exit\n")); printf(_("\nOther options:\n")); printf(_(" -V, --version output version information, then exit\n")); @@ -3106,6 +3108,7 @@ main(int argc, char *argv[]) {"locale-provider", required_argument, NULL, 15}, {"icu-locale", required_argument, NULL, 16}, {"icu-rules", required_argument, NULL, 17}, + {"sync-method", required_argument, NULL, 18}, {NULL, 0, NULL, 0} }; @@ -3286,6 +3289,10 @@ main(int argc, char *argv[]) case 17: icu_rules = pg_strdup(optarg); break; + case 18: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -3333,7 +3340,7 @@ main(int argc, char *argv[]) fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - fsync_pgdata(pg_data, PG_VERSION_NUM); + fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method); check_ok(); return 0; } @@ -3396,7 +3403,7 @@ main(int argc, char *argv[]) { fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - fsync_pgdata(pg_data, PG_VERSION_NUM); + fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method); check_ok(); } else diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 1dc8efe0cb..977c793a67 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -148,6 +148,7 @@ static bool verify_checksums = true; static bool manifest = true; static bool manifest_force_encode = false; static char *manifest_checksums = NULL; +static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; static bool success = false; static bool made_new_pgdata = false; @@ -424,6 +425,8 @@ usage(void) printf(_(" --no-slot prevent creation of temporary replication slot\n")); printf(_(" --no-verify-checksums\n" " do not verify checksums\n")); + printf(_(" --sync-method=METHOD\n" + " set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connection string\n")); @@ -2199,11 +2202,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail, if (format == 't') { if (strcmp(basedir, "-") != 0) - (void) fsync_dir_recurse(basedir); + (void) fsync_dir_recurse(basedir, sync_method); } else { - (void) fsync_pgdata(basedir, serverVersion); + (void) fsync_pgdata(basedir, serverVersion, sync_method); } } @@ -2281,6 +2284,7 @@ main(int argc, char **argv) {"no-manifest", no_argument, NULL, 5}, {"manifest-force-encode", no_argument, NULL, 6}, {"manifest-checksums", required_argument, NULL, 7}, + {"sync-method", required_argument, NULL, 8}, {NULL, 0, NULL, 0} }; int c; @@ -2452,6 +2456,10 @@ main(int argc, char **argv) case 7: manifest_checksums = pg_strdup(optarg); break; + case 8: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 9011a19b4e..9fb2c5b3c7 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -44,6 +44,7 @@ static char *only_filenode = NULL; static bool do_sync = true; static bool verbose = false; static bool showprogress = false; +static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; typedef enum { @@ -77,6 +78,7 @@ usage(void) printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" -P, --progress show progress information\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -435,6 +437,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"verbose", no_argument, NULL, 'v'}, + {"sync-method", required_argument, NULL, 1}, {NULL, 0, NULL, 0} }; @@ -493,6 +496,10 @@ main(int argc, char *argv[]) case 'v': verbose = true; break; + case 1: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -623,7 +630,7 @@ main(int argc, char *argv[]) if (do_sync) { pg_log_info("syncing data directory"); - fsync_pgdata(DataDir, PG_VERSION_NUM); + fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method); } pg_log_info("updating control file"); diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index aba780ef4b..3a57cdd97d 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -24,6 +24,7 @@ #define PG_BACKUP_H #include "common/compression.h" +#include "common/file_utils.h" #include "fe_utils/simple_list.h" #include "libpq-fe.h" @@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt); extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupDumpWorker); + SetupWorkerPtrType setupDumpWorker, + DataDirSyncMethod sync_method); /* The --list option */ extern void PrintTOCSummary(Archive *AHX); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 39ebcfec32..4d83381d84 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -66,7 +66,8 @@ typedef struct _parallelReadyList static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupWorkerPtr); + SetupWorkerPtrType setupWorkerPtr, + DataDirSyncMethod sync_method); static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te); static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData); static char *sanitize_line(const char *str, bool want_hyphen); @@ -238,11 +239,12 @@ Archive * CreateArchive(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupDumpWorker) + SetupWorkerPtrType setupDumpWorker, + DataDirSyncMethod sync_method) { ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec, - dosync, mode, setupDumpWorker); + dosync, mode, setupDumpWorker, sync_method); return (Archive *) AH; } @@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt) compression_spec.algorithm = PG_COMPRESSION_NONE; AH = _allocAH(FileSpec, fmt, compression_spec, true, - archModeRead, setupRestoreWorker); + archModeRead, setupRestoreWorker, + DATA_DIR_SYNC_METHOD_FSYNC); return (Archive *) AH; } @@ -2233,7 +2236,7 @@ static ArchiveHandle * _allocAH(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupWorkerPtr) + SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method) { ArchiveHandle *AH; CompressFileHandle *CFH; @@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->mode = mode; AH->compression_spec = compression_spec; AH->dosync = dosync; + AH->sync_method = sync_method; memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse)); diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 18b38c17ab..b07673933d 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -312,6 +312,7 @@ struct _archiveHandle pg_compress_specification compression_spec; /* Requested specification for * compression */ bool dosync; /* data requested to be synced on sight */ + DataDirSyncMethod sync_method; ArchiveMode mode; /* File mode - r or w */ void *formatData; /* Header data specific to file format */ diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 7f2ac7c7fd..6faa3a511f 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH) * individually. Just recurse once through all the files generated. */ if (AH->dosync) - fsync_dir_recurse(ctx->directory); + fsync_dir_recurse(ctx->directory, AH->sync_method); } AH->FH = NULL; } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 65f64c282d..dc4a28e81e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -357,6 +357,7 @@ main(int argc, char **argv) char *compression_algorithm_str = "none"; char *error_detail = NULL; bool user_compression_defined = false; + DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; static DumpOptions dopt; @@ -431,6 +432,7 @@ main(int argc, char **argv) {"table-and-children", required_argument, NULL, 12}, {"exclude-table-and-children", required_argument, NULL, 13}, {"exclude-table-data-and-children", required_argument, NULL, 14}, + {"sync-method", required_argument, NULL, 15}, {NULL, 0, NULL, 0} }; @@ -657,6 +659,11 @@ main(int argc, char **argv) optarg); break; + case 15: + if (!parse_sync_method(optarg, &sync_method)) + exit_nicely(1); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -777,7 +784,7 @@ main(int argc, char **argv) /* Open the output file */ fout = CreateArchive(filename, archiveFormat, compression_spec, - dosync, archiveMode, setupDumpWorker); + dosync, archiveMode, setupDumpWorker, sync_method); /* Make dump options accessible right away */ SetArchiveOptions(fout, &dopt, NULL); @@ -1068,6 +1075,7 @@ help(const char *progname) " compress as specified\n")); printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nOptions controlling the output content:\n")); diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c index 25996b4da4..451fb1856e 100644 --- a/src/bin/pg_rewind/file_ops.c +++ b/src/bin/pg_rewind/file_ops.c @@ -296,7 +296,7 @@ sync_target_dir(void) if (!do_sync || dry_run) return; - fsync_pgdata(datadir_target, PG_VERSION_NUM); + fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method); } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 7f69f02441..bfd44a284e 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -22,6 +22,7 @@ #include "common/file_perm.h" #include "common/restricted_token.h" #include "common/string.h" +#include "fe_utils/option_utils.h" #include "fe_utils/recovery_gen.h" #include "fe_utils/string_utils.h" #include "file_ops.h" @@ -74,6 +75,7 @@ bool showprogress = false; bool dry_run = false; bool do_sync = true; bool restore_wal = false; +DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; /* Target history */ TimeLineHistoryEntry *targetHistory; @@ -107,6 +109,7 @@ usage(const char *progname) " file when running target cluster\n")); printf(_(" --debug write a lot of debug messages\n")); printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); @@ -131,6 +134,7 @@ main(int argc, char **argv) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"debug", no_argument, NULL, 3}, + {"sync-method", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; int option_index; @@ -218,6 +222,11 @@ main(int argc, char **argv) config_file = pg_strdup(optarg); break; + case 6: + if (!parse_sync_method(optarg, &sync_method)) + exit(1); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index ef8bdc1fbb..05729adfef 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -13,6 +13,7 @@ #include "access/timeline.h" #include "common/logging.h" +#include "common/file_utils.h" #include "datapagemap.h" #include "libpq-fe.h" #include "storage/block.h" @@ -24,6 +25,7 @@ extern bool showprogress; extern bool dry_run; extern bool do_sync; extern int WalSegSz; +extern DataDirSyncMethod sync_method; /* Target history */ extern TimeLineHistoryEntry *targetHistory; diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c index 640361009e..b9d900d0db 100644 --- a/src/bin/pg_upgrade/option.c +++ b/src/bin/pg_upgrade/option.c @@ -14,6 +14,7 @@ #endif #include "common/string.h" +#include "fe_utils/option_utils.h" #include "getopt_long.h" #include "pg_upgrade.h" #include "utils/pidfile.h" @@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[]) {"verbose", no_argument, NULL, 'v'}, {"clone", no_argument, NULL, 1}, {"copy", no_argument, NULL, 2}, + {"sync-method", required_argument, NULL, 3}, {NULL, 0, NULL, 0} }; int option; /* Command line option */ int optindex = 0; /* used by getopt_long */ int os_user_effective_id; + DataDirSyncMethod unused; user_opts.do_sync = true; user_opts.transfer_mode = TRANSFER_MODE_COPY; @@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[]) user_opts.transfer_mode = TRANSFER_MODE_COPY; break; + case 3: + if (!parse_sync_method(optarg, &unused)) + exit(1); + user_opts.sync_method = pg_strdup(optarg); + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), os_info.progname); @@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[]) if (optind < argc) pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]); + if (!user_opts.sync_method) + user_opts.sync_method = pg_strdup("fsync"); + if (log_opts.verbose) pg_log(PG_REPORT, "Running in verbose mode"); @@ -289,6 +301,7 @@ usage(void) printf(_(" -V, --version display version information, then exit\n")); printf(_(" --clone clone instead of copying files to new cluster\n")); printf(_(" --copy copy files to new cluster (default)\n")); + printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\n" "Before running pg_upgrade you must:\n" diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 4562dafcff..96bfb67167 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -192,8 +192,10 @@ main(int argc, char **argv) { prep_status("Sync data directory to disk"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir, - new_cluster.pgdata); + "\"%s/initdb\" --sync-only \"%s\" --sync-method %s", + new_cluster.bindir, + new_cluster.pgdata, + user_opts.sync_method); check_ok(); } diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 7afa96716e..842f3b6cd3 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -304,6 +304,7 @@ typedef struct transferMode transfer_mode; /* copy files or link them? */ int jobs; /* number of processes/threads to use */ char *socketdir; /* directory to use for Unix sockets */ + char *sync_method; } UserOpts; typedef struct diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 74833c4acb..c977c990ad 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -51,6 +51,31 @@ static void walkdir(const char *path, int (*action) (const char *fname, bool isdir), bool process_symlinks); +#ifdef HAVE_SYNCFS +static void +do_syncfs(const char *path) +{ + int fd; + + fd = open(path, O_RDONLY, 0); + + if (fd < 0) + { + pg_log_error("could not open file \"%s\": %m", path); + return; + } + + if (syncfs(fd) < 0) + { + pg_log_error("could not synchronize file system for file \"%s\": %m", path); + (void) close(fd); + exit(EXIT_FAILURE); + } + + (void) close(fd); +} +#endif + /* * Issue fsync recursively on PGDATA and all its contents. * @@ -63,7 +88,8 @@ static void walkdir(const char *path, */ void fsync_pgdata(const char *pg_data, - int serverVersion) + int serverVersion, + DataDirSyncMethod sync_method) { bool xlog_is_symlink; char pg_wal[MAXPGPATH]; @@ -89,6 +115,55 @@ fsync_pgdata(const char *pg_data, xlog_is_symlink = true; } +#ifdef HAVE_SYNCFS + if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS) + { + DIR *dir; + struct dirent *de; + + /* + * On Linux, we don't have to open every single file one by one. We + * can use syncfs() to sync whole filesystems. We only expect + * filesystem boundaries to exist where we tolerate symlinks, namely + * pg_wal and the tablespaces, so we call syncfs() for each of those + * directories. + */ + + /* Sync the top level pgdata directory. */ + do_syncfs(pg_data); + + /* If any tablespaces are configured, sync each of those. */ + dir = opendir(pg_tblspc); + if (dir == NULL) + pg_log_error("could not open directory \"%s\": %m", pg_tblspc); + else + { + while (errno = 0, (de = readdir(dir)) != NULL) + { + char subpath[MAXPGPATH * 2]; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name); + do_syncfs(subpath); + } + + if (errno) + pg_log_error("could not read directory \"%s\": %m", pg_tblspc); + + (void) closedir(dir); + } + + /* If pg_wal is a symlink, process that too. */ + if (xlog_is_symlink) + do_syncfs(pg_wal); + + return; + } +#endif /* HAVE_SYNCFS */ + /* * If possible, hint to the kernel that we're soon going to fsync the data * directory and its contents. @@ -121,8 +196,20 @@ fsync_pgdata(const char *pg_data, * This is a convenient wrapper on top of walkdir(). */ void -fsync_dir_recurse(const char *dir) +fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method) { +#ifdef HAVE_SYNCFS + if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS) + { + /* + * On Linux, we don't have to open every single file one by one. We + * can use syncfs() to sync the whole filesystem. + */ + do_syncfs(dir); + return; + } +#endif /* HAVE_SYNCFS */ + /* * If possible, hint to the kernel that we're soon going to fsync the data * directory and its contents. diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c index 763c991015..36ac689964 100644 --- a/src/fe_utils/option_utils.c +++ b/src/fe_utils/option_utils.c @@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname, *result = val; return true; } + +bool +parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method) +{ + if (strcmp(optarg, "fsync") == 0) + *sync_method = DATA_DIR_SYNC_METHOD_FSYNC; + else if (strcmp(optarg, "syncfs") == 0) + { +#ifdef HAVE_SYNCFS + *sync_method = DATA_DIR_SYNC_METHOD_SYNCFS; +#else + pg_log_error("this build does not support sync method \"%s\"", + "syncfs"); + return false; +#endif + } + else + { + pg_log_error("unrecognized sync method: %s", optarg); + return false; + } + + return true; +} diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index dd1532bcb0..09d1e5d561 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -26,10 +26,17 @@ typedef enum PGFileType struct iovec; /* avoid including port/pg_iovec.h here */ +typedef enum DataDirSyncMethod +{ + DATA_DIR_SYNC_METHOD_FSYNC, + DATA_DIR_SYNC_METHOD_SYNCFS +} DataDirSyncMethod; + #ifdef FRONTEND extern int fsync_fname(const char *fname, bool isdir); -extern void fsync_pgdata(const char *pg_data, int serverVersion); -extern void fsync_dir_recurse(const char *dir); +extern void fsync_pgdata(const char *pg_data, int serverVersion, + DataDirSyncMethod sync_method); +extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method); extern int durable_rename(const char *oldfile, const char *newfile); extern int fsync_parent_path(const char *fname); #endif diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h index b7b0654cee..6f3a965916 100644 --- a/src/include/fe_utils/option_utils.h +++ b/src/include/fe_utils/option_utils.h @@ -14,6 +14,8 @@ #include "postgres_fe.h" +#include "common/file_utils.h" + typedef void (*help_handler) (const char *progname); extern void handle_help_version_opts(int argc, char *argv[], @@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[], extern bool option_parse_int(const char *optarg, const char *optname, int min_range, int max_range, int *result); +extern bool parse_sync_method(const char *optarg, + DataDirSyncMethod *sync_method); #endif /* OPTION_UTILS_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index aea30c0622..d9d5d9da5f 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -46,12 +46,6 @@ #include <dirent.h> #include <fcntl.h> -typedef enum RecoveryInitSyncMethod -{ - RECOVERY_INIT_SYNC_METHOD_FSYNC, - RECOVERY_INIT_SYNC_METHOD_SYNCFS -} RecoveryInitSyncMethod; - typedef int File; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 49a33c0387..fe571c6265 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -545,6 +545,7 @@ DR_printtup DR_sqlfunction DR_transientrel DWORD +DataDirSyncMethod DataDumperPtr DataPageDeleteStack DatabaseInfo -- 2.25.1