Re: [PING] fallocate() causes btrfs to never compress postgresql files

Thomas Munro Sat, 31 May 2025 07:10:31 -0700

On Fri, May 30, 2025 at 3:58 AM Dimitrios Apostolou <ji...@gmx.net> wrote:
> All I'm saying is that this is a regression for PostgreSQL users that keep
> tablespaces on compressed Btrfs. What could be done from postgres, is to
> provide a runtime setting for avoiding fallocate(), going instead through
> the old code path. Idelly this would be an option per tablespace, but even
> a global one is better than nothing.


Here's an initial sketch of such a setting.  Better name, design,
words welcome.  Would need a bit more work to cover temp tables too.
It's slightly tricky to get smgr to behave differently because of the
contents of a system catalogue!  I couldn't think of a better way than
exposing it as a flag that the buffer manager layer has to know about
and compute earlier, but that also seems a bit strange, as fallocate
is a highly md.c specific concern.  Hmm.

I suppose something like the 0001 part could be back-patched if this
is considered a serious enough problem without other workarounds, so I
did this in two steps.  I wonder if there are good reasons to want to
change the number on other file systems.  I suppose it at least allows
experimentation.

From 8607189eb19302c509eed78a7a2db55b9a2d70b3 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 31 May 2025 22:50:22 +1200
Subject: [PATCH 1/2] Add io_min_fallocate setting.

BTRFS's compression is reported to be disabled by posix_fallocate(), so
offer a way to turn it off.  The previous coding had a threshold of 8
blocks before using that instead of writing zeroes, so make that
configurable.  0 means never, and other numbers specify a threshold in
blocks, defaulting to 8 as before.

Reported-by: Dimitrios Apostolou <ji...@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
---
 doc/src/sgml/config.sgml                      | 17 +++++++++++++++++
 src/backend/storage/file/fd.c                 |  3 +++
 src/backend/storage/smgr/md.c                 |  6 ++----
 src/backend/utils/misc/guc_tables.c           | 12 ++++++++++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/storage/fd.h                      |  1 +
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f4a0191c55b..7d476665f42 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2684,6 +2684,23 @@ include_dir 'conf.d'
        </listitem>
       </varlistentry>
 
+      <varlistentry id="guc-io-min-fallocate" xreflabel="io_min_fallocate">
+       <term><varname>io_min_fallocate</varname> (<type>integer</type>)
+       <indexterm>
+        <primary><varname>io_min_fallocate</varname> configuration parameter</primary>
+       </indexterm>
+       </term>
+       <listitem>
+        <para>
+         Threshold at which <function>posix_fallocate()</function> is used to
+         extend data files instead of writing zeroes.  <literal>0</literal>
+         means never (always write
+         zeroes), and other values indicate a number of blocks.
+         The default is <literal>8</literal>.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="guc-io-max-combine-limit" xreflabel="io_max_combine_limit">
        <term><varname>io_max_combine_limit</varname> (<type>integer</type>)
        <indexterm>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0e8299dd556..ff16b5cc6bd 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -164,6 +164,9 @@ bool		data_sync_retry = false;
 /* How SyncDataDirectory() should do its job. */
 int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
+/* At what size FileFallocate() should be preferred over FileZero(). */
+int			io_min_fallocate = 8;
+
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 2ccb0faceb5..6d1b9cb65b2 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -588,11 +588,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		 * to allocate page cache space for the extended pages.
 		 *
 		 * However, we don't use FileFallocate() for small extensions, as it
-		 * defeats delayed allocation on some filesystems. Not clear where
-		 * that decision should be made though? For now just use a cutoff of
-		 * 8, anything between 4 and 8 worked OK in some local testing.
+		 * defeats delayed allocation on some filesystems.
 		 */
-		if (numblocks > 8)
+		if (io_min_fallocate > 0 && numblocks >= io_min_fallocate)
 		{
 			int			ret;
 
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..a75ff8623d9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3265,6 +3265,18 @@ struct config_int ConfigureNamesInt[] =
 		NULL
 	},
 
+	{
+		{"io_min_fallocate",
+			PGC_USERSET,
+			RESOURCES_IO,
+			gettext_noop("Threshold for preferring posix_fallocate() when extending data files."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&io_min_fallocate,
+		8, 0, INT_MAX
+	},
+
 	{
 		{"io_max_combine_limit",
 			PGC_POSTMASTER,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 87ce76b18f4..8b712ef244f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -200,6 +200,7 @@
 #backend_flush_after = 0		# measured in pages, 0 disables
 #effective_io_concurrency = 16		# 1-1000; 0 disables issuing multiple simultaneous IO requests
 #maintenance_io_concurrency = 16	# 1-1000; same as effective_io_concurrency
+#io_min_fallocate = 8			# min size at which to prefer posix_fallocate, 0 = never
 #io_max_combine_limit = 128kB		# usually 1-128 blocks (depends on OS)
 					# (change requires restart)
 #io_combine_limit = 128kB		# usually 1-128 blocks (depends on OS)
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index b77d8e5e30e..b8714e5ceb8 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -60,6 +60,7 @@ typedef int File;
 extern PGDLLIMPORT int max_files_per_process;
 extern PGDLLIMPORT bool data_sync_retry;
 extern PGDLLIMPORT int recovery_init_sync_method;
+extern PGDLLIMPORT int io_min_fallocate;
 extern PGDLLIMPORT int io_direct_flags;
 
 /*
-- 
2.39.5

From 33bb0c22769206a3a752e99a8987be44dd9bdd31 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 31 May 2025 23:26:28 +1200
Subject: [PATCH 2/2] Add io_min_fallocate tablespace option.

Allow io_min_fallocate to be set on a per-tablespace basis.  The
decision must be made by bufmgr.c because md.c can't access catalogs
once the extension lock is held.  The smgrzeroextend() function gains a
flags parameter, and smgrextend() too for consistency.

TODO: temp tables too

Reported-by: Dimitrios Apostolou <ji...@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
---
 doc/src/sgml/config.sgml               |  7 +++++--
 doc/src/sgml/ref/alter_tablespace.sgml |  1 +
 src/backend/access/common/reloptions.c | 12 +++++++++++-
 src/backend/access/hash/hashpage.c     |  3 +--
 src/backend/storage/buffer/bufmgr.c    | 22 +++++++++++++++++++++-
 src/backend/storage/smgr/bulk_write.c  |  5 +++--
 src/backend/storage/smgr/md.c          | 12 ++++++++----
 src/backend/storage/smgr/smgr.c        | 12 ++++++------
 src/backend/utils/cache/spccache.c     | 13 +++++++++++++
 src/bin/psql/tab-complete.in.c         |  1 +
 src/include/commands/tablespace.h      |  1 +
 src/include/storage/md.h               |  4 ++--
 src/include/storage/smgr.h             |  7 +++++--
 src/include/utils/rel.h                |  1 +
 src/include/utils/spccache.h           |  1 +
 15 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 7d476665f42..7f956666c99 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2696,8 +2696,11 @@ include_dir 'conf.d'
          extend data files instead of writing zeroes.  <literal>0</literal>
          means never (always write
          zeroes), and other values indicate a number of blocks.
-         The default is <literal>8</literal>.
-        </para>
+         The default is <literal>8</literal>.  This value can be overridden
+         for tables in a particular tablespace by setting the tablespace
+         parameter of the same name (see <xref
+         linkend="sql-altertablespace"/>).
+         </para>
        </listitem>
       </varlistentry>
 
diff --git a/doc/src/sgml/ref/alter_tablespace.sgml b/doc/src/sgml/ref/alter_tablespace.sgml
index d0e08089ddb..351ae62b24a 100644
--- a/doc/src/sgml/ref/alter_tablespace.sgml
+++ b/doc/src/sgml/ref/alter_tablespace.sgml
@@ -92,6 +92,7 @@ ALTER TABLESPACE <replaceable>name</replaceable> RESET ( <replaceable class="par
       by the configuration parameters of the
       same name (see <xref linkend="guc-seq-page-cost"/>,
       <xref linkend="guc-random-page-cost"/>,
+      <xref linkend="guc-io-min-fallocate"/>,
       <xref linkend="guc-effective-io-concurrency"/>,
       <xref linkend="guc-maintenance-io-concurrency"/>).  This may be useful if
       one tablespace is located on a disk which is faster or slower than the
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..703b395c302 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -372,6 +372,15 @@ static relopt_int intRelOpts[] =
 		},
 		-1, 0, MAX_IO_CONCURRENCY
 	},
+	{
+		{
+			"io_min_fallocate",
+			"Threshold for preferring posix_fallocate() when extending data files.",
+			RELOPT_KIND_TABLESPACE,
+			ShareUpdateExclusiveLock
+		},
+		-1, 0, INT_MAX
+	},
 	{
 		{
 			"parallel_workers",
@@ -2116,7 +2125,8 @@ tablespace_reloptions(Datum reloptions, bool validate)
 		{"random_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, random_page_cost)},
 		{"seq_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, seq_page_cost)},
 		{"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)},
-		{"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)}
+		{"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)},
+		{"io_min_fallocate", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, io_min_fallocate)}
 	};
 
 	return (bytea *) build_reloptions(reloptions, validate,
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index b8e5bd005e5..d8b5f06f435 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -1030,8 +1030,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
 					true);
 
 	PageSetChecksumInplace(page, lastblock);
-	smgrextend(RelationGetSmgr(rel), MAIN_FORKNUM, lastblock, zerobuf.data,
-			   false);
+	smgrextend(RelationGetSmgr(rel), MAIN_FORKNUM, lastblock, zerobuf.data, 0);
 
 	return true;
 }
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..69dc70d7ad1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -65,6 +65,7 @@
 #include "utils/ps_status.h"
 #include "utils/rel.h"
 #include "utils/resowner.h"
+#include "utils/spccache.h"
 #include "utils/timestamp.h"
 
 
@@ -2616,6 +2617,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 						Buffer *buffers,
 						uint32 *extended_by)
 {
+	int			smgr_flags = 0;
 	BlockNumber first_block;
 	IOContext	io_context = IOContextForStrategy(strategy);
 	instr_time	io_start;
@@ -2643,6 +2645,24 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 		MemSet(buf_block, 0, BLCKSZ);
 	}
 
+	/*
+	 * For multi-block extension, decide if smgr should use fallocate for an
+	 * extension of this size.  It can't decide for itself, because it can't
+	 * access the catalog with the extension lock held.  Likewise, initdb and
+	 * recovery can't access catalogs either.
+	 */
+	if (extend_by > 1 && IsUnderPostmaster)
+	{
+		int			threshold;
+
+		if (InRecovery)
+			threshold = io_min_fallocate;
+		else
+			threshold = get_tablespace_io_min_fallocate(bmr.smgr->smgr_rlocator.locator.spcOid);
+		if (threshold != 0 && extend_by >= threshold)
+			smgr_flags |= SMGR_FLAG_FALLOCATE;
+	}
+
 	/*
 	 * Lock relation against concurrent extensions, unless requested not to.
 	 *
@@ -2836,7 +2856,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 	 *
 	 * We don't need to set checksum for all-zero pages.
 	 */
-	smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
+	smgrzeroextend(bmr.smgr, fork, first_block, extend_by, smgr_flags);
 
 	/*
 	 * Release the file-extension lock; it's now OK for someone else to extend
diff --git a/src/backend/storage/smgr/bulk_write.c b/src/backend/storage/smgr/bulk_write.c
index b958be15716..ef821300f38 100644
--- a/src/backend/storage/smgr/bulk_write.c
+++ b/src/backend/storage/smgr/bulk_write.c
@@ -296,11 +296,12 @@ smgr_bulk_flush(BulkWriteState *bulkstate)
 				smgrextend(bulkstate->smgr, bulkstate->forknum,
 						   bulkstate->relsize,
 						   &zero_buffer,
-						   true);
+						   SMGR_FLAG_SKIP_FSYNC);
 				bulkstate->relsize++;
 			}
 
-			smgrextend(bulkstate->smgr, bulkstate->forknum, blkno, page, true);
+			smgrextend(bulkstate->smgr, bulkstate->forknum, blkno, page,
+					   SMGR_FLAG_SKIP_FSYNC);
 			bulkstate->relsize++;
 		}
 		else
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 6d1b9cb65b2..4f9909755c8 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -475,11 +475,12 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
  */
 void
 mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-		 const void *buffer, bool skipFsync)
+		 const void *buffer, int flags)
 {
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	bool		skipFsync = flags & SMGR_FLAG_SKIP_FSYNC;
 
 	/* If this build supports direct I/O, the buffer must be I/O aligned. */
 	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
@@ -540,11 +541,12 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  */
 void
 mdzeroextend(SMgrRelation reln, ForkNumber forknum,
-			 BlockNumber blocknum, int nblocks, bool skipFsync)
+			 BlockNumber blocknum, int nblocks, int flags)
 {
 	MdfdVec    *v;
 	BlockNumber curblocknum = blocknum;
 	int			remblocks = nblocks;
+	bool		skipFsync = flags & SMGR_FLAG_SKIP_FSYNC;
 
 	Assert(nblocks > 0);
 
@@ -588,9 +590,11 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		 * to allocate page cache space for the extended pages.
 		 *
 		 * However, we don't use FileFallocate() for small extensions, as it
-		 * defeats delayed allocation on some filesystems.
+		 * defeats delayed allocation on some filesystems.  bufmgr.c must make
+		 * that decision, because we can't access the tablespace catalog while
+		 * the extension lock is held.
 		 */
-		if (io_min_fallocate > 0 && numblocks >= io_min_fallocate)
+		if (flags & SMGR_FLAG_FALLOCATE)
 		{
 			int			ret;
 
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index bce37a36d51..dfe556ca5e2 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -97,9 +97,9 @@ typedef struct f_smgr
 	void		(*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
 								bool isRedo);
 	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
-								BlockNumber blocknum, const void *buffer, bool skipFsync);
+								BlockNumber blocknum, const void *buffer, int flags);
 	void		(*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
-									BlockNumber blocknum, int nblocks, bool skipFsync);
+									BlockNumber blocknum, int nblocks, int flags);
 	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
 								  BlockNumber blocknum, int nblocks);
 	uint32		(*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum,
@@ -618,12 +618,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
  */
 void
 smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-		   const void *buffer, bool skipFsync)
+		   const void *buffer, int flags)
 {
 	HOLD_INTERRUPTS();
 
 	smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
-										 buffer, skipFsync);
+										 buffer, flags);
 
 	/*
 	 * Normally we expect this to increase nblocks by one, but if the cached
@@ -647,12 +647,12 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  */
 void
 smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-			   int nblocks, bool skipFsync)
+			   int nblocks, int flags)
 {
 	HOLD_INTERRUPTS();
 
 	smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
-											 nblocks, skipFsync);
+											 nblocks, flags);
 
 	/*
 	 * Normally we expect this to increase the fork size by nblocks, but if
diff --git a/src/backend/utils/cache/spccache.c b/src/backend/utils/cache/spccache.c
index 23458599298..b2eb51d1bab 100644
--- a/src/backend/utils/cache/spccache.c
+++ b/src/backend/utils/cache/spccache.c
@@ -24,6 +24,7 @@
 #include "miscadmin.h"
 #include "optimizer/optimizer.h"
 #include "storage/bufmgr.h"
+#include "storage/fd.h"
 #include "utils/catcache.h"
 #include "utils/hsearch.h"
 #include "utils/inval.h"
@@ -235,3 +236,15 @@ get_tablespace_maintenance_io_concurrency(Oid spcid)
 	else
 		return spc->opts->maintenance_io_concurrency;
 }
+
+int
+get_tablespace_io_min_fallocate(Oid spcid)
+{
+	TableSpaceCacheEntry *spc;
+
+	spc = get_tablespace(spcid);
+	if (!spc->opts || spc->opts->io_min_fallocate < 0)
+		return io_min_fallocate;
+	else
+		return spc->opts->io_min_fallocate;
+}
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index ec65ab79fec..489b391b437 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3000,6 +3000,7 @@ match_previous_words(int pattern_id,
 	/* ALTER TABLESPACE <foo> SET|RESET ( */
 	else if (Matches("ALTER", "TABLESPACE", MatchAny, "SET|RESET", "("))
 		COMPLETE_WITH("seq_page_cost", "random_page_cost",
+					  "io_min_fallocate",
 					  "effective_io_concurrency", "maintenance_io_concurrency");
 
 	/* ALTER TEXT SEARCH */
diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h
index 4e8bf4dc0de..986f9d623bf 100644
--- a/src/include/commands/tablespace.h
+++ b/src/include/commands/tablespace.h
@@ -45,6 +45,7 @@ typedef struct TableSpaceOpts
 	float8		seq_page_cost;
 	int			effective_io_concurrency;
 	int			maintenance_io_concurrency;
+	int			io_min_fallocate;
 } TableSpaceOpts;
 
 extern Oid	CreateTableSpace(CreateTableSpaceStmt *stmt);
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index b563c27abf0..608267599b1 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -30,9 +30,9 @@ extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
 extern void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo);
 extern void mdextend(SMgrRelation reln, ForkNumber forknum,
-					 BlockNumber blocknum, const void *buffer, bool skipFsync);
+					 BlockNumber blocknum, const void *buffer, int flags);
 extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum,
-						 BlockNumber blocknum, int nblocks, bool skipFsync);
+						 BlockNumber blocknum, int nblocks, int flags);
 extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
 					   BlockNumber blocknum, int nblocks);
 extern uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 3964d9334b3..b8b4da0e90c 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -19,6 +19,9 @@
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
 
+#define SMGR_FLAG_SKIP_FSYNC	(1 << 0)
+#define SMGR_FLAG_FALLOCATE 	(1 << 1)
+
 /*
  * smgr.c maintains a table of SMgrRelation objects, which are essentially
  * cached file handles.  An SMgrRelation is created (if not already present)
@@ -90,9 +93,9 @@ extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
-					   BlockNumber blocknum, const void *buffer, bool skipFsync);
+					   BlockNumber blocknum, const void *buffer, int flags);
 extern void smgrzeroextend(SMgrRelation reln, ForkNumber forknum,
-						   BlockNumber blocknum, int nblocks, bool skipFsync);
+						   BlockNumber blocknum, int nblocks, int flags);
 extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum, int nblocks);
 extern uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index b552359915f..a4592f2f184 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -342,6 +342,7 @@ typedef struct StdRdOptions
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
 	int			fillfactor;		/* page fill factor in percent (0..100) */
 	int			toast_tuple_target; /* target for tuple toasting */
+	int			io_min_fallocate;
 	AutoVacOpts autovacuum;		/* autovacuum-related options */
 	bool		user_catalog_table; /* use as an additional catalog relation */
 	int			parallel_workers;	/* max number of parallel workers */
diff --git a/src/include/utils/spccache.h b/src/include/utils/spccache.h
index d7edd79b18d..497f3707e2a 100644
--- a/src/include/utils/spccache.h
+++ b/src/include/utils/spccache.h
@@ -17,5 +17,6 @@ extern void get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost,
 									  float8 *spc_seq_page_cost);
 extern int	get_tablespace_io_concurrency(Oid spcid);
 extern int	get_tablespace_maintenance_io_concurrency(Oid spcid);
+extern int	get_tablespace_io_min_fallocate(Oid spcid);
 
 #endif							/* SPCCACHE_H */
-- 
2.39.5

Re: [PING] fallocate() causes btrfs to never compress postgresql files

Reply via email to