Hi, I've been doing some testing today, and it seems heap_sync is somewhat confused by partitioned tables. I'm doing a COPY into a partitioned table (lineitem from TPC-H partitioned per month) like this:
--------------------------------------------------------------------- BEGIN; CREATE TABLE lineitem (...) PARTITION BY RANGE (l_shipdate); CREATE TABLE lineitem_1992_01 PARTITION OF lineitem FOR VALUES FROM ('1992-01-01') TO ('1992-02-01'); ... CREATE TABLE lineitem_1998_12 PARTITION OF lineitem FOR VALUES FROM ('1998-12-01') TO ('1999-01-01'); COPY INTO lineitem FROM .... COMMIT; --------------------------------------------------------------------- I'm observing COPY failing like this: ERROR: could not open file "base/16384/16427": No such file or directory where the relfilenode matches the "lineitem" relation (which is guaranteed to be empty, as it's partitioned, so it's not surprising the relfilenode does not exist). After adding an Assert(false) into mdopen(), which is where the error comes from, I got this backtrace (full backtrace attached): ... #3 0x0000000000766387 in mdopen (...) at md.c:609 #4 0x00000000007674b7 in mdnblocks (...) at md.c:876 #5 0x0000000000767a61 in mdimmedsync (...) at md.c:1034 #6 0x00000000004c62b5 in heap_sync (...) at heapam.c:9399 #7 0x00000000005a2bdd in CopyFrom (...) at copy.c:2890 #8 0x00000000005a1621 in DoCopy (...) at copy.c:992 ... So apparently CopyFrom() invokes heap_sync() on the partitioned relation, which attempts to do mdimmedsync() only on the root. That seems like a bug to me. Obviously this only applies to wal_level=minimal. There are multiple callers of heap_sync(), but only the COPY seems to be affected by this, because the rest can't see partitioned tables. So it seems heap_sync() needs to be improved to sync all partitions. regards -- Tomas Vondra http://www.2ndQuadrant.com PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
(gdb) bt #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51 #1 0x00007f79267aeb7d in __GI_abort () at abort.c:90 #2 0x0000000000878ef4 in ExceptionalCondition (conditionName=<optimized out>, errorType=<optimized out>, fileName=<optimized out>, lineNumber=<optimized out>) at assert.c:54 #3 0x0000000000766387 in mdopen (reln=<optimized out>, forknum=<optimized out>, behavior=1) at md.c:609 #4 0x00000000007674b7 in mdnblocks (reln=0x174c420, forknum=MAIN_FORKNUM) at md.c:876 #5 0x0000000000767a61 in mdimmedsync (reln=0x174c420, forknum=MAIN_FORKNUM) at md.c:1034 #6 0x00000000004c62b5 in heap_sync (rel=0x7f78a1bdf5c0) at heapam.c:9399 #7 0x00000000005a2bdd in CopyFrom (cstate=<optimized out>) at copy.c:2890 #8 0x00000000005a1621 in DoCopy (pstate=<optimized out>, stmt=0x150c298, stmt_location=<optimized out>, stmt_len=<optimized out>, processed=<optimized out>) at copy.c:992 #9 0x0000000000770ae1 in standard_ProcessUtility (pstmt=0x150c388, queryString=0x150b578 "COPY lineitem FROM '/mnt/raid/tpch/lineitem.csv' WITH (FORMAT csv, DELIMITER '|');", context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x150c678, completionTag=0x7ffe998b7b20 "") at utility.c:548 #10 0x00000000007704c2 in ProcessUtility (pstmt=0x2, queryString=0x7ffe998b7420 "", context=PROCESS_UTILITY_TOPLEVEL, params=0x7f79267acfa0 <__GI_raise+272>, queryEnv=0x0, dest=0x7ffe998b7420, completionTag=0x7ffe998b7b20 "") at utility.c:360 #11 0x00000000007700a3 in PortalRunUtility (portal=0x1572028, pstmt=0x150c388, isTopLevel=<optimized out>, setHoldSnapshot=<optimized out>, dest=0x150c678, completionTag=0x7ffe998b7b20 "") at pquery.c:1178 #12 0x000000000076f7b6 in PortalRunMulti (portal=0x1572028, isTopLevel=true, setHoldSnapshot=false, dest=0x150c678, altdest=0x150c678, completionTag=0x7ffe998b7b20 "") at pquery.c:1331 #13 0x000000000076f141 in PortalRun (portal=0x1572028, count=9223372036854775807, isTopLevel=true, run_once=<optimized out>, dest=<optimized out>, altdest=0x150c678, completionTag=0x7ffe998b7b20 "") at pquery.c:799 #14 0x000000000076e108 in exec_simple_query (query_string=0x150b578 "COPY lineitem FROM '/mnt/raid/tpch/lineitem.csv' WITH (FORMAT csv, DELIMITER '|');") at postgres.c:1122 #15 0x000000000076c045 in PostgresMain (argc=<optimized out>, argv=<optimized out>, dbname=<optimized out>, username=<optimized out>) at postgres.c:4155 #16 0x00000000006ef3d8 in BackendRun (port=0x152c620) at postmaster.c:4361 #17 0x00000000006eea96 in BackendStartup (port=<optimized out>) at postmaster.c:4033 #18 ServerLoop () at postmaster.c:1706 #19 0x00000000006ebd21 in PostmasterMain (argc=<optimized out>, argv=<optimized out>) at postmaster.c:1379 #20 0x0000000000660dd4 in main (argc=3, argv=0x1505fd0) at main.c:228 (gdb) print wal_level $1 = 0 (gdb) bt full #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51 set = {__val = {0, 9779315902183899136, 0, 0, 0, 2305878331024736256, 0, 0, 4594464784623501632, 0, 0, 0, 4594464784623501626, 0, 4619490161887593984, 0}} pid = <optimized out> tid = <optimized out> ret = <optimized out> #1 0x00007f79267aeb7d in __GI_abort () at abort.c:90 save_stage = 2 act = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0 <repeats 15 times>, 19}}, sa_flags = 1471333120, sa_restorer = 0x0} sigs = {__val = {32, 0 <repeats 15 times>}} #2 0x0000000000878ef4 in ExceptionalCondition (conditionName=<optimized out>, errorType=<optimized out>, fileName=<optimized out>, lineNumber=<optimized out>) at assert.c:54 No locals. #3 0x0000000000766387 in mdopen (reln=<optimized out>, forknum=<optimized out>, behavior=1) at md.c:609 path = 0x1532b18 "base/24576/24619" fd = -1 mdfd = <optimized out> #4 0x00000000007674b7 in mdnblocks (reln=0x174c420, forknum=MAIN_FORKNUM) at md.c:876 segno = <optimized out> v = <optimized out> nblocks = <optimized out> #5 0x0000000000767a61 in mdimmedsync (reln=0x174c420, forknum=MAIN_FORKNUM) at md.c:1034 segno = <optimized out> #6 0x00000000004c62b5 in heap_sync (rel=0x7f78a1bdf5c0) at heapam.c:9399 No locals. #7 0x00000000005a2bdd in CopyFrom (cstate=<optimized out>) at copy.c:2890 estate = <optimized out> firstBufferedLineNo = <optimized out> bufferedTuplesSize = 0 prev_leaf_part_index = <optimized out> nBufferedTuples = 0 processed = <optimized out> hi_options = <optimized out> mycid = <optimized out> tupDesc = <optimized out> resultRelInfo = <optimized out> myslot = <optimized out> mtstate = <optimized out> bufferedTuples = <optimized out> values = <optimized out> nulls = <optimized out> bistate = <optimized out> saved_resultRelInfo = <optimized out> econtext = <optimized out> tuple = <optimized out> useHeapMultiInsert = <optimized out> oldcontext = <optimized out> errcallback = <optimized out> #8 0x00000000005a1621 in DoCopy (pstate=<optimized out>, stmt=0x150c298, stmt_location=<optimized out>, stmt_len=<optimized out>, processed=<optimized out>) at copy.c:992 rel = <optimized out> relid = <optimized out> query = 0x0 cstate = 0x1532150 is_from = <optimized out> pipe = <optimized out> #9 0x0000000000770ae1 in standard_ProcessUtility (pstmt=0x150c388, queryString=0x150b578 "COPY lineitem FROM '/mnt/raid/tpch/lineitem.csv' WITH (FORMAT csv, DELIMITER '|');", context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x150c678, completionTag=0x7ffe998b7b20 "") at utility.c:548 processed = 22608872 parsetree = 0x150c298 pstate = 0x152e458 isTopLevel = <optimized out> isAtomicContext = <optimized out> #10 0x00000000007704c2 in ProcessUtility (pstmt=0x2, queryString=0x7ffe998b7420 "", context=PROCESS_UTILITY_TOPLEVEL, params=0x7f79267acfa0 <__GI_raise+272>, queryEnv=0x0, dest=0x7ffe998b7420, completionTag=0x7ffe998b7b20 "") at utility.c:360 No locals. #11 0x00000000007700a3 in PortalRunUtility (portal=0x1572028, pstmt=0x150c388, isTopLevel=<optimized out>, setHoldSnapshot=<optimized out>, dest=0x150c678, completionTag=0x7ffe998b7b20 "") at pquery.c:1178 utilityStmt = <optimized out> snapshot = 0x158fc20 #12 0x000000000076f7b6 in PortalRunMulti (portal=0x1572028, isTopLevel=true, setHoldSnapshot=false, dest=0x150c678, altdest=0x150c678, completionTag=0x7ffe998b7b20 "") at pquery.c:1331 pstmt = 0x150c388 active_snapshot_set = true stmtlist_item = 0x150c618 #13 0x000000000076f141 in PortalRun (portal=0x1572028, count=9223372036854775807, isTopLevel=true, run_once=<optimized out>, dest=<optimized out>, altdest=0x150c678, completionTag=0x7ffe998b7b20 "") at pquery.c:799 local_sigjmp_buf = {{__jmpbuf = {22487080, 1734723643579863938, 22070904, 22247592, 22608592, 1, 1734723643588252546, -1733945172490022014}, __mask_was_saved = 0, __saved_mask = {__val = {22495288, 11050096, 112, 2, 22487080, 22066272, 22070056, 140731474475808, 9070928, 140731474475703, 22487080, 22070848, 10408773, 11045917, 22070056, 1}}}} save_exception_stack = <optimized out> ---Type <return> to continue, or q <return> to quit--- save_context_stack = <optimized out> saveTopTransactionResourceOwner = 0x15378a8 saveTopTransactionContext = 0x158fad0 saveResourceOwner = 0x15378a8 saveMemoryContext = 0x158fad0 result = <optimized out> nprocessed = <optimized out> saveActivePortal = <optimized out> savePortalContext = <optimized out> #14 0x000000000076e108 in exec_simple_query (query_string=0x150b578 "COPY lineitem FROM '/mnt/raid/tpch/lineitem.csv' WITH (FORMAT csv, DELIMITER '|');") at postgres.c:1122 portal = 0x1572028 parsetree = <optimized out> completionTag = '\000' <repeats 12 times>, "\002\000\000\000R\000\000\000\000\000\000\000\006\000\000\000\000\000\000\000\b^S\001\000\000\000\000\b|\213\231\376\177\000\000x\265P\001", '\000' <repeats 11 times> plantree_list = <optimized out> format = 0 snapshot_set = true commandTag = <optimized out> querytree_list = <optimized out> receiver = 0x150c678 msec_str = '\000' <repeats 12 times>, "\002\000\000\000R\000\000\000\000\000\000\000\006\000\000\000\000\000\000" was_logged = false dest = <optimized out> parsetree_list = 0x150c350 parsetree_item = 0x150c328 oldcontext = <optimized out> save_log_statement_stats = <optimized out> use_implicit_block = <optimized out> #15 0x000000000076c045 in PostgresMain (argc=<optimized out>, argv=<optimized out>, dbname=<optimized out>, username=<optimized out>) at postgres.c:4155 query_string = <optimized out> local_sigjmp_buf = {{__jmpbuf = {140731474476272, -1734125229882698878, 22240776, 22201840, 22240512, 253, 1734723643521143682, -1733945170571390078}, __mask_was_saved = 1, __saved_mask = {__val = {0, 0, 0, 11050096, 22044800, 22201888, 9038901, 22071752, 13953968, 22240776, 22240776, 140731474476536, 8946411, 7364253961987031040, 22071752, 4398046511104}}}} send_ready_for_query = false disable_idle_in_transaction_timeout = false firstchar = 81 input_message = {data = 0x150b578 "COPY lineitem FROM '/mnt/raid/tpch/lineitem.csv' WITH (FORMAT csv, DELIMITER '|');", len = 83, maxlen = 1024, cursor = 83} #16 0x00000000006ef3d8 in BackendRun (port=0x152c620) at postmaster.c:4361 secs = 590627245 usecs = <optimized out> maxac = <optimized out> ac = 2 av = 0x1535e08 i = <optimized out> #17 0x00000000006eea96 in BackendStartup (port=<optimized out>) at postmaster.c:4033 bn = <optimized out> pid = 0 save_errno = <optimized out> #18 ServerLoop () at postmaster.c:1706 port = <optimized out> i = <optimized out> selres = <optimized out> now = <optimized out> rmask = <optimized out> last_lockfile_recheck_time = <optimized out> last_touch_time = <optimized out> readmask = {fds_bits = {24, 0 <repeats 15 times>}} nSockets = 5 #19 0x00000000006ebd21 in PostmasterMain (argc=<optimized out>, argv=<optimized out>) at postmaster.c:1379 listen_addr_saved = true output_config_variable = <optimized out> opt = <optimized out> userDoption = <optimized out> i = 0 status = <optimized out> #20 0x0000000000660dd4 in main (argc=3, argv=0x1505fd0) at main.c:228 No locals.