Hello,

We've accidentally found a subtle bug introduced by

commit 9d9c02ccd1aea8e9131d8f4edb21bf1687e40782
Author: David Rowley
Date:   Fri Apr 8 10:34:36 2022 +1200

    Teach planner and executor about monotonic window funcs


On a 32-bit system Valgrind reports use-after-free when running the "window" test:

==35487== Invalid read of size 4
==35487==    at 0x48398A4: memcpy (vg_replace_strmem.c:1035)
==35487==    by 0x1A2902: fill_val (heaptuple.c:287)
==35487==    by 0x1A2902: heap_fill_tuple (heaptuple.c:336)
==35487==    by 0x1A3C29: heap_form_minimal_tuple (heaptuple.c:1412)
==35487==    by 0x3D4555: tts_virtual_copy_minimal_tuple (execTuples.c:290)
==35487==    by 0x72FC33: ExecCopySlotMinimalTuple (tuptable.h:473)
==35487==    by 0x72FC33: tuplesort_puttupleslot (tuplesortvariants.c:610)
==35487==    by 0x403463: ExecSort (nodeSort.c:153)
==35487==    by 0x3D0C8E: ExecProcNodeFirst (execProcnode.c:464)
==35487==    by 0x40AF09: ExecProcNode (executor.h:259)
==35487==    by 0x40AF09: begin_partition (nodeWindowAgg.c:1106)
==35487==    by 0x40D259: ExecWindowAgg (nodeWindowAgg.c:2125)
==35487==    by 0x3D0C8E: ExecProcNodeFirst (execProcnode.c:464)
==35487==    by 0x405E17: ExecProcNode (executor.h:259)
==35487==    by 0x405E17: SubqueryNext (nodeSubqueryscan.c:53)
==35487==    by 0x3D41C7: ExecScanFetch (execScan.c:133)
==35487==    by 0x3D41C7: ExecScan (execScan.c:199)
==35487== Address 0xe3e8af0 is 168 bytes inside a block of size 8,192 alloc'd
==35487==    at 0x483463B: malloc (vg_replace_malloc.c:299)
==35487==    by 0x712B63: AllocSetContextCreateInternal (aset.c:446)
==35487==    by 0x3D82BE: CreateExprContextInternal (execUtils.c:253)
==35487==    by 0x3D84DC: CreateExprContext (execUtils.c:303)
==35487==    by 0x3D8750: ExecAssignExprContext (execUtils.c:482)
==35487==    by 0x40BC1A: ExecInitWindowAgg (nodeWindowAgg.c:2382)
==35487==    by 0x3D1232: ExecInitNode (execProcnode.c:346)
==35487==    by 0x4035E0: ExecInitSort (nodeSort.c:265)
==35487==    by 0x3D11AB: ExecInitNode (execProcnode.c:321)
==35487==    by 0x40BD36: ExecInitWindowAgg (nodeWindowAgg.c:2432)
==35487==    by 0x3D1232: ExecInitNode (execProcnode.c:346)
==35487==    by 0x405E99: ExecInitSubqueryScan (nodeSubqueryscan.c:126)


It's faster to run just this test under Valgrind:

        make installcheck-test TESTS='test_setup window'


This can also be reproduced on a 64-bit system by forcing int8 to be passed by reference:

--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -82,9 +82,7 @@
  *
  * Changing this requires an initdb.
  */
-#if SIZEOF_VOID_P >= 8
-#define USE_FLOAT8_BYVAL 1
-#endif
+#undef USE_FLOAT8_BYVAL

 /*
* When we don't have native spinlocks, we use semaphores to simulate them.


Futhermore, zeroing freed memory makes the test fail:

--- a/src/include/utils/memdebug.h
+++ b/src/include/utils/memdebug.h
@@ -39,7 +39,7 @@ static inline void
 wipe_mem(void *ptr, size_t size)
 {
    VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
-   memset(ptr, 0x7F, size);
+   memset(ptr, 0, size);
    VALGRIND_MAKE_MEM_NOACCESS(ptr, size);
 }

$ cat src/test/regress/regression.diffs
diff -U3 /home/sergey/pgwork/devel/src/src/test/regress/expected/window.out /home/sergey/pgwork/devel/src/src/test/regress/results/window.out --- /home/sergey/pgwork/devel/src/src/test/regress/expected/window.out 2022-11-03 18:26:52.203624217 +0300 +++ /home/sergey/pgwork/devel/src/src/test/regress/results/window.out 2022-11-16 01:47:18.494273352 +0300
@@ -3721,7 +3721,8 @@
 -----------+-------+--------+-------------+----+----+----+----
  personnel |     5 |   3500 | 12-10-2007  |  2 |  1 |  2 |  2
  sales     |     3 |   4800 | 08-01-2007  |  3 |  1 |  3 |  3
-(2 rows)
+ sales     |     4 |   4800 | 08-08-2007  |  3 |  0 |  3 |  3
+(3 rows)

-- Tests to ensure we don't push down the run condition when it's not valid to
 -- do so.


The failing query is:

SELECT * FROM
  (SELECT *,
          count(salary) OVER (PARTITION BY depname || '') c1, -- w1
          row_number() OVER (PARTITION BY depname) rn, -- w2
          count(*) OVER (PARTITION BY depname) c2, -- w2
          count(*) OVER (PARTITION BY '' || depname) c3 -- w3
   FROM empsalary
) e WHERE rn <= 1 AND c1 <= 3;


As far as I understand, ExecWindowAgg for the intermediate WindowAgg node switches into pass-through mode, stops evaluating row_number(), and returns the previous value instead. But if int8 is passed by reference, the previous value stored in econtext->ecxt_aggvalues becomes a dangling pointer when the per-output-tuple memory context is reset.

Attaching a patch that makes the window test fail on a 64-bit system.

Best regards,

--
Sergey Shinderuk                https://postgrespro.com/
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index f2a106f983d..66ab343d51c 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -82,9 +82,7 @@
  *
  * Changing this requires an initdb.
  */
-#if SIZEOF_VOID_P >= 8
-#define USE_FLOAT8_BYVAL 1
-#endif
+#undef USE_FLOAT8_BYVAL
 
 /*
  * When we don't have native spinlocks, we use semaphores to simulate them.
diff --git a/src/include/utils/memdebug.h b/src/include/utils/memdebug.h
index 6876e0aceff..7accee958d0 100644
--- a/src/include/utils/memdebug.h
+++ b/src/include/utils/memdebug.h
@@ -39,7 +39,7 @@ static inline void
 wipe_mem(void *ptr, size_t size)
 {
 	VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
-	memset(ptr, 0x7F, size);
+	memset(ptr, 0, size);
 	VALGRIND_MAKE_MEM_NOACCESS(ptr, size);
 }
 

Reply via email to