On 9/1/23 10:00, Alexander Lakhin wrote:
> Hello Thomas,
>
> 31.08.2023 14:15, Thomas Munro wrote:
>
>> We have a signal that is pending and not blocked, so I don't
>> immediately know why poll() hasn't returned control.
>
> When I worked at the Postgres Pro company, we observed a similar lockup
> under rather specific conditions (we used an Elbrus CPU and the
> Elbrus-specific compiler (lcc), which is based on EDG).
> I managed to reproduce that lockup and Anton Voloshin investigated it.
> The issue was caused by a compiler optimization in WaitEventSetWait():
>
>     waiting = true;
>     ...
>     while (returned_events == 0)
>     {
>         ...
>         if (set->latch && set->latch->is_set)
>         {
>             ...
>             break;
>         }
>
> In that case, the compiler decided that it could place the read of
> "set->latch->is_set" before the write "waiting = true".
> (Placing "pg_compiler_barrier();" just after "waiting = true;" fixed the
> issue for us.)
> I can't provide more details for now, but maybe you could look at the
> binary code generated on the target platform to confirm or reject my guess.
>
Hmmm, I'm not very good at reading the binary code, but here's what
objdump produced for WaitEventSetWait. Maybe someone will see what the
issue is.
I thought about just adding the barrier to the code, but then how would
we know that this was the issue and that the barrier fixed it? It happens
so rarely that we can't draw any conclusions from a couple of test runs.
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
0000000000805ba0 <WaitEventSetWait>:
805ba0: d102c3ff sub sp, sp, #176
805ba4: 6d0423e9 stp d9, d8, [sp, #64]
805ba8: a9057bfd stp x29, x30, [sp, #80]
805bac: a9066ffc stp x28, x27, [sp, #96]
805bb0: a90767fa stp x26, x25, [sp, #112]
805bb4: a9085ff8 stp x24, x23, [sp, #128]
805bb8: a90957f6 stp x22, x21, [sp, #144]
805bbc: a90a4ff4 stp x20, x19, [sp, #160]
805bc0: 910143fd add x29, sp, #80
805bc4: 7100007f cmp w3, #0
805bc8: f90007e2 str x2, [sp, #8]
805bcc: 5400240d b.le 0x80604c <WaitEventSetWait+0x4ac>
805bd0: 2a0403f8 mov w24, w4
805bd4: 2a0303f5 mov w21, w3
805bd8: aa0103f3 mov x19, x1
805bdc: aa0003f4 mov x20, x0
805be0: b7f801e1 tbnz x1, #63, 0x805c1c <WaitEventSetWait+0x7c>
805be4: 910083e1 add x1, sp, #32
805be8: 52800080 mov w0, #4
805bec: 940623f5 bl 0x98ebc0 <clock_gettime@plt>
805bf0: d35ffe68 lsr x8, x19, #31
805bf4: aa1303f7 mov x23, x19
805bf8: b4000148 cbz x8, 0x805c20 <WaitEventSetWait+0x80>
805bfc: 90ffd480 adrp x0, 0x295000 <dsm_unpin_mapping+0x30>
805c00: 91175c00 add x0, x0, #1495
805c04: f0ffd5c1 adrp x1, 0x2c0000 <dsm_pin_segment+0x88>
805c08: 9111f021 add x1, x1, #1148
805c0c: 90ffd822 adrp x2, 0x309000 <dsm_unpin_segment+0xd0>
805c10: 91046842 add x2, x2, #282
805c14: 52807563 mov w3, #939
805c18: 9404de3e bl 0x93d510 <ExceptionalCondition>
805c1c: 92800017 mov x23, #-1
805c20: 90000de8 adrp x8, 0x9c1000 <ModifyWaitEvent+0xb8>
805c24: 90000f09 adrp x9, 0x9e5000 <PMSignalShmemSize+0x10>
805c28: 3979e108 ldrb w8, [x8, #3704]
805c2c: 34000088 cbz w8, 0x805c3c <WaitEventSetWait+0x9c>
805c30: f940f128 ldr x8, [x9, #480]
805c34: b4000048 cbz x8, 0x805c3c <WaitEventSetWait+0x9c>
805c38: b902ad18 str w24, [x8, #684]
805c3c: b0ffdb09 adrp x9, 0x366000 <dsm_segment_address+0x24>
805c40: b0ffdb0a adrp x10, 0x366000 <dsm_segment_address+0x28>
805c44: f0000eeb adrp x11, 0x9e4000 <PMSignalShmemInit+0x4>
805c48: 52800028 mov w8, #1
805c4c: 52800319 mov w25, #24
805c50: 5280073a mov w26, #57
805c54: fd446128 ldr d8, [x9, #2240]
805c58: 90000d7b adrp x27, 0x9b1000 <ModifyWaitEvent+0xb0>
805c5c: fd415949 ldr d9, [x10, #688]
805c60: f9071d68 str x8, [x11, #3640]
805c64: f90003f3 str x19, [sp]
805c68: 14000010 b 0x805ca8 <WaitEventSetWait+0x108>
805c6c: 9e620100 scvtf d0, x8
805c70: d2d09008 mov x8, #145685290680320
805c74: f2e825c8 movk x8, #16686, lsl #48
805c78: 9e670101 fmov d1, x8
805c7c: d2c80008 mov x8, #70368744177664
805c80: f2e811e8 movk x8, #16527, lsl #48
805c84: 1e611800 fdiv d0, d0, d1
805c88: 9e620121 scvtf d1, x9
805c8c: 9e670102 fmov d2, x8
805c90: 1f420020 fmadd d0, d1, d2, d0
805c94: 9e780008 fcvtzs x8, d0
805c98: cb080277 sub x23, x19, x8
805c9c: f10006ff cmp x23, #1
805ca0: 540012ab b.lt 0x805ef4 <WaitEventSetWait+0x354>
805ca4: 35001478 cbnz w24, 0x805f30 <WaitEventSetWait+0x390>
805ca8: f9400a88 ldr x8, [x20, #16]
805cac: b4000068 cbz x8, 0x805cb8 <WaitEventSetWait+0x118>
805cb0: f9400108 ldr x8, [x8]
805cb4: b5001248 cbnz x8, 0x805efc <WaitEventSetWait+0x35c>
805cb8: f9401280 ldr x0, [x20, #32]
805cbc: 2a1703e2 mov w2, w23
805cc0: b9400281 ldr w1, [x20]
805cc4: 940626eb bl 0x98f870 <poll@plt>
805cc8: 37f80cc0 tbnz w0, #31, 0x805e60 <WaitEventSetWait+0x2c0>
805ccc: 34001140 cbz w0, 0x805ef4 <WaitEventSetWait+0x354>
805cd0: f940069c ldr x28, [x20, #8]
805cd4: b9800288 ldrsw x8, [x20]
805cd8: 9b197108 madd x8, x8, x25, x28
805cdc: eb08039f cmp x28, x8
805ce0: 54000c82 b.hs 0x805e70 <WaitEventSetWait+0x2d0>
805ce4: 2a1f03f8 mov w24, wzr
805ce8: f9401293 ldr x19, [x20, #32]
805cec: f94007f6 ldr x22, [sp, #8]
805cf0: 79400e68 ldrh w8, [x19, #6]
805cf4: 340009a8 cbz w8, 0x805e28 <WaitEventSetWait+0x288>
805cf8: b9400388 ldr w8, [x28]
805cfc: b90002c8 str w8, [x22]
805d00: f9400b88 ldr x8, [x28, #16]
805d04: b90006df str wzr, [x22, #4]
805d08: f9000ac8 str x8, [x22, #16]
805d0c: b9400788 ldr w8, [x28, #4]
805d10: 7100411f cmp w8, #16
805d14: 54000380 b.eq 0x805d84 <WaitEventSetWait+0x1e4>
805d18: 7100051f cmp w8, #1
805d1c: 54000421 b.ne 0x805da0 <WaitEventSetWait+0x200>
805d20: 79400e68 ldrh w8, [x19, #6]
805d24: 6a1a011f tst w8, w26
805d28: 54000800 b.eq 0x805e28 <WaitEventSetWait+0x288>
805d2c: b9411b60 ldr w0, [x27, #280]
805d30: d10083a1 sub x1, x29, #32
805d34: 52800202 mov w2, #16
805d38: 940622e2 bl 0x98e8c0 <read@plt>
805d3c: 37f800a0 tbnz w0, #31, 0x805d50 <WaitEventSetWait+0x1b0>
805d40: 340011e0 cbz w0, 0x805f7c <WaitEventSetWait+0x3dc>
805d44: f27c6c1f tst x0, #0xfffffff0
805d48: 54ffff21 b.ne 0x805d2c <WaitEventSetWait+0x18c>
805d4c: 14000025 b 0x805de0 <WaitEventSetWait+0x240>
805d50: 940622b8 bl 0x98e830 <__error@plt>
805d54: b9400008 ldr w8, [x0]
805d58: 71008d1f cmp w8, #35
805d5c: 54000420 b.eq 0x805de0 <WaitEventSetWait+0x240>
805d60: 940622b4 bl 0x98e830 <__error@plt>
805d64: b9400008 ldr w8, [x0]
805d68: 71008d1f cmp w8, #35
805d6c: 540003a0 b.eq 0x805de0 <WaitEventSetWait+0x240>
805d70: 940622b0 bl 0x98e830 <__error@plt>
805d74: b9400008 ldr w8, [x0]
805d78: 7100111f cmp w8, #4
805d7c: 54fffd80 b.eq 0x805d2c <WaitEventSetWait+0x18c>
805d80: 1400008a b 0x805fa8 <WaitEventSetWait+0x408>
805d84: 79400e68 ldrh w8, [x19, #6]
805d88: 6a1a011f tst w8, w26
805d8c: 540004e0 b.eq 0x805e28 <WaitEventSetWait+0x288>
805d90: 940002bf bl 0x80688c <PostmasterIsAlive>
805d94: 370004a0 tbnz w0, #0, 0x805e28 <WaitEventSetWait+0x288>
805d98: fc0042c8 stur d8, [x22, #4]
805d9c: 14000021 b 0x805e20 <WaitEventSetWait+0x280>
805da0: 721f051f tst w8, #0x6
805da4: 54000420 b.eq 0x805e28 <WaitEventSetWait+0x288>
805da8: b9400b89 ldr w9, [x28, #8]
805dac: 3100093f cmn w9, #2
805db0: 5400116d b.le 0x805fdc <WaitEventSetWait+0x43c>
805db4: 36080208 tbz w8, #1, 0x805df4 <WaitEventSetWait+0x254>
805db8: 79400e69 ldrh w9, [x19, #6]
805dbc: 6a1a013f tst w9, w26
805dc0: 540001a0 b.eq 0x805df4 <WaitEventSetWait+0x254>
805dc4: 52800048 mov w8, #2
805dc8: 2a1f03ea mov w10, wzr
805dcc: 528000c9 mov w9, #6
805dd0: b90006c8 str w8, [x22, #4]
805dd4: b9400788 ldr w8, [x28, #4]
805dd8: 37100148 tbnz w8, #2, 0x805e00 <WaitEventSetWait+0x260>
805ddc: 1400000e b 0x805e14 <WaitEventSetWait+0x274>
805de0: f9400a88 ldr x8, [x20, #16]
805de4: f9400108 ldr x8, [x8]
805de8: b4000208 cbz x8, 0x805e28 <WaitEventSetWait+0x288>
805dec: fc0042c9 stur d9, [x22, #4]
805df0: 1400000c b 0x805e20 <WaitEventSetWait+0x280>
805df4: 52800089 mov w9, #4
805df8: 5280002a mov w10, #1
805dfc: 361000c8 tbz w8, #2, 0x805e14 <WaitEventSetWait+0x274>
805e00: 79400e68 ldrh w8, [x19, #6]
805e04: 721e0d1f tst w8, #0x3c
805e08: 54000060 b.eq 0x805e14 <WaitEventSetWait+0x274>
805e0c: b90006c9 str w9, [x22, #4]
805e10: 14000002 b 0x805e18 <WaitEventSetWait+0x278>
805e14: 370000aa tbnz w10, #0, 0x805e28 <WaitEventSetWait+0x288>
805e18: b9400b88 ldr w8, [x28, #8]
805e1c: b9000ac8 str w8, [x22, #8]
805e20: 910062d6 add x22, x22, #24
805e24: 11000718 add w24, w24, #1
805e28: f9400688 ldr x8, [x20, #8]
805e2c: 9100639c add x28, x28, #24
805e30: b9800289 ldrsw x9, [x20]
805e34: 9b192128 madd x8, x9, x25, x8
805e38: eb08039f cmp x28, x8
805e3c: 54000082 b.hs 0x805e4c <WaitEventSetWait+0x2ac>
805e40: 91002273 add x19, x19, #8
805e44: 6b15031f cmp w24, w21
805e48: 54fff54b b.lt 0x805cf0 <WaitEventSetWait+0x150>
805e4c: f94003f3 ldr x19, [sp]
805e50: 3100071f cmn w24, #1
805e54: 54000500 b.eq 0x805ef4 <WaitEventSetWait+0x354>
805e58: b6f80113 tbz x19, #63, 0x805e78 <WaitEventSetWait+0x2d8>
805e5c: 17ffff92 b 0x805ca4 <WaitEventSetWait+0x104>
805e60: 94062274 bl 0x98e830 <__error@plt>
805e64: b9400008 ldr w8, [x0]
805e68: 7100111f cmp w8, #4
805e6c: 54000c81 b.ne 0x805ffc <WaitEventSetWait+0x45c>
805e70: 2a1f03f8 mov w24, wzr
805e74: b7fff193 tbnz x19, #63, 0x805ca4 <WaitEventSetWait+0x104>
805e78: 35fff178 cbnz w24, 0x805ca4 <WaitEventSetWait+0x104>
805e7c: 910043e1 add x1, sp, #16
805e80: 52800080 mov w0, #4
805e84: 9406234f bl 0x98ebc0 <clock_gettime@plt>
805e88: a9422fea ldp x10, x11, [sp, #32]
805e8c: a94137ec ldp x12, x13, [sp, #16]
805e90: cb0a0189 sub x9, x12, x10
805e94: eb0b01a8 subs x8, x13, x11
805e98: a90123e9 stp x9, x8, [sp, #16]
805e9c: 54ffee85 b.pl 0x805c6c <WaitEventSetWait+0xcc>
805ea0: 92993fe9 mov x9, #-51712
805ea4: 5299400e mov w14, #51712
805ea8: f2b88ca9 movk x9, #50277, lsl #16
805eac: 72a7734e movk w14, #15258, lsl #16
805eb0: eb09011f cmp x8, x9
805eb4: aa2a03ea mvn x10, x10
805eb8: 9a89c108 csel x8, x8, x9, gt
805ebc: 8b0a018a add x10, x12, x10
805ec0: 8b080168 add x8, x11, x8
805ec4: eb0d011f cmp x8, x13
805ec8: 9a8d05a9 cinc x9, x13, ne
805ecc: eb0d011f cmp x8, x13
805ed0: cb090109 sub x9, x8, x9
805ed4: 8b0e01a8 add x8, x13, x14
805ed8: 9ace0929 udiv x9, x9, x14
805edc: 9a890529 cinc x9, x9, ne
805ee0: 9b0e2128 madd x8, x9, x14, x8
805ee4: cb090149 sub x9, x10, x9
805ee8: cb0b0108 sub x8, x8, x11
805eec: a90123e9 stp x9, x8, [sp, #16]
805ef0: 17ffff5f b 0x805c6c <WaitEventSetWait+0xcc>
805ef4: 2a1f03f8 mov w24, wzr
805ef8: 1400000e b 0x805f30 <WaitEventSetWait+0x390>
805efc: 12800008 mov w8, #-1
805f00: f94007eb ldr x11, [sp, #8]
805f04: 5280030a mov w10, #24
805f08: 52800038 mov w24, #1
805f0c: b9000968 str w8, [x11, #8]
805f10: b9401a88 ldr w8, [x20, #24]
805f14: b9000168 str w8, [x11]
805f18: f9400688 ldr x8, [x20, #8]
805f1c: b9801a89 ldrsw x9, [x20, #24]
805f20: 9b0a2128 madd x8, x9, x10, x8
805f24: f9400908 ldr x8, [x8, #16]
805f28: b9000578 str w24, [x11, #4]
805f2c: f9000968 str x8, [x11, #16]
805f30: f0000ee8 adrp x8, 0x9e4000 <IsPostmasterChildWalSender+0x5c>
805f34: f9071d1f str xzr, [x8, #3640]
805f38: 90000de8 adrp x8, 0x9c1000 <ReleasePostmasterChildSlot+0x48>
805f3c: 3979e108 ldrb w8, [x8, #3704]
805f40: 340000a8 cbz w8, 0x805f54 <WaitEventSetWait+0x3b4>
805f44: 90000f08 adrp x8, 0x9e5000 <MarkPostmasterChildActive+0x14>
805f48: f940f108 ldr x8, [x8, #480]
805f4c: b4000048 cbz x8, 0x805f54 <WaitEventSetWait+0x3b4>
805f50: b902ad1f str wzr, [x8, #684]
805f54: 2a1803e0 mov w0, w24
805f58: a94a4ff4 ldp x20, x19, [sp, #160]
805f5c: a94957f6 ldp x22, x21, [sp, #144]
805f60: a9485ff8 ldp x24, x23, [sp, #128]
805f64: a94767fa ldp x26, x25, [sp, #112]
805f68: a9466ffc ldp x28, x27, [sp, #96]
805f6c: a9457bfd ldp x29, x30, [sp, #80]
805f70: 6d4423e9 ldp d9, d8, [sp, #64]
805f74: 9102c3ff add sp, sp, #176
805f78: d65f03c0 ret
805f7c: f0000ee8 adrp x8, 0x9e4000 <MarkPostmasterChildActive+0x48>
805f80: 90ffd820 adrp x0, 0x309000 <proc_exit_prepare+0x38>
805f84: 91046800 add x0, x0, #282
805f88: f0ffd842 adrp x2, 0x310000 <proc_exit_prepare+0x5c>
805f8c: 9128c442 add x2, x2, #2609
805f90: 5280c1a1 mov w1, #1549
805f94: f9071d1f str xzr, [x8, #3640]
805f98: 9404ef05 bl 0x941bac <elog_start>
805f9c: d0ffda01 adrp x1, 0x347000 <shmem_exit+0x70>
805fa0: 91046421 add x1, x1, #281
805fa4: 1400000b b 0x805fd0 <WaitEventSetWait+0x430>
805fa8: f0000ee8 adrp x8, 0x9e4000 <MarkPostmasterChildActive+0x74>
805fac: 90ffd820 adrp x0, 0x309000 <proc_exit_prepare+0x64>
805fb0: 91046800 add x0, x0, #282
805fb4: f0ffd842 adrp x2, 0x310000 <proc_exit_prepare+0x88>
805fb8: 9128c442 add x2, x2, #2609
805fbc: 5280c0e1 mov w1, #1543
805fc0: f9071d1f str xzr, [x8, #3640]
805fc4: 9404eefa bl 0x941bac <elog_start>
805fc8: b0ffd7e1 adrp x1, 0x302000 <proc_exit_prepare+0x64>
805fcc: 91276021 add x1, x1, #2520
805fd0: 52800280 mov w0, #20
805fd4: 9404ef3a bl 0x941cbc <elog_finish>
805fd8: 940621c6 bl 0x98e6f0 <abort@plt>
805fdc: b0ffd740 adrp x0, 0x2ee000 <proc_exit_prepare+0x28>
805fe0: 9102e000 add x0, x0, #184
805fe4: f0ffd5c1 adrp x1, 0x2c0000 <reset_on_dsm_detach+0x28>
805fe8: 9111f021 add x1, x1, #1148
805fec: 90ffd822 adrp x2, 0x309000 <proc_exit_prepare+0xa4>
805ff0: 91046842 add x2, x2, #282
805ff4: 52809ba3 mov w3, #1245
805ff8: 9404dd46 bl 0x93d510 <ExceptionalCondition>
805ffc: f0000ee8 adrp x8, 0x9e4000 <MarkPostmasterChildWalSender+0x3c>
806000: f0ffd801 adrp x1, 0x309000 <proc_exit_prepare+0xb4>
806004: 91046821 add x1, x1, #282
806008: f0ffd803 adrp x3, 0x309000 <proc_exit_prepare+0xbc>
80600c: 91066463 add x3, x3, #409
806010: 52800280 mov w0, #20
806014: 528093c2 mov w2, #1182
806018: aa1f03e4 mov x4, xzr
80601c: f9071d1f str xzr, [x8, #3640]
806020: 9404dd54 bl 0x93d570 <errstart>
806024: 36000120 tbz w0, #0, 0x806048 <WaitEventSetWait+0x4a8>
806028: 9404e7ef bl 0x93ffe4 <errcode_for_socket_access>
80602c: 2a0003f3 mov w19, w0
806030: 90ffda40 adrp x0, 0x34e000 <on_proc_exit+0x4>
806034: 911bf800 add x0, x0, #1790
806038: 9404e810 bl 0x940078 <errmsg>
80603c: 2a0003e1 mov w1, w0
806040: 2a1303e0 mov w0, w19
806044: 9404de6c bl 0x93d9f4 <errfinish>
806048: 940621aa bl 0x98e6f0 <abort@plt>
80604c: f0ffd680 adrp x0, 0x2d9000 <proc_exit_prepare+0x40>
806050: 91271400 add x0, x0, #2501
806054: d0ffd5c1 adrp x1, 0x2c0000 <proc_exit+0x2c>
806058: 9111f021 add x1, x1, #1148
80605c: f0ffd802 adrp x2, 0x309000 <shmem_exit+0x34>
806060: 91046842 add x2, x2, #282
806064: 52807443 mov w3, #930
806068: 9404dd2a bl 0x93d510 <ExceptionalCondition>