On 9/1/23 10:00, Alexander Lakhin wrote:
> Hello Thomas,
> 
> 31.08.2023 14:15, Thomas Munro wrote:
> 
>> We have a signal that is pending and not blocked, so I don't
>> immediately know why poll() hasn't returned control.
> 
> When I worked at the Postgres Pro company, we observed a similar lockup
> under rather specific conditions (we used an Elbrus CPU and the
> Elbrus-specific compiler (lcc), based on EDG).
> I managed to reproduce that lockup and Anton Voloshin investigated it.
> The issue was caused by the compiler optimization in WaitEventSetWait():
>     waiting = true;
> ...
>     while (returned_events == 0)
>     {
> ...
>         if (set->latch && set->latch->is_set)
>         {
> ...
>             break;
>         }
> 
> In that case, the compiler decided that it may place the read of
> "set->latch->is_set" before the write "waiting = true".
> (Placing "pg_compiler_barrier();" just after "waiting = true;" fixed the
> issue for us.)
> I can't provide more details for now, but maybe you could look at the
> binary
> code generated on the target platform to confirm or reject my guess.
> 

Hmmm, I'm not very good at reading the binary code, but here's what
objdump produced for WaitEventSetWait. Maybe someone will see what the
issue is.

I thought about maybe just adding the barrier in the code, but then how
would we know that this was the issue and that the barrier fixed it? It
happens so rarely that we can't draw any conclusions from a couple of test
runs.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
// Disassembly of WaitEventSetWait (AArch64 instructions, objdump listing).
//
// Register roles, as set up by the moves at 805bc8 and 805bd0-805bdc
// (incoming arguments are in x0..x4):
//   x20      = arg0 (base pointer for all [x20, #..] loads below)
//   x19      = arg1 (also spilled to [sp]); a negative value selects x23 = -1
//   [sp, #8] = arg2 (pointer to the output records, 24 bytes each)
//   w21      = arg3 (upper bound on the number of output records)
//   w24      = arg4 on entry; from 805ce4 onwards reused as the record count
//              that is finally returned in w0
//   x23      = value passed as the third argument of poll()
// Presumably these are (set, timeout, occurred_events, nevents,
// wait_event_info) -- TODO confirm against the C source.
//
// NOTE(review): the <symbol+off> annotations on adrp lines label page
// addresses only; page 0x9e4000 alone appears with several different
// symbols, so they do not identify the variable being accessed.
0000000000805ba0 <WaitEventSetWait>:
// Prologue: 176-byte frame, save d8/d9, x29/x30 and x19..x28.
  805ba0: d102c3ff      sub     sp, sp, #176
  805ba4: 6d0423e9      stp     d9, d8, [sp, #64]
  805ba8: a9057bfd      stp     x29, x30, [sp, #80]
  805bac: a9066ffc      stp     x28, x27, [sp, #96]
  805bb0: a90767fa      stp     x26, x25, [sp, #112]
  805bb4: a9085ff8      stp     x24, x23, [sp, #128]
  805bb8: a90957f6      stp     x22, x21, [sp, #144]
  805bbc: a90a4ff4      stp     x20, x19, [sp, #160]
  805bc0: 910143fd      add     x29, sp, #80
// arg3 <= 0 goes to the ExceptionalCondition call at 80604c (line number 930).
  805bc4: 7100007f      cmp     w3, #0
  805bc8: f90007e2      str     x2, [sp, #8]
  805bcc: 5400240d      b.le    0x80604c <WaitEventSetWait+0x4ac>
  805bd0: 2a0403f8      mov     w24, w4
  805bd4: 2a0303f5      mov     w21, w3
  805bd8: aa0103f3      mov     x19, x1
  805bdc: aa0003f4      mov     x20, x0
// Negative arg1: skip the clock read and use x23 = -1 (805c1c).
  805be0: b7f801e1      tbnz    x1, #63, 0x805c1c <WaitEventSetWait+0x7c>
// Non-negative arg1: record a start timestamp at [sp, #32] (clock id 4).
  805be4: 910083e1      add     x1, sp, #32
  805be8: 52800080      mov     w0, #4
  805bec: 940623f5      bl      0x98ebc0 <clock_gettime@plt>
// arg1 must fit in 31 bits (x19 >> 31 == 0); otherwise fall through to
// ExceptionalCondition (line number 939).
  805bf0: d35ffe68      lsr     x8, x19, #31
  805bf4: aa1303f7      mov     x23, x19
  805bf8: b4000148      cbz     x8, 0x805c20 <WaitEventSetWait+0x80>
  805bfc: 90ffd480      adrp    x0, 0x295000 <dsm_unpin_mapping+0x30>
  805c00: 91175c00      add     x0, x0, #1495
  805c04: f0ffd5c1      adrp    x1, 0x2c0000 <dsm_pin_segment+0x88>
  805c08: 9111f021      add     x1, x1, #1148
  805c0c: 90ffd822      adrp    x2, 0x309000 <dsm_unpin_segment+0xd0>
  805c10: 91046842      add     x2, x2, #282
  805c14: 52807563      mov     w3, #939
  805c18: 9404de3e      bl      0x93d510 <ExceptionalCondition>
  805c1c: 92800017      mov     x23, #-1
// If the byte flag at 0x9c1000+3704 is set and the pointer at 0x9e5000+480
// is non-null, store arg4 at offset 684 of that pointer. The matching
// zero store is at 805f50. Presumably wait-event reporting -- TODO confirm.
  805c20: 90000de8      adrp    x8, 0x9c1000 <ModifyWaitEvent+0xb8>
  805c24: 90000f09      adrp    x9, 0x9e5000 <PMSignalShmemSize+0x10>
  805c28: 3979e108      ldrb    w8, [x8, #3704]
  805c2c: 34000088      cbz     w8, 0x805c3c <WaitEventSetWait+0x9c>
  805c30: f940f128      ldr     x8, [x9, #480]
  805c34: b4000048      cbz     x8, 0x805c3c <WaitEventSetWait+0x9c>
  805c38: b902ad18      str     w24, [x8, #684]
// Loop-invariant setup: w25 = 24 (record stride), w26 = 57 (0x39 mask used
// with the 16-bit field at [x19, #6]), d8/d9 = 8-byte constants later stored
// at [x22, #4], x27 = page base used for the read() descriptor.
  805c3c: b0ffdb09      adrp    x9, 0x366000 <dsm_segment_address+0x24>
  805c40: b0ffdb0a      adrp    x10, 0x366000 <dsm_segment_address+0x28>
  805c44: f0000eeb      adrp    x11, 0x9e4000 <PMSignalShmemInit+0x4>
  805c48: 52800028      mov     w8, #1
  805c4c: 52800319      mov     w25, #24
  805c50: 5280073a      mov     w26, #57
  805c54: fd446128      ldr     d8, [x9, #2240]
  805c58: 90000d7b      adrp    x27, 0x9b1000 <ModifyWaitEvent+0xb0>
  805c5c: fd415949      ldr     d9, [x10, #688]
// Store 1 to the 8-byte global at 0x9e4000+3640. The same location is
// cleared with xzr at 805f34, 805f94, 805fc0 and 80601c. Presumably this is
// the "waiting = true" write discussed in the thread -- TODO confirm.
// In this listing the store is emitted before the loads at 805ca8/805cb0.
  805c60: f9071d68      str     x8, [x11, #3640]
  805c64: f90003f3      str     x19, [sp]
  805c68: 14000010      b       0x805ca8 <WaitEventSetWait+0x108>
// 805c6c: recompute the remaining wait from the timestamp difference left
// in x9/x8 by 805e90-805eec. The two movk-built bit patterns are the doubles
// 1000000.0 (0x412E848000000000) and 1000.0 (0x408F400000000000), so
// d0 = x8 / 1000000.0 + x9 * 1000.0, and x23 = arg1 - (int64)d0.
  805c6c: 9e620100      scvtf   d0, x8
  805c70: d2d09008      mov     x8, #145685290680320
  805c74: f2e825c8      movk    x8, #16686, lsl #48
  805c78: 9e670101      fmov    d1, x8
  805c7c: d2c80008      mov     x8, #70368744177664
  805c80: f2e811e8      movk    x8, #16527, lsl #48
  805c84: 1e611800      fdiv    d0, d0, d1
  805c88: 9e620121      scvtf   d1, x9
  805c8c: 9e670102      fmov    d2, x8
  805c90: 1f420020      fmadd   d0, d1, d2, d0
  805c94: 9e780008      fcvtzs  x8, d0
  805c98: cb080277      sub     x23, x19, x8
// Remaining time < 1: leave with a count of 0 (805ef4).
  805c9c: f10006ff      cmp     x23, #1
  805ca0: 540012ab      b.lt    0x805ef4 <WaitEventSetWait+0x354>
// 805ca4: loop condition -- a non-zero count ends the loop (805f30).
  805ca4: 35001478      cbnz    w24, 0x805f30 <WaitEventSetWait+0x390>
// 805ca8: loop head. If the pointer at [x20, #16] is non-null and the 8-byte
// value it points to is non-zero, take the early-out at 805efc. This matches
// the shape of "if (set->latch && set->latch->is_set)" quoted in the email --
// TODO confirm the field mapping.
  805ca8: f9400a88      ldr     x8, [x20, #16]
  805cac: b4000068      cbz     x8, 0x805cb8 <WaitEventSetWait+0x118>
  805cb0: f9400108      ldr     x8, [x8]
  805cb4: b5001248      cbnz    x8, 0x805efc <WaitEventSetWait+0x35c>
// poll([x20, #32], [x20], w23)
  805cb8: f9401280      ldr     x0, [x20, #32]
  805cbc: 2a1703e2      mov     w2, w23
  805cc0: b9400281      ldr     w1, [x20]
  805cc4: 940626eb      bl      0x98f870 <poll@plt>
// Negative result: errno check at 805e60. Zero result: leave with count 0.
  805cc8: 37f80cc0      tbnz    w0, #31, 0x805e60 <WaitEventSetWait+0x2c0>
  805ccc: 34001140      cbz     w0, 0x805ef4 <WaitEventSetWait+0x354>
// Scan setup: x28 walks 24-byte records starting at [x20, #8]; the end is
// start + (sign-extended [x20]) * 24. An empty range goes to 805e70.
  805cd0: f940069c      ldr     x28, [x20, #8]
  805cd4: b9800288      ldrsw   x8, [x20]
  805cd8: 9b197108      madd    x8, x8, x25, x28
  805cdc: eb08039f      cmp     x28, x8
  805ce0: 54000c82      b.hs    0x805e70 <WaitEventSetWait+0x2d0>
// w24 = 0 (count), x19 = cursor over 8-byte elements at [x20, #32],
// x22 = output cursor (arg2).
  805ce4: 2a1f03f8      mov     w24, wzr
  805ce8: f9401293      ldr     x19, [x20, #32]
  805cec: f94007f6      ldr     x22, [sp, #8]
// 805cf0: per-record body. Skip the record if the 16-bit field at offset 6
// of the current 8-byte element is zero (offset 6 is consistent with
// pollfd.revents -- TODO confirm).
  805cf0: 79400e68      ldrh    w8, [x19, #6]
  805cf4: 340009a8      cbz     w8, 0x805e28 <WaitEventSetWait+0x288>
// Pre-fill the output record: word 0 and the 8-byte field at +16 are copied
// from the input record, the word at +4 is zeroed.
  805cf8: b9400388      ldr     w8, [x28]
  805cfc: b90002c8      str     w8, [x22]
  805d00: f9400b88      ldr     x8, [x28, #16]
  805d04: b90006df      str     wzr, [x22, #4]
  805d08: f9000ac8      str     x8, [x22, #16]
// Dispatch on the word at [x28, #4]: 16 -> 805d84, 1 -> fall through,
// anything else -> 805da0.
  805d0c: b9400788      ldr     w8, [x28, #4]
  805d10: 7100411f      cmp     w8, #16
  805d14: 54000380      b.eq    0x805d84 <WaitEventSetWait+0x1e4>
  805d18: 7100051f      cmp     w8, #1
  805d1c: 54000421      b.ne    0x805da0 <WaitEventSetWait+0x200>
// Value 1: requires one of the 0x39 bits in the 16-bit field, then drains a
// descriptor with 16-byte reads.
  805d20: 79400e68      ldrh    w8, [x19, #6]
  805d24: 6a1a011f      tst     w8, w26
  805d28: 54000800      b.eq    0x805e28 <WaitEventSetWait+0x288>
// 805d2c: read(word at 0x9b1000+280, x29 - 32, 16)
  805d2c: b9411b60      ldr     w0, [x27, #280]
  805d30: d10083a1      sub     x1, x29, #32
  805d34: 52800202      mov     w2, #16
  805d38: 940622e2      bl      0x98e8c0 <read@plt>
// Negative -> errno handling at 805d50; zero -> error report at 805f7c;
// any of bits 4..31 set (i.e. 16 or more returned) -> read again.
  805d3c: 37f800a0      tbnz    w0, #31, 0x805d50 <WaitEventSetWait+0x1b0>
  805d40: 340011e0      cbz     w0, 0x805f7c <WaitEventSetWait+0x3dc>
  805d44: f27c6c1f      tst     x0, #0xfffffff0
  805d48: 54ffff21      b.ne    0x805d2c <WaitEventSetWait+0x18c>
  805d4c: 14000025      b       0x805de0 <WaitEventSetWait+0x240>
// read() failed: errno 35 (compared twice) is treated as "drained" and
// continues at 805de0; errno 4 retries the read; anything else goes to the
// error report at 805fa8. (Presumably EAGAIN/EWOULDBLOCK and EINTR on this
// platform -- TODO confirm.)
  805d50: 940622b8      bl      0x98e830 <__error@plt>
  805d54: b9400008      ldr     w8, [x0]
  805d58: 71008d1f      cmp     w8, #35
  805d5c: 54000420      b.eq    0x805de0 <WaitEventSetWait+0x240>
  805d60: 940622b4      bl      0x98e830 <__error@plt>
  805d64: b9400008      ldr     w8, [x0]
  805d68: 71008d1f      cmp     w8, #35
  805d6c: 540003a0      b.eq    0x805de0 <WaitEventSetWait+0x240>
  805d70: 940622b0      bl      0x98e830 <__error@plt>
  805d74: b9400008      ldr     w8, [x0]
  805d78: 7100111f      cmp     w8, #4
  805d7c: 54fffd80      b.eq    0x805d2c <WaitEventSetWait+0x18c>
  805d80: 1400008a      b       0x805fa8 <WaitEventSetWait+0x408>
// Value 16: requires one of the 0x39 bits; if PostmasterIsAlive() returns
// with bit 0 clear, store constant d8 at [x22, #4] and count the record.
  805d84: 79400e68      ldrh    w8, [x19, #6]
  805d88: 6a1a011f      tst     w8, w26
  805d8c: 540004e0      b.eq    0x805e28 <WaitEventSetWait+0x288>
  805d90: 940002bf      bl      0x80688c <PostmasterIsAlive>
  805d94: 370004a0      tbnz    w0, #0, 0x805e28 <WaitEventSetWait+0x288>
  805d98: fc0042c8      stur    d8, [x22, #4]
  805d9c: 14000021      b       0x805e20 <WaitEventSetWait+0x280>
// Other values: only bits 1 and 2 (mask 0x6) are handled here.
  805da0: 721f051f      tst     w8, #0x6
  805da4: 54000420      b.eq    0x805e28 <WaitEventSetWait+0x288>
// The word at [x28, #8] must be greater than -2 (signed); otherwise
// ExceptionalCondition at 805fdc (line number 1245).
  805da8: b9400b89      ldr     w9, [x28, #8]
  805dac: 3100093f      cmn     w9, #2
  805db0: 5400116d      b.le    0x805fdc <WaitEventSetWait+0x43c>
// Bit 1 requested and a 0x39 bit present: report 2 in [x22, #4]
// (w10 = 0 marks "something reported"; w9 = 6 is used if bit 2 also fires).
  805db4: 36080208      tbz     w8, #1, 0x805df4 <WaitEventSetWait+0x254>
  805db8: 79400e69      ldrh    w9, [x19, #6]
  805dbc: 6a1a013f      tst     w9, w26
  805dc0: 540001a0      b.eq    0x805df4 <WaitEventSetWait+0x254>
  805dc4: 52800048      mov     w8, #2
  805dc8: 2a1f03ea      mov     w10, wzr
  805dcc: 528000c9      mov     w9, #6
  805dd0: b90006c8      str     w8, [x22, #4]
  805dd4: b9400788      ldr     w8, [x28, #4]
  805dd8: 37100148      tbnz    w8, #2, 0x805e00 <WaitEventSetWait+0x260>
  805ddc: 1400000e      b       0x805e14 <WaitEventSetWait+0x274>
// 805de0: after draining. Re-read the value behind the pointer at
// [x20, #16] (no null check on this path); if non-zero, store constant d9
// at [x22, #4] and count the record.
  805de0: f9400a88      ldr     x8, [x20, #16]
  805de4: f9400108      ldr     x8, [x8]
  805de8: b4000208      cbz     x8, 0x805e28 <WaitEventSetWait+0x288>
  805dec: fc0042c9      stur    d9, [x22, #4]
  805df0: 1400000c      b       0x805e20 <WaitEventSetWait+0x280>
// 805df4: bit 1 did not fire: w9 = 4, w10 = 1 ("nothing reported yet").
  805df4: 52800089      mov     w9, #4
  805df8: 5280002a      mov     w10, #1
  805dfc: 361000c8      tbz     w8, #2, 0x805e14 <WaitEventSetWait+0x274>
// 805e00: bit 2 requested: fires on any of the 0x3c bits of the 16-bit
// field and stores w9 (4 or 6) at [x22, #4].
  805e00: 79400e68      ldrh    w8, [x19, #6]
  805e04: 721e0d1f      tst     w8, #0x3c
  805e08: 54000060      b.eq    0x805e14 <WaitEventSetWait+0x274>
  805e0c: b90006c9      str     w9, [x22, #4]
  805e10: 14000002      b       0x805e18 <WaitEventSetWait+0x278>
// 805e14: nothing reported for this record -> next record.
  805e14: 370000aa      tbnz    w10, #0, 0x805e28 <WaitEventSetWait+0x288>
// 805e18: copy the word at [x28, #8] into the output record.
  805e18: b9400b88      ldr     w8, [x28, #8]
  805e1c: b9000ac8      str     w8, [x22, #8]
// 805e20: commit the output record: advance x22 by 24 and bump the count.
  805e20: 910062d6      add     x22, x22, #24
  805e24: 11000718      add     w24, w24, #1
// 805e28: advance to the next record; the end pointer is recomputed from
// [x20, #8] and [x20] each time. Continue while records remain and
// count < arg3.
  805e28: f9400688      ldr     x8, [x20, #8]
  805e2c: 9100639c      add     x28, x28, #24
  805e30: b9800289      ldrsw   x9, [x20]
  805e34: 9b192128      madd    x8, x9, x25, x8
  805e38: eb08039f      cmp     x28, x8
  805e3c: 54000082      b.hs    0x805e4c <WaitEventSetWait+0x2ac>
  805e40: 91002273      add     x19, x19, #8
  805e44: 6b15031f      cmp     w24, w21
  805e48: 54fff54b      b.lt    0x805cf0 <WaitEventSetWait+0x150>
// 805e4c: scan finished. Reload arg1 into x19 (it was used as a cursor).
// Count == -1 leaves with 0; otherwise a non-negative arg1 goes to the
// elapsed-time path (805e78) and a negative arg1 straight to the loop test.
  805e4c: f94003f3      ldr     x19, [sp]
  805e50: 3100071f      cmn     w24, #1
  805e54: 54000500      b.eq    0x805ef4 <WaitEventSetWait+0x354>
  805e58: b6f80113      tbz     x19, #63, 0x805e78 <WaitEventSetWait+0x2d8>
  805e5c: 17ffff92      b       0x805ca4 <WaitEventSetWait+0x104>
// 805e60: poll() returned a negative value. Any errno other than 4 goes to
// the error report at 805ffc; errno 4 is treated as "no events".
  805e60: 94062274      bl      0x98e830 <__error@plt>
  805e64: b9400008      ldr     w8, [x0]
  805e68: 7100111f      cmp     w8, #4
  805e6c: 54000c81      b.ne    0x805ffc <WaitEventSetWait+0x45c>
  805e70: 2a1f03f8      mov     w24, wzr
  805e74: b7fff193      tbnz    x19, #63, 0x805ca4 <WaitEventSetWait+0x104>
// 805e78: with a non-negative arg1 and no events yet, take a second
// timestamp at [sp, #16] and subtract the start timestamp at [sp, #32].
  805e78: 35fff178      cbnz    w24, 0x805ca4 <WaitEventSetWait+0x104>
  805e7c: 910043e1      add     x1, sp, #16
  805e80: 52800080      mov     w0, #4
  805e84: 9406234f      bl      0x98ebc0 <clock_gettime@plt>
  805e88: a9422fea      ldp     x10, x11, [sp, #32]
  805e8c: a94137ec      ldp     x12, x13, [sp, #16]
  805e90: cb0a0189      sub     x9, x12, x10
  805e94: eb0b01a8      subs    x8, x13, x11
  805e98: a90123e9      stp     x9, x8, [sp, #16]
// Second-word difference non-negative: use x9/x8 as they are.
  805e9c: 54ffee85      b.pl    0x805c6c <WaitEventSetWait+0xcc>
// Otherwise adjust the pair using x9 = -1000000000 and w14 = 1000000000
// (values built by the mov/movk pairs below). Presumably this normalises a
// negative nanosecond field by borrowing from the seconds -- TODO confirm.
  805ea0: 92993fe9      mov     x9, #-51712
  805ea4: 5299400e      mov     w14, #51712
  805ea8: f2b88ca9      movk    x9, #50277, lsl #16
  805eac: 72a7734e      movk    w14, #15258, lsl #16
  805eb0: eb09011f      cmp     x8, x9
  805eb4: aa2a03ea      mvn     x10, x10
  805eb8: 9a89c108      csel    x8, x8, x9, gt
  805ebc: 8b0a018a      add     x10, x12, x10
  805ec0: 8b080168      add     x8, x11, x8
  805ec4: eb0d011f      cmp     x8, x13
  805ec8: 9a8d05a9      cinc    x9, x13, ne
  805ecc: eb0d011f      cmp     x8, x13
  805ed0: cb090109      sub     x9, x8, x9
  805ed4: 8b0e01a8      add     x8, x13, x14
  805ed8: 9ace0929      udiv    x9, x9, x14
  805edc: 9a890529      cinc    x9, x9, ne
  805ee0: 9b0e2128      madd    x8, x9, x14, x8
  805ee4: cb090149      sub     x9, x10, x9
  805ee8: cb0b0108      sub     x8, x8, x11
  805eec: a90123e9      stp     x9, x8, [sp, #16]
  805ef0: 17ffff5f      b       0x805c6c <WaitEventSetWait+0xcc>
// 805ef4: leave with a count of 0.
  805ef4: 2a1f03f8      mov     w24, wzr
  805ef8: 1400000e      b       0x805f30 <WaitEventSetWait+0x390>
// 805efc: early-out taken from the loop head (value behind [x20, #16] was
// non-zero). Fills exactly one output record at arg2 and sets the count to 1:
//   [+0] = word at [x20, #24], [+4] = 1, [+8] = -1,
//   [+16] = 8-byte field at +16 of input record number [x20, #24].
  805efc: 12800008      mov     w8, #-1
  805f00: f94007eb      ldr     x11, [sp, #8]
  805f04: 5280030a      mov     w10, #24
  805f08: 52800038      mov     w24, #1
  805f0c: b9000968      str     w8, [x11, #8]
  805f10: b9401a88      ldr     w8, [x20, #24]
  805f14: b9000168      str     w8, [x11]
  805f18: f9400688      ldr     x8, [x20, #8]
  805f1c: b9801a89      ldrsw   x9, [x20, #24]
  805f20: 9b0a2128      madd    x8, x9, x10, x8
  805f24: f9400908      ldr     x8, [x8, #16]
  805f28: b9000578      str     w24, [x11, #4]
  805f2c: f9000968      str     x8, [x11, #16]
// 805f30: common exit. Clear the global at 0x9e4000+3640 (set to 1 at
// 805c60) and, under the same flag/pointer checks as at 805c20-805c38,
// store 0 at offset 684.
  805f30: f0000ee8      adrp    x8, 0x9e4000 <IsPostmasterChildWalSender+0x5c>
  805f34: f9071d1f      str     xzr, [x8, #3640]
  805f38: 90000de8      adrp    x8, 0x9c1000 <ReleasePostmasterChildSlot+0x48>
  805f3c: 3979e108      ldrb    w8, [x8, #3704]
  805f40: 340000a8      cbz     w8, 0x805f54 <WaitEventSetWait+0x3b4>
  805f44: 90000f08      adrp    x8, 0x9e5000 <MarkPostmasterChildActive+0x14>
  805f48: f940f108      ldr     x8, [x8, #480]
  805f4c: b4000048      cbz     x8, 0x805f54 <WaitEventSetWait+0x3b4>
  805f50: b902ad1f      str     wzr, [x8, #684]
// Return the count in w0; restore callee-saved registers and the frame.
  805f54: 2a1803e0      mov     w0, w24
  805f58: a94a4ff4      ldp     x20, x19, [sp, #160]
  805f5c: a94957f6      ldp     x22, x21, [sp, #144]
  805f60: a9485ff8      ldp     x24, x23, [sp, #128]
  805f64: a94767fa      ldp     x26, x25, [sp, #112]
  805f68: a9466ffc      ldp     x28, x27, [sp, #96]
  805f6c: a9457bfd      ldp     x29, x30, [sp, #80]
  805f70: 6d4423e9      ldp     d9, d8, [sp, #64]
  805f74: 9102c3ff      add     sp, sp, #176
  805f78: d65f03c0      ret
// 805f7c: read() returned 0. Clear the global at 0x9e4000+3640, call
// elog_start with line number 1549, then join the shared tail at 805fd0.
  805f7c: f0000ee8      adrp    x8, 0x9e4000 <MarkPostmasterChildActive+0x48>
  805f80: 90ffd820      adrp    x0, 0x309000 <proc_exit_prepare+0x38>
  805f84: 91046800      add     x0, x0, #282
  805f88: f0ffd842      adrp    x2, 0x310000 <proc_exit_prepare+0x5c>
  805f8c: 9128c442      add     x2, x2, #2609
  805f90: 5280c1a1      mov     w1, #1549
  805f94: f9071d1f      str     xzr, [x8, #3640]
  805f98: 9404ef05      bl      0x941bac <elog_start>
  805f9c: d0ffda01      adrp    x1, 0x347000 <shmem_exit+0x70>
  805fa0: 91046421      add     x1, x1, #281
  805fa4: 1400000b      b       0x805fd0 <WaitEventSetWait+0x430>
// 805fa8: read() failed with an errno other than 35 or 4. Same sequence
// with line number 1543 and a different second argument for elog_finish.
  805fa8: f0000ee8      adrp    x8, 0x9e4000 <MarkPostmasterChildActive+0x74>
  805fac: 90ffd820      adrp    x0, 0x309000 <proc_exit_prepare+0x64>
  805fb0: 91046800      add     x0, x0, #282
  805fb4: f0ffd842      adrp    x2, 0x310000 <proc_exit_prepare+0x88>
  805fb8: 9128c442      add     x2, x2, #2609
  805fbc: 5280c0e1      mov     w1, #1543
  805fc0: f9071d1f      str     xzr, [x8, #3640]
  805fc4: 9404eefa      bl      0x941bac <elog_start>
  805fc8: b0ffd7e1      adrp    x1, 0x302000 <proc_exit_prepare+0x64>
  805fcc: 91276021      add     x1, x1, #2520
// 805fd0: shared tail: elog_finish(20, x1); abort() follows the call.
  805fd0: 52800280      mov     w0, #20
  805fd4: 9404ef3a      bl      0x941cbc <elog_finish>
  805fd8: 940621c6      bl      0x98e6f0 <abort@plt>
// 805fdc: ExceptionalCondition for the [x28, #8] check at 805dac
// (line number 1245).
  805fdc: b0ffd740      adrp    x0, 0x2ee000 <proc_exit_prepare+0x28>
  805fe0: 9102e000      add     x0, x0, #184
  805fe4: f0ffd5c1      adrp    x1, 0x2c0000 <reset_on_dsm_detach+0x28>
  805fe8: 9111f021      add     x1, x1, #1148
  805fec: 90ffd822      adrp    x2, 0x309000 <proc_exit_prepare+0xa4>
  805ff0: 91046842      add     x2, x2, #282
  805ff4: 52809ba3      mov     w3, #1245
  805ff8: 9404dd46      bl      0x93d510 <ExceptionalCondition>
// 805ffc: poll() failed with an errno other than 4. Clear the global at
// 0x9e4000+3640, then errstart(20, ..., 1182, ..., 0); if it returns with
// bit 0 set, build the report via errcode_for_socket_access/errmsg/errfinish.
// abort() follows on both paths.
  805ffc: f0000ee8      adrp    x8, 0x9e4000 <MarkPostmasterChildWalSender+0x3c>
  806000: f0ffd801      adrp    x1, 0x309000 <proc_exit_prepare+0xb4>
  806004: 91046821      add     x1, x1, #282
  806008: f0ffd803      adrp    x3, 0x309000 <proc_exit_prepare+0xbc>
  80600c: 91066463      add     x3, x3, #409
  806010: 52800280      mov     w0, #20
  806014: 528093c2      mov     w2, #1182
  806018: aa1f03e4      mov     x4, xzr
  80601c: f9071d1f      str     xzr, [x8, #3640]
  806020: 9404dd54      bl      0x93d570 <errstart>
  806024: 36000120      tbz     w0, #0, 0x806048 <WaitEventSetWait+0x4a8>
  806028: 9404e7ef      bl      0x93ffe4 <errcode_for_socket_access>
  80602c: 2a0003f3      mov     w19, w0
  806030: 90ffda40      adrp    x0, 0x34e000 <on_proc_exit+0x4>
  806034: 911bf800      add     x0, x0, #1790
  806038: 9404e810      bl      0x940078 <errmsg>
  80603c: 2a0003e1      mov     w1, w0
  806040: 2a1303e0      mov     w0, w19
  806044: 9404de6c      bl      0x93d9f4 <errfinish>
  806048: 940621aa      bl      0x98e6f0 <abort@plt>
// 80604c: ExceptionalCondition for the arg3 <= 0 check at 805bc4
// (line number 930).
  80604c: f0ffd680      adrp    x0, 0x2d9000 <proc_exit_prepare+0x40>
  806050: 91271400      add     x0, x0, #2501
  806054: d0ffd5c1      adrp    x1, 0x2c0000 <proc_exit+0x2c>
  806058: 9111f021      add     x1, x1, #1148
  80605c: f0ffd802      adrp    x2, 0x309000 <shmem_exit+0x34>
  806060: 91046842      add     x2, x2, #282
  806064: 52807443      mov     w3, #930
  806068: 9404dd2a      bl      0x93d510 <ExceptionalCondition>

Reply via email to