13.10.2025 01:44, Thomas Munro wrote:
> On Sun, Oct 12, 2025 at 6:00 PM Alexander Lakhin <[email protected]> wrote:
>> Please find those attached (gdb "disass/m pgaio_io_update_state" misses
>> the start of the function (but it's still disassembled below), so I
>> decided to share the whole output).
> Could you please also disassemble pgaio_io_reclaim()?

Sure, the output of disass/m pgaio_io_reclaim is attached.

A side note: I could not reproduce the hang so far — 8 out of 30
iterations of 027_stream_regress failed, but all completed within
700-800 seconds.

Best regards,
Alexander
GNU gdb (Debian 15.2-1) 15.2
Copyright (C) 2024 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "riscv64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
    <http://www.gnu.org/software/gdb/documentation/>.

For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from tmp_install/usr/local/pgsql/bin/postgres...
(gdb) Dump of assembler code for function pgaio_io_reclaim:
341             Assert(ioh >= pgaio_ctl->io_handles &&
   0x00000000004366d0 <+162>:   auipc   a0,0x4c3
   0x00000000004366d4 <+166>:   ld      a1,-1192(a0) # 0x8f9228 <pgaio_ctl>
   0x00000000004366d8 <+170>:   ld      a0,48(a1)
   0x00000000004366da <+172>:   bltu    s0,a0,0x436886 <pgaio_io_reclaim+600>
   0x00000000004366de <+176>:   lwu     a1,40(a1)
   0x00000000004366e2 <+180>:   li      a2,144
   0x00000000004366e6 <+184>:   mul     a1,a1,a2
   0x00000000004366ea <+188>:   add     a1,a1,a0
   0x00000000004366ec <+190>:   bgeu    s0,a1,0x436886 <pgaio_io_reclaim+600>
   0x00000000004366f0 <+194>:   auipc   a1,0x4c1
   0x00000000004366f4 <+198>:   ld      s4,-296(a1) # 0x8f75c8
   0x0000000000436886 <+600>:   auipc   a0,0x242
   0x000000000043688a <+604>:   addi    a0,a0,-271 # 0x678777
   0x000000000043688e <+608>:   auipc   a1,0x242
   0x0000000000436892 <+612>:   addi    a1,a1,-657 # 0x6785fd
   0x0000000000436896 <+616>:   li      a2,342
   0x000000000043689a <+620>:   auipc   ra,0x160
   0x000000000043689e <+624>:   jalr    174(ra) # 0x596948 
<ExceptionalCondition>

342                        ioh < (pgaio_ctl->io_handles + 
pgaio_ctl->io_handle_count));
343             return ioh - pgaio_ctl->io_handles;
   0x00000000004366f8 <+202>:   sub     s1,s0,a0
   0x00000000004366fc <+206>:   srai    s1,s1,0x4

344     }
345     
346     /*
347      * Return the ProcNumber for the process that can use an IO handle. The
348      * mapping from IO handles to PGPROCs is static, therefore this even 
works
349      * when the corresponding PGPROC is not in use.
350      */
351     ProcNumber
352     pgaio_io_get_owner(PgAioHandle *ioh)
353     {
354             return ioh->owner_procno;
355     }
356     
357     /*
358      * Return a wait reference for the IO. Only wait references can be used 
to
359      * wait for an IOs completion, as handles themselves can be reused after
360      * completion.  See also the comment above pgaio_io_acquire().
361      */
362     void
363     pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
364     {
365             Assert(ioh->state == PGAIO_HS_HANDED_OUT ||
366                        ioh->state == PGAIO_HS_DEFINED ||
367                        ioh->state == PGAIO_HS_STAGED);
368             Assert(ioh->generation != 0);
369     
370             iow->aio_index = ioh - pgaio_ctl->io_handles;
371             iow->generation_upper = (uint32) (ioh->generation >> 32);
372             iow->generation_lower = (uint32) ioh->generation;
373     }
374     
375     
376     
377     /* 
--------------------------------------------------------------------------------
378      * Internal Functions related to PgAioHandle
379      * 
--------------------------------------------------------------------------------
380      */
381     
382     static inline void
383     pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
384     {
385             /*
386              * All callers need to have held interrupts in some form, 
otherwise
387              * interrupt processing could wait for the IO to complete, 
while in an
388              * intermediary state.
389              */
390             Assert(!INTERRUPTS_CAN_BE_PROCESSED());
391     
392             pgaio_debug_io(DEBUG5, ioh,
393                                        "updating state to %s",
394                                        pgaio_io_state_get_name(new_state));
395     
396             /*
397              * Ensure the changes signified by the new state are visible 
before the
398              * new state becomes visible.
399              */
400             pg_write_barrier();
401     
402             ioh->state = new_state;
403     }
404     
405     static void
406     pgaio_io_resowner_register(PgAioHandle *ioh)
407     {
408             Assert(!ioh->resowner);
409             Assert(CurrentResourceOwner);
410     
411             ResourceOwnerRememberAioHandle(CurrentResourceOwner, 
&ioh->resowner_node);
412             ioh->resowner = CurrentResourceOwner;
413     }
414     
415     /*
416      * Stage IO for execution and, if appropriate, submit it immediately.
417      *
418      * Should only be called from pgaio_io_start_*().
419      */
420     void
421     pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
422     {
423             bool            needs_synchronous;
424     
425             Assert(ioh->state == PGAIO_HS_HANDED_OUT);
426             Assert(pgaio_my_backend->handed_out_io == ioh);
427             Assert(pgaio_io_has_target(ioh));
428     
429             /*
430              * Otherwise an interrupt, in the middle of staging and 
possibly executing
431              * the IO, could end up trying to wait for the IO, leading to 
state
432              * confusion.
433              */
434             HOLD_INTERRUPTS();
435     
436             ioh->op = op;
437             ioh->result = 0;
438     
439             pgaio_io_update_state(ioh, PGAIO_HS_DEFINED);
440     
441             /* allow a new IO to be staged */
442             pgaio_my_backend->handed_out_io = NULL;
443     
444             pgaio_io_call_stage(ioh);
445     
446             pgaio_io_update_state(ioh, PGAIO_HS_STAGED);
447     
448             /*
449              * Synchronous execution has to be executed, well, 
synchronously, so check
450              * that first.
451              */
452             needs_synchronous = pgaio_io_needs_synchronous_execution(ioh);
453     
454             pgaio_debug_io(DEBUG3, ioh,
455                                        "staged (synchronous: %d, in_batch: 
%d)",
456                                        needs_synchronous, 
pgaio_my_backend->in_batchmode);
457     
458             if (!needs_synchronous)
459             {
460                     
pgaio_my_backend->staged_ios[pgaio_my_backend->num_staged_ios++] = ioh;
461                     Assert(pgaio_my_backend->num_staged_ios <= 
PGAIO_SUBMIT_BATCH_SIZE);
462     
463                     /*
464                      * Unless code explicitly opted into batching IOs, 
submit the IO
465                      * immediately.
466                      */
467                     if (!pgaio_my_backend->in_batchmode)
468                             pgaio_submit_staged();
469             }
470             else
471             {
472                     pgaio_io_prepare_submit(ioh);
473                     pgaio_io_perform_synchronously(ioh);
474             }
475     
476             RESUME_INTERRUPTS();
477     }
478     
479     bool
480     pgaio_io_needs_synchronous_execution(PgAioHandle *ioh)
481     {
482             /*
483              * If the caller said to execute the IO synchronously, do so.
484              *
485              * XXX: We could optimize the logic when to execute 
synchronously by first
486              * checking if there are other IOs in flight and only 
synchronously
487              * executing if not. Unclear whether that'll be sufficiently 
common to be
488              * worth worrying about.
489              */
490             if (ioh->flags & PGAIO_HF_SYNCHRONOUS)
491                     return true;
492     
493             /* Check if the IO method requires synchronous execution of IO 
*/
494             if (pgaio_method_ops->needs_synchronous_execution)
495                     return 
pgaio_method_ops->needs_synchronous_execution(ioh);
496     
497             return false;
498     }
499     
500     /*
501      * Handle IO being processed by IO method.
502      *
503      * Should be called by IO methods / synchronous IO execution, just 
before the
504      * IO is performed.
505      */
506     void
507     pgaio_io_prepare_submit(PgAioHandle *ioh)
508     {
509             pgaio_io_update_state(ioh, PGAIO_HS_SUBMITTED);
510     
511             dclist_push_tail(&pgaio_my_backend->in_flight_ios, &ioh->node);
512     }
513     
514     /*
515      * Handle IO getting completed by a method.
516      *
517      * Should be called by IO methods / synchronous IO execution, just 
after the
518      * IO has been performed.
519      *
520      * Expects to be called in a critical section. We expect IOs to be 
usable for
521      * WAL etc, which requires being able to execute completion callbacks 
in a
522      * critical section.
523      */
524     void
525     pgaio_io_process_completion(PgAioHandle *ioh, int result)
526     {
527             Assert(ioh->state == PGAIO_HS_SUBMITTED);
528     
529             Assert(CritSectionCount > 0);
530     
531             ioh->result = result;
532     
533             pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_IO);
534     
535             INJECTION_POINT("aio-process-completion-before-shared", ioh);
536     
537             pgaio_io_call_complete_shared(ioh);
538     
539             pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_SHARED);
540     
541             /* condition variable broadcast ensures state is visible before 
wakeup */
542             ConditionVariableBroadcast(&ioh->cv);
543     
544             /* contains call to pgaio_io_call_complete_local() */
545             if (ioh->owner_procno == MyProcNumber)
546                     pgaio_io_reclaim(ioh);
547     }
548     
549     /*
550      * Has the IO completed and thus the IO handle been reused?
551      *
552      * This is useful when waiting for IO completion at a low level (e.g. 
in an IO
553      * method's ->wait_one() callback).
554      */
555     bool
556     pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, 
PgAioHandleState *state)
557     {
558             *state = ioh->state;
559     
560             /*
561              * Ensure that we don't see an earlier state of the handle than 
ioh->state
562              * due to compiler or CPU reordering. This protects both 
->generation as
563              * directly used here, and other fields in the handle accessed 
in the
564              * caller if the handle was not reused.
565              */
566             pg_read_barrier();
567     
568             return ioh->generation != ref_generation;
569     }
570     
571     /*
572      * Wait for IO to complete. External code should never use this, 
outside of
573      * the AIO subsystem waits are only allowed via pgaio_wref_wait().
574      */
575     static void
576     pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
577     {
578             PgAioHandleState state;
579             bool            am_owner;
580     
581             am_owner = ioh->owner_procno == MyProcNumber;
582     
583             if (pgaio_io_was_recycled(ioh, ref_generation, &state))
584                     return;
585     
586             if (am_owner)
587             {
588                     if (state != PGAIO_HS_SUBMITTED
589                             && state != PGAIO_HS_COMPLETED_IO
590                             && state != PGAIO_HS_COMPLETED_SHARED
591                             && state != PGAIO_HS_COMPLETED_LOCAL)
592                     {
593                             elog(PANIC, "waiting for own IO %d in wrong 
state: %s",
594                                      pgaio_io_get_id(ioh), 
pgaio_io_get_state_name(ioh));
595                     }
596             }
597     
598             while (true)
599             {
600                     if (pgaio_io_was_recycled(ioh, ref_generation, &state))
601                             return;
602     
603                     switch ((PgAioHandleState) state)
604                     {
605                             case PGAIO_HS_IDLE:
606                             case PGAIO_HS_HANDED_OUT:
607                                     elog(ERROR, "IO in wrong state: %d", 
state);
608                                     break;
609     
610                             case PGAIO_HS_SUBMITTED:
611     
612                                     /*
613                                      * If we need to wait via the IO 
method, do so now. Don't
614                                      * check via the IO method if the 
issuing backend is executing
615                                      * the IO synchronously.
616                                      */
617                                     if (pgaio_method_ops->wait_one && 
!(ioh->flags & PGAIO_HF_SYNCHRONOUS))
618                                     {
619                                             pgaio_method_ops->wait_one(ioh, 
ref_generation);
620                                             continue;
621                                     }
622                                     /* fallthrough */
623     
624                                     /* waiting for owner to submit */
625                             case PGAIO_HS_DEFINED:
626                             case PGAIO_HS_STAGED:
627                                     /* waiting for reaper to complete */
628                                     /* fallthrough */
629                             case PGAIO_HS_COMPLETED_IO:
630                                     /* shouldn't be able to hit this 
otherwise */
631                                     Assert(IsUnderPostmaster);
632                                     /* ensure we're going to get woken up */
633                                     
ConditionVariablePrepareToSleep(&ioh->cv);
634     
635                                     while (!pgaio_io_was_recycled(ioh, 
ref_generation, &state))
636                                     {
637                                             if (state == 
PGAIO_HS_COMPLETED_SHARED ||
638                                                     state == 
PGAIO_HS_COMPLETED_LOCAL)
639                                                     break;
640                                             
ConditionVariableSleep(&ioh->cv, WAIT_EVENT_AIO_IO_COMPLETION);
641                                     }
642     
643                                     ConditionVariableCancelSleep();
644                                     break;
645     
646                             case PGAIO_HS_COMPLETED_SHARED:
647                             case PGAIO_HS_COMPLETED_LOCAL:
648     
649                                     /*
650                                      * Note that no interrupts are 
processed between
651                                      * pgaio_io_was_recycled() and this 
check - that's important
652                                      * as otherwise an interrupt could have 
already reclaimed the
653                                      * handle.
654                                      */
655                                     if (am_owner)
656                                             pgaio_io_reclaim(ioh);
657                                     return;
658                     }
659             }
660     }
661     
662     /*
663      * Make IO handle ready to be reused after IO has completed or after the
664      * handle has been released without being used.
665      *
666      * Note that callers need to be careful about only calling this in the 
right
667      * state and that no interrupts can be processed between the state 
check and
668      * the call to pgaio_io_reclaim(). Otherwise interrupt processing could
669      * already have reclaimed the handle.
670      */
671     static void
672     pgaio_io_reclaim(PgAioHandle *ioh)
673     {
   0x000000000043662e <+0>:     addi    sp,sp,-64
   0x0000000000436630 <+2>:     sd      ra,56(sp)
   0x0000000000436632 <+4>:     sd      s0,48(sp)
   0x0000000000436634 <+6>:     sd      s1,40(sp)
   0x0000000000436636 <+8>:     sd      s2,32(sp)
   0x0000000000436638 <+10>:    sd      s3,24(sp)
   0x000000000043663a <+12>:    sd      s4,16(sp)

674             /* This is only ok if it's our IO */
675             Assert(ioh->owner_procno == MyProcNumber);
   0x000000000043663c <+14>:    auipc   a1,0x4a6
   0x0000000000436640 <+18>:    ld      a1,-2020(a1) # 0x8dbe58
   0x0000000000436644 <+22>:    mv      s0,a0
   0x0000000000436646 <+24>:    lw      a0,16(a0)
   0x0000000000436648 <+26>:    lw      a1,0(a1)
   0x000000000043664a <+28>:    bne     a0,a1,0x43684e <pgaio_io_reclaim+544>
   0x000000000043684e <+544>:   auipc   a0,0x242
   0x0000000000436852 <+548>:   addi    a0,a0,-494 # 0x678660
   0x0000000000436856 <+552>:   auipc   a1,0x242
   0x000000000043685a <+556>:   addi    a1,a1,-601 # 0x6785fd
   0x000000000043685e <+560>:   li      a2,675
   0x0000000000436862 <+564>:   auipc   ra,0x160
   0x0000000000436866 <+568>:   jalr    230(ra) # 0x596948 
<ExceptionalCondition>

676             Assert(ioh->state != PGAIO_HS_IDLE);
   0x000000000043664e <+32>:    lbu     a0,0(s0)
   0x0000000000436652 <+36>:    beqz    a0,0x43686a <pgaio_io_reclaim+572>
   0x000000000043686a <+572>:   auipc   a0,0x242
   0x000000000043686e <+576>:   addi    a0,a0,1037 # 0x678c77
   0x0000000000436872 <+580>:   auipc   a1,0x242
   0x0000000000436876 <+584>:   addi    a1,a1,-629 # 0x6785fd
   0x000000000043687a <+588>:   li      a2,676
   0x000000000043687e <+592>:   auipc   ra,0x160
   0x0000000000436882 <+596>:   jalr    202(ra) # 0x596948 
<ExceptionalCondition>

677     
678             /* see comment in function header */
679             HOLD_INTERRUPTS();
   0x0000000000436656 <+40>:    auipc   a0,0x4a6
   0x000000000043665a <+44>:    ld      s2,34(a0) # 0x8dc678
   0x000000000043665e <+48>:    lw      a0,0(s2)
   0x0000000000436662 <+52>:    addi    a0,a0,1
   0x0000000000436664 <+54>:    sw      a0,0(s2)

680     
681             /*
682              * It's a bit ugly, but right now the easiest place to put the 
execution
683              * of local completion callbacks is this function, as we need 
to execute
684              * local callbacks just before reclaiming at multiple callsites.
685              */
686             if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
   0x0000000000436668 <+58>:    lbu     a0,0(s0)
   0x000000000043666c <+62>:    li      a1,6
   0x000000000043666e <+64>:    bne     a0,a1,0x4366ae <pgaio_io_reclaim+128>

687             {
688                     PgAioResult local_result;
689     
690                     local_result = pgaio_io_call_complete_local(ioh);
   0x0000000000436672 <+68>:    mv      a0,s0
   0x0000000000436674 <+70>:    jal     0x4381de <pgaio_io_call_complete_local>
   0x0000000000436678 <+74>:    mv      s1,a0

691                     pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_LOCAL);
   0x000000000043667a <+76>:    li      a1,7
   0x000000000043667c <+78>:    mv      a0,s0
   0x000000000043667e <+80>:    jal     0x436436 <pgaio_io_update_state>

692     
693                     if (ioh->report_return)
   0x0000000000436682 <+84>:    ld      a0,96(s0)
   0x0000000000436684 <+86>:    beqz    a0,0x4366ae <pgaio_io_reclaim+128>

694                     {
695                             ioh->report_return->result = local_result;
   0x0000000000436686 <+88>:    sw      s1,0(a0)
   0x0000000000436688 <+90>:    srli    s1,s1,0x20
   0x000000000043668a <+92>:    sw      s1,4(a0)

696                             ioh->report_return->target_data = 
ioh->target_data;
   0x000000000043668c <+94>:    ld      a0,96(s0)
   0x000000000043668e <+96>:    lw      a1,140(s0)
   0x0000000000436692 <+100>:   sw      a1,28(a0)
   0x0000000000436694 <+102>:   lw      a1,136(s0)
   0x0000000000436698 <+106>:   sw      a1,24(a0)
   0x000000000043669a <+108>:   lw      a1,132(s0)
   0x000000000043669e <+112>:   sw      a1,20(a0)
   0x00000000004366a0 <+114>:   lw      a1,128(s0)
   0x00000000004366a4 <+118>:   sw      a1,16(a0)
   0x00000000004366a6 <+120>:   lw      a1,124(s0)
   0x00000000004366a8 <+122>:   sw      a1,12(a0)
   0x00000000004366aa <+124>:   lw      a1,120(s0)
   0x00000000004366ac <+126>:   sw      a1,8(a0)

697                     }
698             }
699     
700             pgaio_debug_io(DEBUG4, ioh,
   0x00000000004366ae <+128>:   li      a0,11
   0x00000000004366b0 <+130>:   li      a1,0
   0x00000000004366b2 <+132>:   auipc   ra,0x161
   0x00000000004366b6 <+136>:   jalr    -1776(ra) # 0x596fc2 <errstart>
   0x00000000004366ba <+140>:   beqz    a0,0x436788 <pgaio_io_reclaim+346>
   0x00000000004366bc <+142>:   li      a0,1
   0x00000000004366be <+144>:   auipc   ra,0x163
   0x00000000004366c2 <+148>:   jalr    688(ra) # 0x59996e <errhidestmt>
   0x00000000004366c6 <+152>:   li      a0,1
   0x00000000004366c8 <+154>:   auipc   ra,0x163
   0x00000000004366cc <+158>:   jalr    780(ra) # 0x5999d4 <errhidecontext>
   0x00000000004366fe <+208>:   mv      a0,s0
   0x0000000000436700 <+210>:   jal     0x438f90 <pgaio_io_get_op_name>
   0x0000000000436704 <+214>:   mv      s3,a0
   0x0000000000436706 <+216>:   mv      a0,s0
   0x0000000000436708 <+218>:   jal     0x439076 <pgaio_io_get_target_name>
   0x0000000000436712 <+228>:   mv      a3,a0
   0x000000000043672a <+252>:   lw      a0,84(s0)
   0x000000000043672c <+254>:   slli    a2,a0,0x37
   0x0000000000436730 <+258>:   srli    a2,a2,0x3d
   0x0000000000436732 <+260>:   li      a5,4
   0x0000000000436734 <+262>:   mulw    a1,s1,s4
   0x000000000043674e <+288>:   lw      a2,20(s0)
   0x0000000000436750 <+290>:   andi    a6,a0,63
   0x0000000000436754 <+294>:   srliw   a7,a0,0x9
   0x0000000000436758 <+298>:   auipc   a0,0x242
   0x000000000043675c <+302>:   addi    a0,a0,1339 # 0x678c93
   0x0000000000436760 <+306>:   sd      a2,0(sp)
   0x0000000000436762 <+308>:   mv      a2,s3
   0x0000000000436764 <+310>:   auipc   ra,0x161
   0x0000000000436768 <+314>:   jalr    -642(ra) # 0x5974e2 <errmsg_internal>
   0x000000000043676c <+318>:   auipc   a0,0x242
   0x0000000000436770 <+322>:   addi    a0,a0,-367 # 0x6785fd
   0x0000000000436774 <+326>:   auipc   a1,0x242
   0x0000000000436778 <+330>:   addi    a2,a1,1433 # 0x678d0d
   0x000000000043677c <+334>:   li      a1,705
   0x0000000000436780 <+338>:   auipc   ra,0x161
   0x0000000000436784 <+342>:   jalr    -1284(ra) # 0x59727c <errfinish>

701                                        "reclaiming: distilled_result: 
(status %s, id %u, error_data %d), raw_result: %d",
702                                        
pgaio_result_status_string(ioh->distilled_result.status),
703                                        ioh->distilled_result.id,
704                                        ioh->distilled_result.error_data,
705                                        ioh->result);
706     
707             /* if the IO has been defined, it's on the in-flight list, 
remove */
708             if (ioh->state != PGAIO_HS_HANDED_OUT)
   0x0000000000436788 <+346>:   lbu     a0,0(s0)
   0x000000000043678c <+350>:   li      a1,1
   0x000000000043678e <+352>:   auipc   s1,0x4c3
   0x0000000000436792 <+356>:   beq     a0,a1,0x4367b6 <pgaio_io_reclaim+392>

709                     dclist_delete_from(&pgaio_my_backend->in_flight_ios, 
&ioh->node);
   0x0000000000436796 <+360>:   ld      a0,-1390(s1)

710     
711             if (ioh->resowner)
   0x00000000004367b6 <+392>:   ld      a0,40(s0)
   0x00000000004367b8 <+394>:   beqz    a0,0x4367ca <pgaio_io_reclaim+412>

712             {
713                     ResourceOwnerForgetAioHandle(ioh->resowner, 
&ioh->resowner_node);
   0x00000000004367ba <+396>:   addi    a1,s0,48
   0x00000000004367be <+400>:   auipc   ra,0x18d
   0x00000000004367c2 <+404>:   jalr    1566(ra) # 0x5c3ddc 
<ResourceOwnerForgetAioHandle>

714                     ioh->resowner = NULL;
   0x00000000004367c6 <+408>:   sd      zero,40(s0)

715             }
716     
717             Assert(!ioh->resowner);
718     
719             /*
720              * Update generation & state first, before resetting the IO's 
fields,
721              * otherwise a concurrent "viewer" could think the fields are 
valid, even
722              * though they are being reset.  Increment the generation 
first, so that
723              * we can assert elsewhere that we never wait for an IDLE IO.  
While it's
724              * a bit weird for the state to go backwards for a generation, 
it's OK
725              * here, as there cannot be references to the "reborn" IO yet.  
Can't
726              * update both at once, so something has to give.
727              */
728             ioh->generation++;
   0x00000000004367ca <+412>:   ld      a0,64(s0)
   0x00000000004367cc <+414>:   addi    a0,a0,1
   0x00000000004367ce <+416>:   sd      a0,64(s0)

729             pgaio_io_update_state(ioh, PGAIO_HS_IDLE);
   0x00000000004367d0 <+418>:   mv      a0,s0
   0x00000000004367d2 <+420>:   li      a1,0
   0x00000000004367d4 <+422>:   jal     0x436436 <pgaio_io_update_state>

730     
731             /* ensure the state update is visible before we reset fields */
732             pg_write_barrier();
   0x00000000004367d8 <+426>:   fence   rw,w

733     
734             ioh->op = PGAIO_OP_INVALID;
735             ioh->target = PGAIO_TID_INVALID;
   0x00000000004367e4 <+438>:   sb      zero,4(s0)
   0x00000000004367e8 <+442>:   sb      zero,3(s0)
   0x00000000004367ec <+446>:   sb      zero,2(s0)
   0x00000000004367f4 <+454>:   sb      zero,1(s0)

736             ioh->flags = 0;
737             ioh->num_callbacks = 0;
738             ioh->handle_data_len = 0;
   0x00000000004367dc <+430>:   sb      zero,13(s0)

739             ioh->report_return = NULL;
   0x00000000004367e0 <+434>:   sd      zero,96(s0)

740             ioh->result = 0;
   0x00000000004367fc <+462>:   sw      zero,20(s0)

741             ioh->distilled_result.status = PGAIO_RS_UNKNOWN;
   0x00000000004367f0 <+450>:   lwu     a1,84(s0)
   0x0000000000436800 <+466>:   andi    a1,a1,-449
   0x0000000000436804 <+470>:   sw      a1,84(s0)

742     
743             /*
744              * We push the IO to the head of the idle IO list, that seems 
more cache
745              * efficient in cases where only a few IOs are used.
746              */
747             dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);
   0x00000000004367f8 <+458>:   ld      a0,-1390(s1)
   0x0000000000436808 <+474>:   addi    a2,a0,8
   0x000000000043680c <+478>:   addi    a1,s0,24

748     
749             RESUME_INTERRUPTS();
   0x000000000043682e <+512>:   lw      a0,0(s2)
   0x0000000000436832 <+516>:   beqz    a0,0x4368be <pgaio_io_reclaim+656>
   0x0000000000436834 <+518>:   lw      a0,0(s2)
   0x0000000000436838 <+522>:   addi    a0,a0,-1
   0x000000000043683a <+524>:   sw      a0,0(s2)
   0x000000000043683e <+528>:   ld      ra,56(sp)
   0x0000000000436840 <+530>:   ld      s0,48(sp)
   0x0000000000436842 <+532>:   ld      s1,40(sp)
   0x0000000000436844 <+534>:   ld      s2,32(sp)
   0x0000000000436846 <+536>:   ld      s3,24(sp)
   0x0000000000436848 <+538>:   ld      s4,16(sp)
   0x00000000004368be <+656>:   auipc   a0,0x1cd
   0x00000000004368c2 <+660>:   addi    a0,a0,-716 # 0x6035f2
   0x00000000004368c6 <+664>:   auipc   a1,0x242
   0x00000000004368ca <+668>:   addi    a1,a1,-713 # 0x6785fd
   0x00000000004368ce <+672>:   li      a2,749
   0x00000000004368d2 <+676>:   auipc   ra,0x160
   0x00000000004368d6 <+680>:   jalr    118(ra) # 0x596948 
<ExceptionalCondition>

750     }
   0x000000000043684a <+540>:   addi    sp,sp,64
   0x000000000043684c <+542>:   ret

751     
752     /*
753      * Wait for an IO handle to become usable.
754      *
755      * This only really is useful for pgaio_io_acquire().
756      */
757     static void
758     pgaio_io_wait_for_free(void)
759     {
760             int                     reclaimed = 0;
761     
762             pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u 
in-flight, %u idle IOs",
763                                     pgaio_my_backend->num_staged_ios,
764                                     
dclist_count(&pgaio_my_backend->in_flight_ios),
765                                     
dclist_count(&pgaio_my_backend->idle_ios));
766     
767             /*
768              * First check if any of our IOs actually have completed - when 
using
769              * worker, that'll often be the case. We could do so as part of 
the loop
770              * below, but that'd potentially lead us to wait for some IO 
submitted
771              * before.
772              */
773             for (int i = 0; i < io_max_concurrency; i++)
774             {
775                     PgAioHandle *ioh = 
&pgaio_ctl->io_handles[pgaio_my_backend->io_handle_off + i];
776     
777                     if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
778                     {
779                             /*
780                              * Note that no interrupts are processed 
between the state check
781                              * and the call to reclaim - that's important 
as otherwise an
782                              * interrupt could have already reclaimed the 
handle.
783                              *
784                              * Need to ensure that there's no reordering, 
in the more common
785                              * paths, where we wait for IO, that's done by
786                              * pgaio_io_was_recycled().
787                              */
788                             pg_read_barrier();
789                             pgaio_io_reclaim(ioh);
790                             reclaimed++;
791                     }
792             }
793     
794             if (reclaimed > 0)
795                     return;
796     
797             /*
798              * If we have any unsubmitted IOs, submit them now. We'll start 
waiting in
799              * a second, so it's better they're in flight. This also 
addresses the
800              * edge-case that all IOs are unsubmitted.
801              */
802             if (pgaio_my_backend->num_staged_ios > 0)
803                     pgaio_submit_staged();
804     
805             /* possibly some IOs finished during submission */
806             if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
807                     return;
808     
809             if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
810                     ereport(ERROR,
811                                     errmsg_internal("no free IOs despite no 
in-flight IOs"),
812                                     errdetail_internal("%d pending, %u 
in-flight, %u idle IOs",
813                                                                        
pgaio_my_backend->num_staged_ios,
814                                                                        
dclist_count(&pgaio_my_backend->in_flight_ios),
815                                                                        
dclist_count(&pgaio_my_backend->idle_ios)));
816     
817             /*
818              * Wait for the oldest in-flight IO to complete.
819              *
820              * XXX: Reusing the general IO wait is suboptimal, we don't 
need to wait
821              * for that specific IO to complete, we just need *any* IO to 
complete.
822              */
823             {
824                     PgAioHandle *ioh = dclist_head_element(PgAioHandle, 
node,
825                                                                             
                   &pgaio_my_backend->in_flight_ios);
826                     uint64          generation = ioh->generation;
827     
828                     switch ((PgAioHandleState) ioh->state)
829                     {
830                                     /* should not be in in-flight list */
831                             case PGAIO_HS_IDLE:
832                             case PGAIO_HS_DEFINED:
833                             case PGAIO_HS_HANDED_OUT:
834                             case PGAIO_HS_STAGED:
835                             case PGAIO_HS_COMPLETED_LOCAL:
836                                     elog(ERROR, "shouldn't get here with 
io:%d in state %d",
837                                              pgaio_io_get_id(ioh), 
ioh->state);
838                                     break;
839     
840                             case PGAIO_HS_COMPLETED_IO:
841                             case PGAIO_HS_SUBMITTED:
842                                     pgaio_debug_io(DEBUG2, ioh,
843                                                                "waiting for 
free io with %u in flight",
844                                                                
dclist_count(&pgaio_my_backend->in_flight_ios));
845     
846                                     /*
847                                      * In a more general case this would be 
racy, because the
848                                      * generation could increase after we 
read ioh->state above.
849                                      * But we are only looking at IOs by 
the current backend and
850                                      * the IO can only be recycled by this 
backend.  Even this is
851                                      * only OK because we get the handle's 
generation before
852                                      * potentially processing interrupts, 
e.g. as part of
853                                      * pgaio_debug_io().
854                                      */
855                                     pgaio_io_wait(ioh, generation);
856                                     break;
857     
858                             case PGAIO_HS_COMPLETED_SHARED:
859     
860                                     /*
861                                      * It's possible that another backend 
just finished this IO.
862                                      *
863                                      * Note that no interrupts are 
processed between the state
864                                      * check and the call to reclaim - 
that's important as
865                                      * otherwise an interrupt could have 
already reclaimed the
866                                      * handle.
867                                      *
868                                      * Need to ensure that there's no 
reordering, in the more
869                                      * common paths, where we wait for IO, 
that's done by
870                                      * pgaio_io_was_recycled().
871                                      */
872                                     pg_read_barrier();
873                                     pgaio_io_reclaim(ioh);
874                                     break;
875                     }
876     
877                     if (dclist_count(&pgaio_my_backend->idle_ios) == 0)
878                             elog(PANIC, "no idle IO after waiting for IO to 
terminate");
879                     return;
880             }
881     }
882     
883     /*
884      * Internal - code outside of AIO should never need this and it'd be 
hard for
885      * such code to be safe.
886      */
887     static PgAioHandle *
888     pgaio_io_from_wref(PgAioWaitRef *iow, uint64 *ref_generation)
889     {
890             PgAioHandle *ioh;
891     
892             Assert(iow->aio_index < pgaio_ctl->io_handle_count);
893     
894             ioh = &pgaio_ctl->io_handles[iow->aio_index];
895     
896             *ref_generation = ((uint64) iow->generation_upper) << 32 |
897                     iow->generation_lower;
898     
899             Assert(*ref_generation != 0);
900     
901             return ioh;
902     }
903     
904     static const char *
905     pgaio_io_state_get_name(PgAioHandleState s)
906     {
907     #define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym
908             switch ((PgAioHandleState) s)
   0x0000000000436714 <+230>:   bltu    a2,a1,0x436728 <pgaio_io_reclaim+250>
   0x0000000000436718 <+234>:   slli    a1,a1,0x3
   0x000000000043671a <+236>:   auipc   a0,0x485
   0x000000000043671e <+240>:   addi    a0,a0,-1106 # 0x8bb2c8
   0x0000000000436722 <+244>:   add     a0,a0,a1
   0x0000000000436724 <+246>:   ld      a4,0(a0)
   0x0000000000436726 <+248>:   j       0x43672a <pgaio_io_reclaim+252>
   0x0000000000436728 <+250>:   li      a4,0

909             {
910                             PGAIO_HS_TOSTR_CASE(IDLE);
911                             PGAIO_HS_TOSTR_CASE(HANDED_OUT);
912                             PGAIO_HS_TOSTR_CASE(DEFINED);
913                             PGAIO_HS_TOSTR_CASE(STAGED);
914                             PGAIO_HS_TOSTR_CASE(SUBMITTED);
915                             PGAIO_HS_TOSTR_CASE(COMPLETED_IO);
916                             PGAIO_HS_TOSTR_CASE(COMPLETED_SHARED);
917                             PGAIO_HS_TOSTR_CASE(COMPLETED_LOCAL);
918             }
919     #undef PGAIO_HS_TOSTR_CASE
920     
921             return NULL;                            /* silence compiler */
922     }
923     
924     const char *
925     pgaio_io_get_state_name(PgAioHandle *ioh)
926     {
927             return pgaio_io_state_get_name(ioh->state);
   0x000000000043670c <+222>:   lbu     a1,0(s0)
   0x0000000000436710 <+226>:   li      a2,7

928     }
929     
930     const char *
931     pgaio_result_status_string(PgAioResultStatus rs)
932     {
933             switch ((PgAioResultStatus) rs)
   0x0000000000436738 <+266>:   bltu    a5,a2,0x43674c <pgaio_io_reclaim+286>
   0x000000000043673c <+270>:   slli    a2,a2,0x3
   0x000000000043673e <+272>:   auipc   a5,0x485
   0x0000000000436742 <+276>:   addi    a5,a5,-1182 # 0x8bb2a0
   0x0000000000436746 <+280>:   add     a2,a2,a5
   0x0000000000436748 <+282>:   ld      a5,0(a2)
   0x000000000043674a <+284>:   j       0x43674e <pgaio_io_reclaim+288>
   0x000000000043674c <+286>:   li      a5,0

End of assembler dump.
(gdb) 

Reply via email to