13.10.2025 01:44, Thomas Munro wrote:
> On Sun, Oct 12, 2025 at 6:00 PM Alexander Lakhin <[email protected]> wrote:
>> Please find those attached (gdb "disass/m pgaio_io_update_state" misses
>> the start of the function (but it's still disassembled below), so I
>> decided to share the whole output).
> Could you please also disassemble pgaio_io_reclaim()?
Sure, the output of disass/m pgaio_io_reclaim is attached.
A side note: I could not reproduce the hang so far — 8 out of 30
iterations of 027_stream_regress failed, but all completed within
700-800 seconds.
Best regards,
Alexander
GNU gdb (Debian 15.2-1) 15.2
Copyright (C) 2024 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "riscv64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from tmp_install/usr/local/pgsql/bin/postgres...
(gdb) Dump of assembler code for function pgaio_io_reclaim:
341 Assert(ioh >= pgaio_ctl->io_handles &&
0x00000000004366d0 <+162>: auipc a0,0x4c3
0x00000000004366d4 <+166>: ld a1,-1192(a0) # 0x8f9228 <pgaio_ctl>
0x00000000004366d8 <+170>: ld a0,48(a1)
0x00000000004366da <+172>: bltu s0,a0,0x436886 <pgaio_io_reclaim+600>
0x00000000004366de <+176>: lwu a1,40(a1)
0x00000000004366e2 <+180>: li a2,144
0x00000000004366e6 <+184>: mul a1,a1,a2
0x00000000004366ea <+188>: add a1,a1,a0
0x00000000004366ec <+190>: bgeu s0,a1,0x436886 <pgaio_io_reclaim+600>
0x00000000004366f0 <+194>: auipc a1,0x4c1
0x00000000004366f4 <+198>: ld s4,-296(a1) # 0x8f75c8
0x0000000000436886 <+600>: auipc a0,0x242
0x000000000043688a <+604>: addi a0,a0,-271 # 0x678777
0x000000000043688e <+608>: auipc a1,0x242
0x0000000000436892 <+612>: addi a1,a1,-657 # 0x6785fd
0x0000000000436896 <+616>: li a2,342
0x000000000043689a <+620>: auipc ra,0x160
0x000000000043689e <+624>: jalr 174(ra) # 0x596948
<ExceptionalCondition>
342 ioh < (pgaio_ctl->io_handles +
pgaio_ctl->io_handle_count));
343 return ioh - pgaio_ctl->io_handles;
0x00000000004366f8 <+202>: sub s1,s0,a0
0x00000000004366fc <+206>: srai s1,s1,0x4
344 }
345
346 /*
347 * Return the ProcNumber for the process that can use an IO handle. The
348 * mapping from IO handles to PGPROCs is static, therefore this even
works
349 * when the corresponding PGPROC is not in use.
350 */
351 ProcNumber
352 pgaio_io_get_owner(PgAioHandle *ioh)
353 {
354 return ioh->owner_procno;
355 }
356
357 /*
358 * Return a wait reference for the IO. Only wait references can be used
to
359 * wait for an IOs completion, as handles themselves can be reused after
360 * completion. See also the comment above pgaio_io_acquire().
361 */
362 void
363 pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
364 {
365 Assert(ioh->state == PGAIO_HS_HANDED_OUT ||
366 ioh->state == PGAIO_HS_DEFINED ||
367 ioh->state == PGAIO_HS_STAGED);
368 Assert(ioh->generation != 0);
369
370 iow->aio_index = ioh - pgaio_ctl->io_handles;
371 iow->generation_upper = (uint32) (ioh->generation >> 32);
372 iow->generation_lower = (uint32) ioh->generation;
373 }
374
375
376
377 /*
--------------------------------------------------------------------------------
378 * Internal Functions related to PgAioHandle
379 *
--------------------------------------------------------------------------------
380 */
381
382 static inline void
383 pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
384 {
385 /*
386 * All callers need to have held interrupts in some form,
otherwise
387 * interrupt processing could wait for the IO to complete,
while in an
388 * intermediary state.
389 */
390 Assert(!INTERRUPTS_CAN_BE_PROCESSED());
391
392 pgaio_debug_io(DEBUG5, ioh,
393 "updating state to %s",
394 pgaio_io_state_get_name(new_state));
395
396 /*
397 * Ensure the changes signified by the new state are visible
before the
398 * new state becomes visible.
399 */
400 pg_write_barrier();
401
402 ioh->state = new_state;
403 }
404
405 static void
406 pgaio_io_resowner_register(PgAioHandle *ioh)
407 {
408 Assert(!ioh->resowner);
409 Assert(CurrentResourceOwner);
410
411 ResourceOwnerRememberAioHandle(CurrentResourceOwner,
&ioh->resowner_node);
412 ioh->resowner = CurrentResourceOwner;
413 }
414
415 /*
416 * Stage IO for execution and, if appropriate, submit it immediately.
417 *
418 * Should only be called from pgaio_io_start_*().
419 */
420 void
421 pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
422 {
423 bool needs_synchronous;
424
425 Assert(ioh->state == PGAIO_HS_HANDED_OUT);
426 Assert(pgaio_my_backend->handed_out_io == ioh);
427 Assert(pgaio_io_has_target(ioh));
428
429 /*
430 * Otherwise an interrupt, in the middle of staging and
possibly executing
431 * the IO, could end up trying to wait for the IO, leading to
state
432 * confusion.
433 */
434 HOLD_INTERRUPTS();
435
436 ioh->op = op;
437 ioh->result = 0;
438
439 pgaio_io_update_state(ioh, PGAIO_HS_DEFINED);
440
441 /* allow a new IO to be staged */
442 pgaio_my_backend->handed_out_io = NULL;
443
444 pgaio_io_call_stage(ioh);
445
446 pgaio_io_update_state(ioh, PGAIO_HS_STAGED);
447
448 /*
449 * Synchronous execution has to be executed, well,
synchronously, so check
450 * that first.
451 */
452 needs_synchronous = pgaio_io_needs_synchronous_execution(ioh);
453
454 pgaio_debug_io(DEBUG3, ioh,
455 "staged (synchronous: %d, in_batch:
%d)",
456 needs_synchronous,
pgaio_my_backend->in_batchmode);
457
458 if (!needs_synchronous)
459 {
460
pgaio_my_backend->staged_ios[pgaio_my_backend->num_staged_ios++] = ioh;
461 Assert(pgaio_my_backend->num_staged_ios <=
PGAIO_SUBMIT_BATCH_SIZE);
462
463 /*
464 * Unless code explicitly opted into batching IOs,
submit the IO
465 * immediately.
466 */
467 if (!pgaio_my_backend->in_batchmode)
468 pgaio_submit_staged();
469 }
470 else
471 {
472 pgaio_io_prepare_submit(ioh);
473 pgaio_io_perform_synchronously(ioh);
474 }
475
476 RESUME_INTERRUPTS();
477 }
478
479 bool
480 pgaio_io_needs_synchronous_execution(PgAioHandle *ioh)
481 {
482 /*
483 * If the caller said to execute the IO synchronously, do so.
484 *
485 * XXX: We could optimize the logic when to execute
synchronously by first
486 * checking if there are other IOs in flight and only
synchronously
487 * executing if not. Unclear whether that'll be sufficiently
common to be
488 * worth worrying about.
489 */
490 if (ioh->flags & PGAIO_HF_SYNCHRONOUS)
491 return true;
492
493 /* Check if the IO method requires synchronous execution of IO
*/
494 if (pgaio_method_ops->needs_synchronous_execution)
495 return
pgaio_method_ops->needs_synchronous_execution(ioh);
496
497 return false;
498 }
499
500 /*
501 * Handle IO being processed by IO method.
502 *
503 * Should be called by IO methods / synchronous IO execution, just
before the
504 * IO is performed.
505 */
506 void
507 pgaio_io_prepare_submit(PgAioHandle *ioh)
508 {
509 pgaio_io_update_state(ioh, PGAIO_HS_SUBMITTED);
510
511 dclist_push_tail(&pgaio_my_backend->in_flight_ios, &ioh->node);
512 }
513
514 /*
515 * Handle IO getting completed by a method.
516 *
517 * Should be called by IO methods / synchronous IO execution, just
after the
518 * IO has been performed.
519 *
520 * Expects to be called in a critical section. We expect IOs to be
usable for
521 * WAL etc, which requires being able to execute completion callbacks
in a
522 * critical section.
523 */
524 void
525 pgaio_io_process_completion(PgAioHandle *ioh, int result)
526 {
527 Assert(ioh->state == PGAIO_HS_SUBMITTED);
528
529 Assert(CritSectionCount > 0);
530
531 ioh->result = result;
532
533 pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_IO);
534
535 INJECTION_POINT("aio-process-completion-before-shared", ioh);
536
537 pgaio_io_call_complete_shared(ioh);
538
539 pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_SHARED);
540
541 /* condition variable broadcast ensures state is visible before
wakeup */
542 ConditionVariableBroadcast(&ioh->cv);
543
544 /* contains call to pgaio_io_call_complete_local() */
545 if (ioh->owner_procno == MyProcNumber)
546 pgaio_io_reclaim(ioh);
547 }
548
549 /*
550 * Has the IO completed and thus the IO handle been reused?
551 *
552 * This is useful when waiting for IO completion at a low level (e.g.
in an IO
553 * method's ->wait_one() callback).
554 */
555 bool
556 pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation,
PgAioHandleState *state)
557 {
558 *state = ioh->state;
559
560 /*
561 * Ensure that we don't see an earlier state of the handle than
ioh->state
562 * due to compiler or CPU reordering. This protects both
->generation as
563 * directly used here, and other fields in the handle accessed
in the
564 * caller if the handle was not reused.
565 */
566 pg_read_barrier();
567
568 return ioh->generation != ref_generation;
569 }
570
571 /*
572 * Wait for IO to complete. External code should never use this,
outside of
573 * the AIO subsystem waits are only allowed via pgaio_wref_wait().
574 */
575 static void
576 pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
577 {
578 PgAioHandleState state;
579 bool am_owner;
580
581 am_owner = ioh->owner_procno == MyProcNumber;
582
583 if (pgaio_io_was_recycled(ioh, ref_generation, &state))
584 return;
585
586 if (am_owner)
587 {
588 if (state != PGAIO_HS_SUBMITTED
589 && state != PGAIO_HS_COMPLETED_IO
590 && state != PGAIO_HS_COMPLETED_SHARED
591 && state != PGAIO_HS_COMPLETED_LOCAL)
592 {
593 elog(PANIC, "waiting for own IO %d in wrong
state: %s",
594 pgaio_io_get_id(ioh),
pgaio_io_get_state_name(ioh));
595 }
596 }
597
598 while (true)
599 {
600 if (pgaio_io_was_recycled(ioh, ref_generation, &state))
601 return;
602
603 switch ((PgAioHandleState) state)
604 {
605 case PGAIO_HS_IDLE:
606 case PGAIO_HS_HANDED_OUT:
607 elog(ERROR, "IO in wrong state: %d",
state);
608 break;
609
610 case PGAIO_HS_SUBMITTED:
611
612 /*
613 * If we need to wait via the IO
method, do so now. Don't
614 * check via the IO method if the
issuing backend is executing
615 * the IO synchronously.
616 */
617 if (pgaio_method_ops->wait_one &&
!(ioh->flags & PGAIO_HF_SYNCHRONOUS))
618 {
619 pgaio_method_ops->wait_one(ioh,
ref_generation);
620 continue;
621 }
622 /* fallthrough */
623
624 /* waiting for owner to submit */
625 case PGAIO_HS_DEFINED:
626 case PGAIO_HS_STAGED:
627 /* waiting for reaper to complete */
628 /* fallthrough */
629 case PGAIO_HS_COMPLETED_IO:
630 /* shouldn't be able to hit this
otherwise */
631 Assert(IsUnderPostmaster);
632 /* ensure we're going to get woken up */
633
ConditionVariablePrepareToSleep(&ioh->cv);
634
635 while (!pgaio_io_was_recycled(ioh,
ref_generation, &state))
636 {
637 if (state ==
PGAIO_HS_COMPLETED_SHARED ||
638 state ==
PGAIO_HS_COMPLETED_LOCAL)
639 break;
640
ConditionVariableSleep(&ioh->cv, WAIT_EVENT_AIO_IO_COMPLETION);
641 }
642
643 ConditionVariableCancelSleep();
644 break;
645
646 case PGAIO_HS_COMPLETED_SHARED:
647 case PGAIO_HS_COMPLETED_LOCAL:
648
649 /*
650 * Note that no interrupts are
processed between
651 * pgaio_io_was_recycled() and this
check - that's important
652 * as otherwise an interrupt could have
already reclaimed the
653 * handle.
654 */
655 if (am_owner)
656 pgaio_io_reclaim(ioh);
657 return;
658 }
659 }
660 }
661
662 /*
663 * Make IO handle ready to be reused after IO has completed or after the
664 * handle has been released without being used.
665 *
666 * Note that callers need to be careful about only calling this in the
right
667 * state and that no interrupts can be processed between the state
check and
668 * the call to pgaio_io_reclaim(). Otherwise interrupt processing could
669 * already have reclaimed the handle.
670 */
671 static void
672 pgaio_io_reclaim(PgAioHandle *ioh)
673 {
0x000000000043662e <+0>: addi sp,sp,-64
0x0000000000436630 <+2>: sd ra,56(sp)
0x0000000000436632 <+4>: sd s0,48(sp)
0x0000000000436634 <+6>: sd s1,40(sp)
0x0000000000436636 <+8>: sd s2,32(sp)
0x0000000000436638 <+10>: sd s3,24(sp)
0x000000000043663a <+12>: sd s4,16(sp)
674 /* This is only ok if it's our IO */
675 Assert(ioh->owner_procno == MyProcNumber);
0x000000000043663c <+14>: auipc a1,0x4a6
0x0000000000436640 <+18>: ld a1,-2020(a1) # 0x8dbe58
0x0000000000436644 <+22>: mv s0,a0
0x0000000000436646 <+24>: lw a0,16(a0)
0x0000000000436648 <+26>: lw a1,0(a1)
0x000000000043664a <+28>: bne a0,a1,0x43684e <pgaio_io_reclaim+544>
0x000000000043684e <+544>: auipc a0,0x242
0x0000000000436852 <+548>: addi a0,a0,-494 # 0x678660
0x0000000000436856 <+552>: auipc a1,0x242
0x000000000043685a <+556>: addi a1,a1,-601 # 0x6785fd
0x000000000043685e <+560>: li a2,675
0x0000000000436862 <+564>: auipc ra,0x160
0x0000000000436866 <+568>: jalr 230(ra) # 0x596948
<ExceptionalCondition>
676 Assert(ioh->state != PGAIO_HS_IDLE);
0x000000000043664e <+32>: lbu a0,0(s0)
0x0000000000436652 <+36>: beqz a0,0x43686a <pgaio_io_reclaim+572>
0x000000000043686a <+572>: auipc a0,0x242
0x000000000043686e <+576>: addi a0,a0,1037 # 0x678c77
0x0000000000436872 <+580>: auipc a1,0x242
0x0000000000436876 <+584>: addi a1,a1,-629 # 0x6785fd
0x000000000043687a <+588>: li a2,676
0x000000000043687e <+592>: auipc ra,0x160
0x0000000000436882 <+596>: jalr 202(ra) # 0x596948
<ExceptionalCondition>
677
678 /* see comment in function header */
679 HOLD_INTERRUPTS();
0x0000000000436656 <+40>: auipc a0,0x4a6
0x000000000043665a <+44>: ld s2,34(a0) # 0x8dc678
0x000000000043665e <+48>: lw a0,0(s2)
0x0000000000436662 <+52>: addi a0,a0,1
0x0000000000436664 <+54>: sw a0,0(s2)
680
681 /*
682 * It's a bit ugly, but right now the easiest place to put the
execution
683 * of local completion callbacks is this function, as we need
to execute
684 * local callbacks just before reclaiming at multiple callsites.
685 */
686 if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
0x0000000000436668 <+58>: lbu a0,0(s0)
0x000000000043666c <+62>: li a1,6
0x000000000043666e <+64>: bne a0,a1,0x4366ae <pgaio_io_reclaim+128>
687 {
688 PgAioResult local_result;
689
690 local_result = pgaio_io_call_complete_local(ioh);
0x0000000000436672 <+68>: mv a0,s0
0x0000000000436674 <+70>: jal 0x4381de <pgaio_io_call_complete_local>
0x0000000000436678 <+74>: mv s1,a0
691 pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_LOCAL);
0x000000000043667a <+76>: li a1,7
0x000000000043667c <+78>: mv a0,s0
0x000000000043667e <+80>: jal 0x436436 <pgaio_io_update_state>
692
693 if (ioh->report_return)
0x0000000000436682 <+84>: ld a0,96(s0)
0x0000000000436684 <+86>: beqz a0,0x4366ae <pgaio_io_reclaim+128>
694 {
695 ioh->report_return->result = local_result;
0x0000000000436686 <+88>: sw s1,0(a0)
0x0000000000436688 <+90>: srli s1,s1,0x20
0x000000000043668a <+92>: sw s1,4(a0)
696 ioh->report_return->target_data =
ioh->target_data;
0x000000000043668c <+94>: ld a0,96(s0)
0x000000000043668e <+96>: lw a1,140(s0)
0x0000000000436692 <+100>: sw a1,28(a0)
0x0000000000436694 <+102>: lw a1,136(s0)
0x0000000000436698 <+106>: sw a1,24(a0)
0x000000000043669a <+108>: lw a1,132(s0)
0x000000000043669e <+112>: sw a1,20(a0)
0x00000000004366a0 <+114>: lw a1,128(s0)
0x00000000004366a4 <+118>: sw a1,16(a0)
0x00000000004366a6 <+120>: lw a1,124(s0)
0x00000000004366a8 <+122>: sw a1,12(a0)
0x00000000004366aa <+124>: lw a1,120(s0)
0x00000000004366ac <+126>: sw a1,8(a0)
697 }
698 }
699
700 pgaio_debug_io(DEBUG4, ioh,
0x00000000004366ae <+128>: li a0,11
0x00000000004366b0 <+130>: li a1,0
0x00000000004366b2 <+132>: auipc ra,0x161
0x00000000004366b6 <+136>: jalr -1776(ra) # 0x596fc2 <errstart>
0x00000000004366ba <+140>: beqz a0,0x436788 <pgaio_io_reclaim+346>
0x00000000004366bc <+142>: li a0,1
0x00000000004366be <+144>: auipc ra,0x163
0x00000000004366c2 <+148>: jalr 688(ra) # 0x59996e <errhidestmt>
0x00000000004366c6 <+152>: li a0,1
0x00000000004366c8 <+154>: auipc ra,0x163
0x00000000004366cc <+158>: jalr 780(ra) # 0x5999d4 <errhidecontext>
0x00000000004366fe <+208>: mv a0,s0
0x0000000000436700 <+210>: jal 0x438f90 <pgaio_io_get_op_name>
0x0000000000436704 <+214>: mv s3,a0
0x0000000000436706 <+216>: mv a0,s0
0x0000000000436708 <+218>: jal 0x439076 <pgaio_io_get_target_name>
0x0000000000436712 <+228>: mv a3,a0
0x000000000043672a <+252>: lw a0,84(s0)
0x000000000043672c <+254>: slli a2,a0,0x37
0x0000000000436730 <+258>: srli a2,a2,0x3d
0x0000000000436732 <+260>: li a5,4
0x0000000000436734 <+262>: mulw a1,s1,s4
0x000000000043674e <+288>: lw a2,20(s0)
0x0000000000436750 <+290>: andi a6,a0,63
0x0000000000436754 <+294>: srliw a7,a0,0x9
0x0000000000436758 <+298>: auipc a0,0x242
0x000000000043675c <+302>: addi a0,a0,1339 # 0x678c93
0x0000000000436760 <+306>: sd a2,0(sp)
0x0000000000436762 <+308>: mv a2,s3
0x0000000000436764 <+310>: auipc ra,0x161
0x0000000000436768 <+314>: jalr -642(ra) # 0x5974e2 <errmsg_internal>
0x000000000043676c <+318>: auipc a0,0x242
0x0000000000436770 <+322>: addi a0,a0,-367 # 0x6785fd
0x0000000000436774 <+326>: auipc a1,0x242
0x0000000000436778 <+330>: addi a2,a1,1433 # 0x678d0d
0x000000000043677c <+334>: li a1,705
0x0000000000436780 <+338>: auipc ra,0x161
0x0000000000436784 <+342>: jalr -1284(ra) # 0x59727c <errfinish>
701 "reclaiming: distilled_result:
(status %s, id %u, error_data %d), raw_result: %d",
702
pgaio_result_status_string(ioh->distilled_result.status),
703 ioh->distilled_result.id,
704 ioh->distilled_result.error_data,
705 ioh->result);
706
707 /* if the IO has been defined, it's on the in-flight list,
remove */
708 if (ioh->state != PGAIO_HS_HANDED_OUT)
0x0000000000436788 <+346>: lbu a0,0(s0)
0x000000000043678c <+350>: li a1,1
0x000000000043678e <+352>: auipc s1,0x4c3
0x0000000000436792 <+356>: beq a0,a1,0x4367b6 <pgaio_io_reclaim+392>
709 dclist_delete_from(&pgaio_my_backend->in_flight_ios,
&ioh->node);
0x0000000000436796 <+360>: ld a0,-1390(s1)
710
711 if (ioh->resowner)
0x00000000004367b6 <+392>: ld a0,40(s0)
0x00000000004367b8 <+394>: beqz a0,0x4367ca <pgaio_io_reclaim+412>
712 {
713 ResourceOwnerForgetAioHandle(ioh->resowner,
&ioh->resowner_node);
0x00000000004367ba <+396>: addi a1,s0,48
0x00000000004367be <+400>: auipc ra,0x18d
0x00000000004367c2 <+404>: jalr 1566(ra) # 0x5c3ddc
<ResourceOwnerForgetAioHandle>
714 ioh->resowner = NULL;
0x00000000004367c6 <+408>: sd zero,40(s0)
715 }
716
717 Assert(!ioh->resowner);
718
719 /*
720 * Update generation & state first, before resetting the IO's
fields,
721 * otherwise a concurrent "viewer" could think the fields are
valid, even
722 * though they are being reset. Increment the generation
first, so that
723 * we can assert elsewhere that we never wait for an IDLE IO.
While it's
724 * a bit weird for the state to go backwards for a generation,
it's OK
725 * here, as there cannot be references to the "reborn" IO yet.
Can't
726 * update both at once, so something has to give.
727 */
728 ioh->generation++;
0x00000000004367ca <+412>: ld a0,64(s0)
0x00000000004367cc <+414>: addi a0,a0,1
0x00000000004367ce <+416>: sd a0,64(s0)
729 pgaio_io_update_state(ioh, PGAIO_HS_IDLE);
0x00000000004367d0 <+418>: mv a0,s0
0x00000000004367d2 <+420>: li a1,0
0x00000000004367d4 <+422>: jal 0x436436 <pgaio_io_update_state>
730
731 /* ensure the state update is visible before we reset fields */
732 pg_write_barrier();
0x00000000004367d8 <+426>: fence rw,w
733
734 ioh->op = PGAIO_OP_INVALID;
735 ioh->target = PGAIO_TID_INVALID;
0x00000000004367e4 <+438>: sb zero,4(s0)
0x00000000004367e8 <+442>: sb zero,3(s0)
0x00000000004367ec <+446>: sb zero,2(s0)
0x00000000004367f4 <+454>: sb zero,1(s0)
736 ioh->flags = 0;
737 ioh->num_callbacks = 0;
738 ioh->handle_data_len = 0;
0x00000000004367dc <+430>: sb zero,13(s0)
739 ioh->report_return = NULL;
0x00000000004367e0 <+434>: sd zero,96(s0)
740 ioh->result = 0;
0x00000000004367fc <+462>: sw zero,20(s0)
741 ioh->distilled_result.status = PGAIO_RS_UNKNOWN;
0x00000000004367f0 <+450>: lwu a1,84(s0)
0x0000000000436800 <+466>: andi a1,a1,-449
0x0000000000436804 <+470>: sw a1,84(s0)
742
743 /*
744 * We push the IO to the head of the idle IO list, that seems
more cache
745 * efficient in cases where only a few IOs are used.
746 */
747 dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);
0x00000000004367f8 <+458>: ld a0,-1390(s1)
0x0000000000436808 <+474>: addi a2,a0,8
0x000000000043680c <+478>: addi a1,s0,24
748
749 RESUME_INTERRUPTS();
0x000000000043682e <+512>: lw a0,0(s2)
0x0000000000436832 <+516>: beqz a0,0x4368be <pgaio_io_reclaim+656>
0x0000000000436834 <+518>: lw a0,0(s2)
0x0000000000436838 <+522>: addi a0,a0,-1
0x000000000043683a <+524>: sw a0,0(s2)
0x000000000043683e <+528>: ld ra,56(sp)
0x0000000000436840 <+530>: ld s0,48(sp)
0x0000000000436842 <+532>: ld s1,40(sp)
0x0000000000436844 <+534>: ld s2,32(sp)
0x0000000000436846 <+536>: ld s3,24(sp)
0x0000000000436848 <+538>: ld s4,16(sp)
0x00000000004368be <+656>: auipc a0,0x1cd
0x00000000004368c2 <+660>: addi a0,a0,-716 # 0x6035f2
0x00000000004368c6 <+664>: auipc a1,0x242
0x00000000004368ca <+668>: addi a1,a1,-713 # 0x6785fd
0x00000000004368ce <+672>: li a2,749
0x00000000004368d2 <+676>: auipc ra,0x160
0x00000000004368d6 <+680>: jalr 118(ra) # 0x596948
<ExceptionalCondition>
750 }
0x000000000043684a <+540>: addi sp,sp,64
0x000000000043684c <+542>: ret
751
752 /*
753 * Wait for an IO handle to become usable.
754 *
755 * This only really is useful for pgaio_io_acquire().
756 */
757 static void
758 pgaio_io_wait_for_free(void)
759 {
760 int reclaimed = 0;
761
762 pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u
in-flight, %u idle IOs",
763 pgaio_my_backend->num_staged_ios,
764
dclist_count(&pgaio_my_backend->in_flight_ios),
765
dclist_count(&pgaio_my_backend->idle_ios));
766
767 /*
768 * First check if any of our IOs actually have completed - when
using
769 * worker, that'll often be the case. We could do so as part of
the loop
770 * below, but that'd potentially lead us to wait for some IO
submitted
771 * before.
772 */
773 for (int i = 0; i < io_max_concurrency; i++)
774 {
775 PgAioHandle *ioh =
&pgaio_ctl->io_handles[pgaio_my_backend->io_handle_off + i];
776
777 if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
778 {
779 /*
780 * Note that no interrupts are processed
between the state check
781 * and the call to reclaim - that's important
as otherwise an
782 * interrupt could have already reclaimed the
handle.
783 *
784 * Need to ensure that there's no reordering,
in the more common
785 * paths, where we wait for IO, that's done by
786 * pgaio_io_was_recycled().
787 */
788 pg_read_barrier();
789 pgaio_io_reclaim(ioh);
790 reclaimed++;
791 }
792 }
793
794 if (reclaimed > 0)
795 return;
796
797 /*
798 * If we have any unsubmitted IOs, submit them now. We'll start
waiting in
799 * a second, so it's better they're in flight. This also
addresses the
800 * edge-case that all IOs are unsubmitted.
801 */
802 if (pgaio_my_backend->num_staged_ios > 0)
803 pgaio_submit_staged();
804
805 /* possibly some IOs finished during submission */
806 if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
807 return;
808
809 if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
810 ereport(ERROR,
811 errmsg_internal("no free IOs despite no
in-flight IOs"),
812 errdetail_internal("%d pending, %u
in-flight, %u idle IOs",
813
pgaio_my_backend->num_staged_ios,
814
dclist_count(&pgaio_my_backend->in_flight_ios),
815
dclist_count(&pgaio_my_backend->idle_ios)));
816
817 /*
818 * Wait for the oldest in-flight IO to complete.
819 *
820 * XXX: Reusing the general IO wait is suboptimal, we don't
need to wait
821 * for that specific IO to complete, we just need *any* IO to
complete.
822 */
823 {
824 PgAioHandle *ioh = dclist_head_element(PgAioHandle,
node,
825
&pgaio_my_backend->in_flight_ios);
826 uint64 generation = ioh->generation;
827
828 switch ((PgAioHandleState) ioh->state)
829 {
830 /* should not be in in-flight list */
831 case PGAIO_HS_IDLE:
832 case PGAIO_HS_DEFINED:
833 case PGAIO_HS_HANDED_OUT:
834 case PGAIO_HS_STAGED:
835 case PGAIO_HS_COMPLETED_LOCAL:
836 elog(ERROR, "shouldn't get here with
io:%d in state %d",
837 pgaio_io_get_id(ioh),
ioh->state);
838 break;
839
840 case PGAIO_HS_COMPLETED_IO:
841 case PGAIO_HS_SUBMITTED:
842 pgaio_debug_io(DEBUG2, ioh,
843 "waiting for
free io with %u in flight",
844
dclist_count(&pgaio_my_backend->in_flight_ios));
845
846 /*
847 * In a more general case this would be
racy, because the
848 * generation could increase after we
read ioh->state above.
849 * But we are only looking at IOs by
the current backend and
850 * the IO can only be recycled by this
backend. Even this is
851 * only OK because we get the handle's
generation before
852 * potentially processing interrupts,
e.g. as part of
853 * pgaio_debug_io().
854 */
855 pgaio_io_wait(ioh, generation);
856 break;
857
858 case PGAIO_HS_COMPLETED_SHARED:
859
860 /*
861 * It's possible that another backend
just finished this IO.
862 *
863 * Note that no interrupts are
processed between the state
864 * check and the call to reclaim -
that's important as
865 * otherwise an interrupt could have
already reclaimed the
866 * handle.
867 *
868 * Need to ensure that there's no
reordering, in the more
869 * common paths, where we wait for IO,
that's done by
870 * pgaio_io_was_recycled().
871 */
872 pg_read_barrier();
873 pgaio_io_reclaim(ioh);
874 break;
875 }
876
877 if (dclist_count(&pgaio_my_backend->idle_ios) == 0)
878 elog(PANIC, "no idle IO after waiting for IO to
terminate");
879 return;
880 }
881 }
882
883 /*
884 * Internal - code outside of AIO should never need this and it'd be
hard for
885 * such code to be safe.
886 */
887 static PgAioHandle *
888 pgaio_io_from_wref(PgAioWaitRef *iow, uint64 *ref_generation)
889 {
890 PgAioHandle *ioh;
891
892 Assert(iow->aio_index < pgaio_ctl->io_handle_count);
893
894 ioh = &pgaio_ctl->io_handles[iow->aio_index];
895
896 *ref_generation = ((uint64) iow->generation_upper) << 32 |
897 iow->generation_lower;
898
899 Assert(*ref_generation != 0);
900
901 return ioh;
902 }
903
904 static const char *
905 pgaio_io_state_get_name(PgAioHandleState s)
906 {
907 #define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym
908 switch ((PgAioHandleState) s)
0x0000000000436714 <+230>: bltu a2,a1,0x436728 <pgaio_io_reclaim+250>
0x0000000000436718 <+234>: slli a1,a1,0x3
0x000000000043671a <+236>: auipc a0,0x485
0x000000000043671e <+240>: addi a0,a0,-1106 # 0x8bb2c8
0x0000000000436722 <+244>: add a0,a0,a1
0x0000000000436724 <+246>: ld a4,0(a0)
0x0000000000436726 <+248>: j 0x43672a <pgaio_io_reclaim+252>
0x0000000000436728 <+250>: li a4,0
909 {
910 PGAIO_HS_TOSTR_CASE(IDLE);
911 PGAIO_HS_TOSTR_CASE(HANDED_OUT);
912 PGAIO_HS_TOSTR_CASE(DEFINED);
913 PGAIO_HS_TOSTR_CASE(STAGED);
914 PGAIO_HS_TOSTR_CASE(SUBMITTED);
915 PGAIO_HS_TOSTR_CASE(COMPLETED_IO);
916 PGAIO_HS_TOSTR_CASE(COMPLETED_SHARED);
917 PGAIO_HS_TOSTR_CASE(COMPLETED_LOCAL);
918 }
919 #undef PGAIO_HS_TOSTR_CASE
920
921 return NULL; /* silence compiler */
922 }
923
924 const char *
925 pgaio_io_get_state_name(PgAioHandle *ioh)
926 {
927 return pgaio_io_state_get_name(ioh->state);
0x000000000043670c <+222>: lbu a1,0(s0)
0x0000000000436710 <+226>: li a2,7
928 }
929
930 const char *
931 pgaio_result_status_string(PgAioResultStatus rs)
932 {
933 switch ((PgAioResultStatus) rs)
0x0000000000436738 <+266>: bltu a5,a2,0x43674c <pgaio_io_reclaim+286>
0x000000000043673c <+270>: slli a2,a2,0x3
0x000000000043673e <+272>: auipc a5,0x485
0x0000000000436742 <+276>: addi a5,a5,-1182 # 0x8bb2a0
0x0000000000436746 <+280>: add a2,a2,a5
0x0000000000436748 <+282>: ld a5,0(a2)
0x000000000043674a <+284>: j 0x43674e <pgaio_io_reclaim+288>
0x000000000043674c <+286>: li a5,0
End of assembler dump.
(gdb)