Applying your patch plus adding -fno-omit-frame-pointer, I got 54526.90 notpm.
Part of the profile is below:

# Samples: 610K of event 'cycles'
# Event count (approx.): 6686532056450
#
# Overhead Command Shared Object Symbol
# ........ .......... ............. ..................................................
#
4.08% postgres postgres [.] hash_search_with_hash_value
| --- hash_search_with_hash_value
| --2.87%-- BufTableLookup
| --2.86%-- ReadBuffer_common
ReadBufferExtended
| |--0.86%-- index_fetch_heap
| | | --0.62%-- index_getnext
| IndexNext
| ExecScan
| ExecProcNode
| |--0.71%-- BitmapHeapNext
| ExecScan
| ExecProcNode
| --0.57%-- _bt_relandgetbuf
| --0.57%-- _bt_search

3.75% postgres postgres [.] heap_hot_search_buffer
| --- heap_hot_search_buffer
| |--1.92%-- BitmapHeapNext
| ExecScan
| ExecProcNode
| | | --1.12%-- standard_ExecutorRun
| _SPI_execute_plan
| SPI_execute_plan
| | | --0.70%-- payment
| ExecMakeFunctionResult
| ExecProject
| ExecResult
| ExecProcNode
| standard_ExecutorRun
| PortalRunSelect
| PortalRun
| PostgresMain
| ServerLoop
| PostmasterMain
| main
| __libc_start_main
| --1.74%-- index_fetch_heap
| --1.46%-- index_getnext
| --1.45%-- IndexNext
ExecScan
ExecProcNode
| --0.96%-- standard_ExecutorRun
_SPI_execute_plan
SPI_execute_plan
| --0.50%-- new_order
ExecMakeFunctionResult
ExecProject
ExecResult
ExecProcNode
standard_ExecutorRun
PortalRunSelect
PortalRunFetch
PerformPortalFetch
standard_ProcessUtility
PortalRunUtility
FillPortalStore
PortalRun
PostgresMain
ServerLoop
PostmasterMain
main
__libc_start_main

3.15% postgres postgres [.] LWLockAcquire
| --- LWLockAcquire
| --1.65%-- ReadBuffer_common
ReadBufferExtended

2.74% postgres postgres [.] PinBuffer
| --- PinBuffer
| --2.72%-- ReadBuffer_common
ReadBufferExtended
| |--0.71%-- index_fetch_heap
| | | --0.51%-- index_getnext
| IndexNext
| ExecScan
| ExecProcNode
| |--0.67%-- BitmapHeapNext
| ExecScan
| ExecProcNode
| --0.60%-- heapgetpage
heapgettup_pagemode
heap_getnext
SeqNext
ExecScan
ExecProcNode

2.72% postgres postgres [.] _bt_compare
| --- _bt_compare
| --2.51%-- _bt_binsrch
| |--1.18%-- _bt_search
| | | --0.74%-- _bt_first
| | | --0.72%-- btgettuple
| FunctionCall2Coll
| index_getnext_tid
| | | --0.58%-- index_getnext
| | | --0.58%-- IndexNext
| ExecScan
| ExecProcNode
| --0.97%-- _bt_first
| --0.94%-- btgettuple
FunctionCall2Coll
index_getnext_tid
| --0.66%-- index_getnext
| --0.66%-- IndexNext
ExecScan
ExecProcNode
| --0.51%-- standard_ExecutorRun
_SPI_execute_plan
SPI_execute_plan

2.47% postgres postgres [.] AllocSetAlloc
| --- AllocSetAlloc
| --0.54%-- MemoryContextAllocZeroAligned