On Tue, 20 Jul 2021 at 18:17, Andres Freund <and...@anarazel.de> wrote:
> Any chance you could show a `perf annotate AllocSetAlloc` and `perf annotate
> palloc` from a patched run? And perhaps how high their percentages of the
> total work are. E.g. using something like
> perf report -g none|grep -E 'AllocSetAlloc|palloc|MemoryContextAlloc|pfree'

Sure. See attached.

David
 Percent |      Source code & Disassembly of postgres for cycles (626 samples, percent: local period)
-----------------------------------------------------------------------------------------------------
         :
         :
         :
         :            Disassembly of section .text:
         :
         :            000000000056bd80 <AllocSetAlloc>:
         :            AllocSetAlloc():
         :            * is marked, as mcxt.c will set it to UNDEFINED.  In some paths we will
         :            * return space that is marked NOACCESS - AllocSetRealloc has to beware!
         :            */
         :            static void *
         :            AllocSetAlloc(MemoryContext context, Size size, int flags)
         :            {
    7.66 :   56bd80: endbr64
         :
         :            /*
         :            * If requested size exceeds maximum for chunks, allocate an entire block
         :            * for this request.
         :            */
         :            if (unlikely(size > set->allocChunkLimit))
    2.68 :   56bd84: cmp    %rsi,0xc8(%rdi)
    3.34 :   56bd8b: jb     56be10 <AllocSetAlloc+0x90>
         :            AllocSetFreeIndex():
         :            idx = 0;
    0.17 :   56bd91: xor    %ecx,%ecx
         :            if (size > (1 << ALLOC_MINBITS))
    0.00 :   56bd93: cmp    $0x8,%rsi
    0.16 :   56bd97: jbe    56bda9 <AllocSetAlloc+0x29>
         :            idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1;
    0.00 :   56bd99: sub    $0x1,%esi
    0.47 :   56bd9c: mov    $0x1d,%ecx
    0.50 :   56bda1: bsr    %esi,%esi
    6.38 :   56bda4: xor    $0x1f,%esi
    1.92 :   56bda7: sub    %esi,%ecx
         :            AllocSetAlloc():
         :            * corresponding free list to see if there is a free chunk we could reuse.
         :            * If one is found, remove it from the free list, make it again a member
         :            * of the alloc set and return its data address.
         :            */
         :            fidx = AllocSetFreeIndex(size);
         :            chunk = set->freelist[fidx];
    1.44 :   56bda9: movslq %ecx,%rdx
    1.88 :   56bdac: add    $0xa,%rdx
    1.11 :   56bdb0: mov    0x8(%rdi,%rdx,8),%rax
         :            if (chunk != NULL)
   19.90 :   56bdb5: test   %rax,%rax
    0.32 :   56bdb8: je     56bdd0 <AllocSetAlloc+0x50>
         :            {
         :            Assert(chunk->size >= size);
         :
         :            set->freelist[fidx] = (AllocChunk) chunk->aset;
    0.79 :   56bdba: mov    0x8(%rax),%rcx
         :            AllocSetAllocReturnChunk():
         :            return AllocChunkGetPointer(chunk);
   13.97 :   56bdbe: add    $0x10,%rax
         :            AllocSetAlloc():
         :            set->freelist[fidx] = (AllocChunk) chunk->aset;
    0.00 :   56bdc2: mov    %rcx,0x8(%rdi,%rdx,8)
         :            AllocSetAllocReturnChunk():
         :            chunk->aset = (void *) set;
    0.00 :   56bdc7: mov    %rdi,-0x8(%rax)
         :            AllocSetAlloc():
         :
         :            return AllocSetAllocReturnChunk(set, size, chunk, chunk->size);
    0.16 :   56bdcb: ret
    0.00 :   56bdcc: nopl   0x0(%rax)
         :            }
         :
         :            /*
         :            * Choose the actual chunk size to allocate.
         :            */
         :            chunk_size = (1 << ALLOC_MINBITS) << fidx;
    1.10 :   56bdd0: mov    $0x8,%esi
         :
         :            /*
         :            * If there is enough room in the active allocation block, we will put the
         :            * chunk into that block.  Else must start a new one.
         :            */
         :            if ((block = set->blocks) != NULL)
    0.32 :   56bdd5: mov    0x50(%rdi),%rdx
         :            chunk_size = (1 << ALLOC_MINBITS) << fidx;
    0.32 :   56bdd9: shl    %cl,%esi
    0.00 :   56bddb: movslq %esi,%rsi
         :            if ((block = set->blocks) != NULL)
    0.00 :   56bdde: test   %rdx,%rdx
    0.47 :   56bde1: je     56be18 <AllocSetAlloc+0x98>
         :            {
         :            Size            availspace = block->endptr - block->freeptr;
    0.00 :   56bde3: mov    0x18(%rdx),%rax
    4.80 :   56bde7: mov    0x20(%rdx),%rcx
         :
         :            if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ)))
    1.75 :   56bdeb: lea    0x10(%rsi),%r8
         :            Size            availspace = block->endptr - block->freeptr;
    0.47 :   56bdef: sub    %rax,%rcx
         :            if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ)))
    0.16 :   56bdf2: cmp    %rcx,%r8
    1.90 :   56bdf5: ja     56be20 <AllocSetAlloc+0xa0>
         :            chunk = (AllocChunk) (block->freeptr);
         :
         :            /* Prepare to initialize the chunk header. */
         :            VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ);
         :
         :            chunk->size = chunk_size;
    0.00 :   56bdf7: mov    %rsi,(%rax)
         :            AllocSetAllocReturnChunk():
         :            return AllocChunkGetPointer(chunk);
   21.14 :   56bdfa: add    $0x10,%rax
         :            AllocSetAlloc():
         :
         :            block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ);
    0.00 :   56bdfe: add    %r8,0x18(%rdx)
         :            AllocSetAllocReturnChunk():
         :            chunk->aset = (void *) set;
    0.16 :   56be02: mov    %rdi,-0x8(%rax)
         :            AllocSetAlloc():
         :            Assert(block->freeptr <= block->endptr);
         :
         :            return AllocSetAllocReturnChunk(set, size, chunk, chunk_size);
         :            }
    4.58 :   56be06: ret
    0.00 :   56be07: nopw   0x0(%rax,%rax,1)
         :            return AllocSetAllocLarge(set, size, flags);
    0.00 :   56be10: jmp    56bcc0 <AllocSetAllocLarge>
    0.00 :   56be15: nopl   (%rax)
         :            return AllocSetAllocFromNewBlock(set, size, chunk_size);
    0.00 :   56be18: jmp    56bae0 <AllocSetAllocFromNewBlock.constprop.0>
    0.00 :   56be1d: nopl   (%rax)
         :            return AllocSetAllocCarveOldAndAlloc(set, size, chunk_size,
    0.00 :   56be20: jmp    56bbd0 <AllocSetAllocCarveOldAndAlloc.isra.0>
 Percent |      Source code & Disassembly of postgres for cycles (81 samples, percent: local period)
----------------------------------------------------------------------------------------------------
         :
         :
         :
         :            Disassembly of section .text:
         :
         :            0000000000571220 <palloc>:
         :            palloc():
         :            MemoryContextStatsDetail(TopMemoryContext, 100, false);
         :            }
         :
         :            void *
         :            palloc(Size size)
         :            {
   13.44 :   571220: endbr64
    2.45 :   571224: mov    %rdi,%rsi
         :            /* duplicates MemoryContextAlloc to avoid increased overhead */
         :            void       *ret;
         :            MemoryContext context = CurrentMemoryContext;
    2.44 :   571227: mov    0x2be9b2(%rip),%rdi        # 82fbe0 <CurrentMemoryContext>
         :
         :            AssertArg(MemoryContextIsValid(context));
         :            AssertNotInCriticalSection(context);
         :            context->isReset = false;
         :
         :            ret = context->methods->alloc(context, size, 0);
   11.11 :   57122e: xor    %edx,%edx
    1.21 :   571230: mov    0x10(%rdi),%rax
         :            context->isReset = false;
   31.08 :   571234: movb   $0x0,0x4(%rdi)
         :            ret = context->methods->alloc(context, size, 0);
    3.67 :   571238: mov    (%rax),%rax
   34.59 :   57123b: jmp    *%rax
     2.38%  postgres  postgres            [.] AllocSetAlloc
     0.37%  postgres  postgres            [.] pfree
     0.31%  postgres  postgres            [.] palloc
     0.12%  postgres  postgres            [.] MemoryContextAlloc
     0.12%  postgres  postgres            [.] AllocSetAllocCarveOldAndAlloc.isra.0
     0.10%  postgres  postgres            [.] MemoryContextAllocZero
     0.10%  postgres  postgres            [.] palloc0
     0.06%  postgres  postgres            [.] MemoryContextAllocZeroAligned
     0.02%  postgres  postgres            [.] AllocSetAllocLarge

Reply via email to