On Tue, 20 Jul 2021 at 19:04, Andres Freund <and...@anarazel.de> wrote:
> > * AllocateSetAlloc.txt
> > * palloc.txt
> > * percent.txt
>
> Huh, that's interesting. You have some control flow enforcement stuff turned 
> on (the endbr64). And it looks like it has a non zero cost (or maybe it's 
> just skid). Did you enable that intentionally? If not, what 
> compiler/version/distro is it? I think at least on GCC that's 
> -fcf-protection=...

It's Ubuntu 21.04 with gcc 10.3 (specifically gcc version 10.3.0
(Ubuntu 10.3.0-1ubuntu1)).

I've attached the same results from compiling with clang 12
(12.0.0-3ubuntu1~21.04.1).

David
 Percent |      Source code & Disassembly of postgres for cycles (707 samples, percent: local period)
-----------------------------------------------------------------------------------------------------
         :
         :
         :
         :            Disassembly of section .text:
         :
         :            00000000008e7c10 <AllocSetAlloc>:
         :            AllocSetAlloc():
         :
         :            /*
         :            * If requested size exceeds maximum for chunks, allocate an entire block
         :            * for this request.
         :            */
         :            if (unlikely(size > set->allocChunkLimit))
    7.48 :   8e7c10: cmp    %rsi,0xc8(%rdi)
    3.26 :   8e7c17: jb     8e7c81 <AllocSetAlloc+0x71>
    0.00 :   8e7c19: xor    %eax,%eax
         :            AllocSetFreeIndex():
         :            if (size > (1 << ALLOC_MINBITS))
    0.44 :   8e7c1b: cmp    $0x9,%rsi
    0.00 :   8e7c1f: jb     8e7c2d <AllocSetAlloc+0x1d>
         :            idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1;
    0.00 :   8e7c21: add    $0xffffffff,%esi
    0.98 :   8e7c24: bsr    %esi,%eax
    9.59 :   8e7c27: xor    $0xffffffe0,%eax
    1.44 :   8e7c2a: add    $0x1e,%eax
         :            AllocSetAlloc():
         :            * corresponding free list to see if there is a free chunk we could reuse.
         :            * If one is found, remove it from the free list, make it again a member
         :            * of the alloc set and return its data address.
         :            */
         :            fidx = AllocSetFreeIndex(size);
         :            chunk = set->freelist[fidx];
    1.67 :   8e7c2d: movslq %eax,%rcx
    4.10 :   8e7c30: mov    0x58(%rdi,%rcx,8),%rax
         :            if (chunk != NULL)
   15.97 :   8e7c35: test   %rax,%rax
    0.28 :   8e7c38: je     8e7c45 <AllocSetAlloc+0x35>
         :            {
         :            Assert(chunk->size >= size);
         :
         :            set->freelist[fidx] = (AllocChunk) chunk->aset;
    0.00 :   8e7c3a: mov    0x8(%rax),%rdx
   13.33 :   8e7c3e: mov    %rdx,0x58(%rdi,%rcx,8)
    0.28 :   8e7c43: jmp    8e7c73 <AllocSetAlloc+0x63>
    0.00 :   8e7c45: mov    $0x8,%eax
         :            }
         :
         :            /*
         :            * Choose the actual chunk size to allocate.
         :            */
         :            chunk_size = (1 << ALLOC_MINBITS) << fidx;
    0.71 :   8e7c4a: shl    %cl,%eax
    0.15 :   8e7c4c: movslq %eax,%rsi
         :
         :            /*
         :            * If there is enough room in the active allocation block, we will put the
         :            * chunk into that block.  Else must start a new one.
         :            */
         :            if ((block = set->blocks) != NULL)
    0.43 :   8e7c4f: mov    0x50(%rdi),%rdx
    1.13 :   8e7c53: test   %rdx,%rdx
    0.14 :   8e7c56: je     8e7c7c <AllocSetAlloc+0x6c>
         :            {
         :            Size            availspace = block->endptr - block->freeptr;
    0.00 :   8e7c58: mov    0x18(%rdx),%rax
    6.98 :   8e7c5c: mov    0x20(%rdx),%rcx
    2.30 :   8e7c60: sub    %rax,%rcx
         :
         :            if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ)))
    0.00 :   8e7c63: lea    0x10(%rsi),%r8
    0.14 :   8e7c67: cmp    %r8,%rcx
    2.02 :   8e7c6a: jb     8e7c86 <AllocSetAlloc+0x76>
         :            chunk = (AllocChunk) (block->freeptr);
         :
         :            /* Prepare to initialize the chunk header. */
         :            VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ);
         :
         :            chunk->size = chunk_size;
    2.04 :   8e7c6c: mov    %rsi,(%rax)
         :
         :            block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ);
   20.16 :   8e7c6f: add    %r8,0x18(%rdx)
    0.28 :   8e7c73: mov    %rdi,0x8(%rax)
    4.70 :   8e7c77: add    $0x10,%rax
         :            Assert(block->freeptr <= block->endptr);
         :
         :            return AllocSetAllocReturnChunk(set, size, chunk, chunk_size);
         :            }
    0.00 :   8e7c7b: ret
         :            return AllocSetAllocFromNewBlock(set, size, chunk_size);
    0.00 :   8e7c7c: jmp    8e8470 <AllocSetAllocFromNewBlock>
         :            return AllocSetAllocLarge(set, size, flags);
    0.00 :   8e7c81: jmp    8e8330 <AllocSetAllocLarge>
         :            return AllocSetAllocCarveOldAndAlloc(set, size, 
chunk_size,
    0.00 :   8e7c86: jmp    8e83e0 <AllocSetAllocCarveOldAndAlloc>
 Percent |      Source code & Disassembly of postgres for cycles (123 samples, percent: local period)
-----------------------------------------------------------------------------------------------------
         :
         :
         :
         :            Disassembly of section .text:
         :
         :            00000000008ee3a0 <palloc>:
         :            palloc():
         :            MemoryContextStatsDetail(TopMemoryContext, 100, false);
         :            }
         :
         :            void *
         :            palloc(Size size)
         :            {
    9.12 :   8ee3a0: mov    %rdi,%rsi
         :            /* duplicates MemoryContextAlloc to avoid increased overhead */
         :            void       *ret;
         :            MemoryContext context = CurrentMemoryContext;
    2.47 :   8ee3a3: mov    0x295a86(%rip),%rdi        # b83e30 <CurrentMemoryContext>
         :
         :            AssertArg(MemoryContextIsValid(context));
         :            AssertNotInCriticalSection(context);
         :            context->isReset = false;
   22.83 :   8ee3aa: movb   $0x0,0x4(%rdi)
         :
         :            ret = context->methods->alloc(context, size, 0);
   34.25 :   8ee3ae: mov    0x10(%rdi),%rax
    7.68 :   8ee3b2: mov    (%rax),%rax
   23.66 :   8ee3b5: xor    %edx,%edx
    0.00 :   8ee3b7: jmp    *%rax
     2.27%  postgres  postgres            [.] AllocSetAlloc
     0.59%  postgres  postgres            [.] pfree
     0.44%  postgres  postgres            [.] MemoryContextAllocZero
     0.39%  postgres  postgres            [.] palloc
     0.34%  postgres  postgres            [.] MemoryContextAllocZeroAligned
     0.27%  postgres  postgres            [.] palloc0
     0.17%  postgres  postgres            [.] AllocSetAllocCarveOldAndAlloc
     0.09%  postgres  postgres            [.] MemoryContextAlloc
     0.02%  postgres  postgres            [.] AllocSetAllocLarge
     0.01%  postgres  postgres            [.] AllocSetAllocFromNewBlock

Reply via email to