On Tue, 20 Jul 2021 at 19:04, Andres Freund <and...@anarazel.de> wrote: > > * AllocateSetAlloc.txt > > * palloc.txt > > * percent.txt > > Huh, that's interesting. You have some control flow enforcement stuff turned > on (the endbr64). And it looks like it has a non zero cost (or maybe it's > just skid). Did you enable that intentionally? If not, what > compiler/version/distro is it? I think at least on GCC that's > -fcf-protection=...
It's Ubuntu 21.04 with gcc 10.3 (specifically gcc version 10.3.0 (Ubuntu 10.3.0-1ubuntu1)). I've attached the same results from compiling with clang 12 (12.0.0-3ubuntu1~21.04.1). David
Percent | Source code & Disassembly of postgres for cycles (707 samples, percent: local period) ----------------------------------------------------------------------------------------------------- : : : : Disassembly of section .text: : : 00000000008e7c10 <AllocSetAlloc>: : AllocSetAlloc(): : : /* : * If requested size exceeds maximum for chunks, allocate an entire block : * for this request. : */ : if (unlikely(size > set->allocChunkLimit)) 7.48 : 8e7c10: cmp %rsi,0xc8(%rdi) 3.26 : 8e7c17: jb 8e7c81 <AllocSetAlloc+0x71> 0.00 : 8e7c19: xor %eax,%eax : AllocSetFreeIndex(): : if (size > (1 << ALLOC_MINBITS)) 0.44 : 8e7c1b: cmp $0x9,%rsi 0.00 : 8e7c1f: jb 8e7c2d <AllocSetAlloc+0x1d> : idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1; 0.00 : 8e7c21: add $0xffffffff,%esi 0.98 : 8e7c24: bsr %esi,%eax 9.59 : 8e7c27: xor $0xffffffe0,%eax 1.44 : 8e7c2a: add $0x1e,%eax : AllocSetAlloc(): : * corresponding free list to see if there is a free chunk we could reuse. : * If one is found, remove it from the free list, make it again a member : * of the alloc set and return its data address. : */ : fidx = AllocSetFreeIndex(size); : chunk = set->freelist[fidx]; 1.67 : 8e7c2d: movslq %eax,%rcx 4.10 : 8e7c30: mov 0x58(%rdi,%rcx,8),%rax : if (chunk != NULL) 15.97 : 8e7c35: test %rax,%rax 0.28 : 8e7c38: je 8e7c45 <AllocSetAlloc+0x35> : { : Assert(chunk->size >= size); : : set->freelist[fidx] = (AllocChunk) chunk->aset; 0.00 : 8e7c3a: mov 0x8(%rax),%rdx 13.33 : 8e7c3e: mov %rdx,0x58(%rdi,%rcx,8) 0.28 : 8e7c43: jmp 8e7c73 <AllocSetAlloc+0x63> 0.00 : 8e7c45: mov $0x8,%eax : } : : /* : * Choose the actual chunk size to allocate. : */ : chunk_size = (1 << ALLOC_MINBITS) << fidx; 0.71 : 8e7c4a: shl %cl,%eax 0.15 : 8e7c4c: movslq %eax,%rsi : : /* : * If there is enough room in the active allocation block, we will put the : * chunk into that block. Else must start a new one. 
: */ : if ((block = set->blocks) != NULL) 0.43 : 8e7c4f: mov 0x50(%rdi),%rdx 1.13 : 8e7c53: test %rdx,%rdx 0.14 : 8e7c56: je 8e7c7c <AllocSetAlloc+0x6c> : { : Size availspace = block->endptr - block->freeptr; 0.00 : 8e7c58: mov 0x18(%rdx),%rax 6.98 : 8e7c5c: mov 0x20(%rdx),%rcx 2.30 : 8e7c60: sub %rax,%rcx : : if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ))) 0.00 : 8e7c63: lea 0x10(%rsi),%r8 0.14 : 8e7c67: cmp %r8,%rcx 2.02 : 8e7c6a: jb 8e7c86 <AllocSetAlloc+0x76> : chunk = (AllocChunk) (block->freeptr); : : /* Prepare to initialize the chunk header. */ : VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); : : chunk->size = chunk_size; 2.04 : 8e7c6c: mov %rsi,(%rax) : : block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); 20.16 : 8e7c6f: add %r8,0x18(%rdx) 0.28 : 8e7c73: mov %rdi,0x8(%rax) 4.70 : 8e7c77: add $0x10,%rax : Assert(block->freeptr <= block->endptr); : : return AllocSetAllocReturnChunk(set, size, chunk, chunk_size); : } 0.00 : 8e7c7b: ret : return AllocSetAllocFromNewBlock(set, size, chunk_size); 0.00 : 8e7c7c: jmp 8e8470 <AllocSetAllocFromNewBlock> : return AllocSetAllocLarge(set, size, flags); 0.00 : 8e7c81: jmp 8e8330 <AllocSetAllocLarge> : return AllocSetAllocCarveOldAndAlloc(set, size, chunk_size, 0.00 : 8e7c86: jmp 8e83e0 <AllocSetAllocCarveOldAndAlloc>
Percent | Source code & Disassembly of postgres for cycles (123 samples, percent: local period) ----------------------------------------------------------------------------------------------------- : : : : Disassembly of section .text: : : 00000000008ee3a0 <palloc>: : palloc(): : MemoryContextStatsDetail(TopMemoryContext, 100, false); : } : : void * : palloc(Size size) : { 9.12 : 8ee3a0: mov %rdi,%rsi : /* duplicates MemoryContextAlloc to avoid increased overhead */ : void *ret; : MemoryContext context = CurrentMemoryContext; 2.47 : 8ee3a3: mov 0x295a86(%rip),%rdi # b83e30 <CurrentMemoryContext> : : AssertArg(MemoryContextIsValid(context)); : AssertNotInCriticalSection(context); : context->isReset = false; 22.83 : 8ee3aa: movb $0x0,0x4(%rdi) : : ret = context->methods->alloc(context, size, 0); 34.25 : 8ee3ae: mov 0x10(%rdi),%rax 7.68 : 8ee3b2: mov (%rax),%rax 23.66 : 8ee3b5: xor %edx,%edx 0.00 : 8ee3b7: jmp *%rax
2.27% postgres postgres [.] AllocSetAlloc 0.59% postgres postgres [.] pfree 0.44% postgres postgres [.] MemoryContextAllocZero 0.39% postgres postgres [.] palloc 0.34% postgres postgres [.] MemoryContextAllocZeroAligned 0.27% postgres postgres [.] palloc0 0.17% postgres postgres [.] AllocSetAllocCarveOldAndAlloc 0.09% postgres postgres [.] MemoryContextAlloc 0.02% postgres postgres [.] AllocSetAllocLarge 0.01% postgres postgres [.] AllocSetAllocFromNewBlock