http://llvm.org/bugs/show_bug.cgi?id=17185
Sanjay Patel <[email protected]> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
                 CC|                            |[email protected]
         Resolution|---                         |FIXED

--- Comment #2 from Sanjay Patel <[email protected]> ---
Using:
$ ./clang -v
clang version 3.5.0 (trunk 205798) (llvm/trunk 205792)
Target: x86_64-apple-darwin13.1.0
Thread model: posix

We're not generating any floating-point variants of the broadcast instruction.
We're generating 'vpbroadcastq' now. Not sure if this is actually better
codegen though...certainly bigger:

_foo:                                   ## @foo
        .cfi_startproc
## BB#0:                                ## %entry
                                        ## kill: ESI<def> ESI<kill> RSI<def>
        testl   %esi, %esi
        jle     LBB0_23
## BB#1:                                ## %for.body.lr.ph
        movl    %edx, %r8d
        leal    -1(%rsi), %eax
        leaq    1(%rax), %r9
        xorl    %r10d, %r10d
        movabsq $8589934560, %rdx       ## imm = 0x1FFFFFFE0
        andq    %r9, %rdx
        je      LBB0_5
## BB#2:                                ## %vector.ph
        vmovq   %r8, %xmm0
        vpbroadcastq    %xmm0, %ymm0    <---- integer form of broadcast
        incq    %rax
        andq    $-32, %rax
        xorl    %ecx, %ecx
        vmovdqa LCPI0_0(%rip), %ymm1
        vmovdqa LCPI0_1(%rip), %ymm2
        vmovdqa LCPI0_2(%rip), %ymm3
        vmovdqa LCPI0_3(%rip), %ymm4
        vmovdqa LCPI0_4(%rip), %ymm5
        vmovdqa LCPI0_5(%rip), %ymm6
        vmovdqa LCPI0_6(%rip), %ymm7
        vmovdqa LCPI0_7(%rip), %ymm8
        vmovdqa LCPI0_8(%rip), %ymm9
        .align  4, 0x90
LBB0_3:                                 ## %vector.body
                                        ## =>This Inner Loop Header: Depth=1
        vmovq   %rcx, %xmm10
        vpermq  $0, %ymm10, %ymm10      ## ymm10 = ymm10[0,0,0,0]
        vpaddq  %ymm0, %ymm10, %ymm10
        vpaddq  %ymm1, %ymm10, %ymm11
        vpaddq  %ymm2, %ymm10, %ymm12
        vpaddq  %ymm3, %ymm10, %ymm13
        vpaddq  %ymm4, %ymm10, %ymm14
        vpermd  %ymm12, %ymm9, %ymm12
        vpermd  %ymm11, %ymm9, %ymm11
        vinserti128     $1, %xmm11, %ymm12, %ymm11
        vpaddq  %ymm5, %ymm10, %ymm12
        vpermd  %ymm14, %ymm9, %ymm14
        vpermd  %ymm13, %ymm9, %ymm13
        vinserti128     $1, %xmm13, %ymm14, %ymm13
        vpaddq  %ymm6, %ymm10, %ymm14
        vpermd  %ymm14, %ymm9, %ymm14
        vpermd  %ymm12, %ymm9, %ymm12
        vinserti128     $1, %xmm12, %ymm14, %ymm12
        vpaddq  %ymm7, %ymm10, %ymm14
        vpaddq  %ymm8, %ymm10, %ymm10
        vpermd  %ymm10, %ymm9, %ymm10
        vpermd  %ymm14, %ymm9, %ymm14
        vinserti128     $1, %xmm14, %ymm10, %ymm10
        vmovdqu %ymm11, (%rdi,%rcx,4)
        vmovdqu %ymm13, 32(%rdi,%rcx,4)
        vmovdqu %ymm12, 64(%rdi,%rcx,4)
        vmovdqu %ymm10, 96(%rdi,%rcx,4)
        addq    $32, %rcx
        cmpq    %rcx, %rax
        jne     LBB0_3
## BB#4:
        movq    %rdx, %r10
LBB0_5:                                 ## %middle.block
        cmpq    %r10, %r9
        je      LBB0_23
## BB#6:                                ## %for.body.preheader
        leal    1(%rsi), %edx
        leal    1(%r10), %eax
        subl    %eax, %edx
        movl    %edx, %eax
        andl    $7, %eax
        je      LBB0_20
## BB#7:                                ## %unr.cmp60
        cmpl    $1, %eax
        je      LBB0_19
## BB#8:                                ## %unr.cmp52
        cmpl    $2, %eax
        je      LBB0_18
## BB#9:                                ## %unr.cmp44
        cmpl    $3, %eax
        je      LBB0_17
## BB#10:                               ## %unr.cmp36
        cmpl    $4, %eax
        je      LBB0_16
## BB#11:                               ## %unr.cmp28
        cmpl    $5, %eax
        je      LBB0_15
## BB#12:                               ## %unr.cmp
        cmpl    $6, %eax
        je      LBB0_14
## BB#13:                               ## %for.body.unr
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_14:                                ## %for.body.unr17
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_15:                                ## %for.body.unr22
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_16:                                ## %for.body.unr30
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_17:                                ## %for.body.unr38
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_18:                                ## %for.body.unr46
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_19:                                ## %for.body.unr54
        leal    (%r10,%r8), %eax
        movl    %eax, (%rdi,%r10,4)
        incq    %r10
LBB0_20:                                ## %for.body.preheader.split
        cmpl    $8, %edx
        jb      LBB0_23
## BB#21:                               ## %for.body.preheader.split.split
        leaq    28(%rdi,%r10,4), %rax
        leaq    3(%r10,%r8), %rdx
        subl    %r10d, %esi
        xorl    %edi, %edi
        .align  4, 0x90
LBB0_22:                                ## %for.body
                                        ## =>This Inner Loop Header: Depth=1
        leal    (%rdx,%rdi), %r8d
        leal    -3(%rdx,%rdi), %ecx
        movl    %ecx, -28(%rax,%rdi,4)
        leal    -2(%rdx,%rdi), %ecx
        movl    %ecx, -24(%rax,%rdi,4)
        leal    -1(%rdx,%rdi), %ecx
        movl    %ecx, -20(%rax,%rdi,4)
        movl    %r8d, -16(%rax,%rdi,4)
        leal    1(%rdx,%rdi), %ecx
        movl    %ecx, -12(%rax,%rdi,4)
        leal    2(%rdx,%rdi), %ecx
        movl    %ecx, -8(%rax,%rdi,4)
        leal    3(%rdx,%rdi), %ecx
        movl    %ecx, -4(%rax,%rdi,4)
        leal    4(%rdx,%rdi), %ecx
        movl    %ecx, (%rax,%rdi,4)
        addq    $8, %r10
        addq    $8, %rdi
        cmpl    %edi, %esi
        jne     LBB0_22
LBB0_23:                                ## %for.end
        vzeroupper
        retq

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
LLVMbugs mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs
