Bug ID: 30433
           Summary: [X86][SSE] Improve FCOPYSIGN lowering of vectors
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: X86
    Classification: Unclassified

Lowering of FCOPYSIGN for vector values is currently scalarized on SSE/AVX targets.

Both vector lowering and suitable vectorization costs and tests need to be added.

define <4 x float> @fcopysign_v4f32(<4 x float> %x, <4 x float> %y) {
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %1
}
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)

define <2 x double> @fcopysign_v2f64(<2 x double> %x, <2 x double> %y) {
  %1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %1
}
declare <2 x double> @llvm.copysign.v2f64(<2 x double> %Mag, <2 x double> %Sgn)

fcopysign_v4f32:                        # @fcopysign_v4f32
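    vmovaps    .LCPI0_0(%rip), %xmm2   # xmm2 = [-0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00]
    vmovaps    .LCPI0_1(%rip), %xmm4   # xmm4 = [nan,0.000000e+00,0.000000e+00,0.000000e+00]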
    vmovshdup    %xmm0, %xmm6    # xmm6 = xmm0[1,1,3,3]
    vandps    %xmm2, %xmm1, %xmm3
    vandps    %xmm4, %xmm0, %xmm5
    vandps    %xmm4, %xmm6, %xmm6
    vorps    %xmm3, %xmm5, %xmm3
    vmovshdup    %xmm1, %xmm5    # xmm5 = xmm1[1,1,3,3]
    vandps    %xmm2, %xmm5, %xmm5
    vorps    %xmm5, %xmm6, %xmm5
    vpermilpd    $1, %xmm0, %xmm6 # xmm6 = xmm0[1,0]
    vpermilps    $231, %xmm0, %xmm0 # xmm0 = xmm0[3,1,2,3]
    vinsertps    $16, %xmm5, %xmm3, %xmm3 # xmm3 = xmm3[0],xmm5[0],xmm3[2,3]
    vpermilpd    $1, %xmm1, %xmm5 # xmm5 = xmm1[1,0]
    vpermilps    $231, %xmm1, %xmm1 # xmm1 = xmm1[3,1,2,3]
    vandpd    %xmm4, %xmm6, %xmm6
    vandps    %xmm4, %xmm0, %xmm0
    vandpd    %xmm2, %xmm5, %xmm5
    vandps    %xmm2, %xmm1, %xmm1
    vorpd    %xmm5, %xmm6, %xmm5
    vorps    %xmm1, %xmm0, %xmm0
    vinsertps    $32, %xmm5, %xmm3, %xmm3 # xmm3 = xmm3[0,1],xmm5[0],xmm3[3]
    vinsertps    $48, %xmm0, %xmm3, %xmm0 # xmm0 = xmm3[0,1,2],xmm0[0]
    retq

fcopysign_v2f64:                        # @fcopysign_v2f64
    vmovapd    .LCPI1_0(%rip), %xmm2   # xmm2 = [-0.000000e+00,0.000000e+00]
    vmovapd    .LCPI1_1(%rip), %xmm4   # xmm4 = [nan,0.000000e+00]
    vandpd    %xmm2, %xmm1, %xmm3
    vandpd    %xmm4, %xmm0, %xmm5
    vpermilpd    $1, %xmm1, %xmm1 # xmm1 = xmm1[1,0]
    vpermilpd    $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
    vandpd    %xmm2, %xmm1, %xmm1
    vandpd    %xmm4, %xmm0, %xmm0
    vorpd    %xmm3, %xmm5, %xmm3
    vorpd    %xmm1, %xmm0, %xmm0
    vunpcklpd    %xmm0, %xmm3, %xmm0 # xmm0 = xmm3[0],xmm0[0]
    retq

You are receiving this mail because:
You are on the CC list for the bug.
llvm-bugs mailing list

Reply via email to