On Sun, Dec 13, 2009 at 8:57 PM, Nate Begeman <[email protected]> wrote:
> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Dec 13 22:57:03 2009
> @@ -807,10 +807,38 @@
>   }
>   case X86::BI__builtin_ia32_palignr128:
>   case X86::BI__builtin_ia32_palignr: {
> -    Function *F = CGM.getIntrinsic(BuiltinID == X86::BI__builtin_ia32_palignr128 ?
> -                                  Intrinsic::x86_ssse3_palign_r_128 :
> -                                  Intrinsic::x86_ssse3_palign_r);
> -    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size());
> +    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
> +
> +    // If palignr is shifting the pair of input vectors less than 17 bytes,
> +    // emit a shuffle instruction.
> +    if (shiftVal <= 16) {
> +      const llvm::Type *IntTy = llvm::Type::getInt32Ty(VMContext);
> +
> +      llvm::SmallVector<llvm::Constant*, 16> Indices;
> +      for (unsigned i = 0; i != 16; ++i)
> +        Indices.push_back(llvm::ConstantInt::get(IntTy, shiftVal + i));
> +
> +      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
> +      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
> +    }
> +
> +    // If palignr is shifting the pair of input vectors more than 16 but less
> +    // than 32 bytes, emit a logical right shift of the destination.
> +    if (shiftVal < 32) {
> +      const llvm::Type *EltTy = llvm::Type::getInt64Ty(VMContext);
> +      const llvm::Type *VecTy = llvm::VectorType::get(EltTy, 2);
> +      const llvm::Type *IntTy = llvm::Type::getInt32Ty(VMContext);
> +
> +      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
> +      Ops[1] = llvm::ConstantInt::get(IntTy, (shiftVal-16) * 8);
> +
> +      // create i32 constant
> +      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
> +      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
> +    }

Two issues here:

1. The second operand of psrldq is in bytes, not bits.
2. It would be better to simply emit this as a shuffle; we're
generally trying to get rid of unnecessary intrinsics, not add more
uses of them.

-Eli

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to