Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-11 Thread Xiuli PAN via cfe-commits
pxli168 added a comment.

I was on vacation. LGTM, thanks!


Repository:
  rL LLVM

http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-08 Thread Yaxun Liu via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL272153: [OpenCL] Fix __builtin_astype for vec3 types. 
(authored by yaxunl).

Changed prior to commit:
  http://reviews.llvm.org/D20133?vs=59630&id=60043#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20133

Files:
  cfe/trunk/lib/CodeGen/CGExprScalar.cpp
  cfe/trunk/test/CodeGenOpenCL/as_type.cl
  cfe/trunk/test/SemaOpenCL/as_type.cl

Index: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
===
--- cfe/trunk/lib/CodeGen/CGExprScalar.cpp
+++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp
@@ -3382,50 +3382,48 @@
   return CGF.EmitBlockLiteral(block);
 }
 
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+ Value *Src, unsigned NumElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+  SmallVector Args;
+  Args.push_back(Builder.getInt32(0));
+  Args.push_back(Builder.getInt32(1));
+  Args.push_back(Builder.getInt32(2));
+  if (NumElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
 Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   Value *Src  = CGF.EmitScalarExpr(E->getSrcExpr());
   llvm::Type *DstTy = ConvertType(E->getType());
 
-  // Going from vec4->vec3 or vec3->vec4 is a special case and requires
-  // a shuffle vector instead of a bitcast.
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  Args.push_back(Builder.getInt32(0));
-  Args.push_back(Builder.getInt32(1));
-  Args.push_back(Builder.getInt32(2));
-
-  if (numElementsDst == 4)
-Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
-
-  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  unsigned NumElementsSrc = isa(SrcTy) ?
+cast(SrcTy)->getNumElements() : 0;
+  unsigned NumElementsDst = isa(DstTy) ?
+cast(DstTy)->getNumElements() : 0;
+
+  // Going from vec3 to non-vec3 is a special case and requires a shuffle
+  // vector to get a vec4, then a bitcast if the target type is different.
+  if (NumElementsSrc == 3 && NumElementsDst != 3) {
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
+Src = Builder.CreateBitCast(Src, DstTy);
+Src->setName("astype");
+return Src;
+  }
 
-  return Builder.CreateShuffleVector(Src, UnV, Mask, "astype");
-}
+  // Going from non-vec3 to vec3 is a special case and requires a bitcast
+  // to vec4 if the original type is not vec4, then a shuffle vector to
+  // get a vec3.
+  if (NumElementsSrc != 3 && NumElementsDst == 3) {
+auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+Src = Builder.CreateBitCast(Src, Vec4Ty);
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
+Src->setName("astype");
+return Src;
   }
 
   return Builder.CreateBitCast(Src, DstTy, "astype");
Index: cfe/trunk/test/SemaOpenCL/as_type.cl
===
--- cfe/trunk/test/SemaOpenCL/as_type.cl
+++ cfe/trunk/test/SemaOpenCL/as_type.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -verify -fsyntax-only
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(16) )) char char16;
+
+char3 f1(char16 x) {
+  return  __builtin_astype(x, char3); // expected-error{{invalid reinterpretation: sizes of 'char3' (vector of 3 'char' values) and 'char16' (vector of 16 'char' values) must match}}
+}
+
+char16 f3(int x) {
+  return __builtin_astype(x, char16); // expected-error{{invalid reinterpretation: sizes of 'char16' (vector of 16 'char' values) and 'int' must match}}
+}
+
Index: cfe/trunk/test/CodeGenOpenCL/as_type.cl

Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-06 Thread Yaxun Liu via cfe-commits
yaxunl marked 5 inline comments as done.
yaxunl added a comment.

Ping

Xiuli, could you please take a look? Thanks.


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-06 Thread Anastasia Stulova via cfe-commits
Anastasia accepted this revision.
Anastasia added a comment.

LGTM!


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-03 Thread Yaxun Liu via cfe-commits
yaxunl updated this revision to Diff 59630.
yaxunl marked 10 inline comments as done.
yaxunl added a comment.

Add a sema test for mismatched type size. Fix codegen test.


http://reviews.llvm.org/D20133

Files:
  lib/CodeGen/CGExprScalar.cpp
  test/CodeGenOpenCL/as_type.cl
  test/SemaOpenCL/as_type.cl

Index: test/SemaOpenCL/as_type.cl
===
--- /dev/null
+++ test/SemaOpenCL/as_type.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -verify -fsyntax-only
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(16) )) char char16;
+
+char3 f1(char16 x) {
+  return  __builtin_astype(x, char3); // expected-error{{invalid reinterpretation: sizes of 'char3' (vector of 3 'char' values) and 'char16' (vector of 16 'char' values) must match}}
+}
+
+char16 f3(int x) {
+  return __builtin_astype(x, char16); // expected-error{{invalid reinterpretation: sizes of 'char16' (vector of 16 'char' values) and 'int' must match}}
+}
+
Index: test/CodeGenOpenCL/as_type.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/as_type.cl
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+typedef __attribute__(( ext_vector_type(16) )) char char16;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f1(char4 x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f2(char3 x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f3(int x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK-NOT: shufflevector
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
+//CHECK: ret i32 %[[astype]]
+int f5(char3 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK-NOT: shufflevector
+//CHECK: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK-NOT: bitcast
+//CHECK-NOT: shufflevector
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <3 x i32> @f8(<16 x i8> %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast <16 x i8> %[[x]] to <4 x i32>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> undef, <3 x i32> 
+//CHECK: ret <3 x i32> %[[astype]]
+int3 f8(char16 x) {
+  return __builtin_astype(x, int3);
+}
Index: lib/CodeGen/CGExprScalar.cpp
===
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -3382,50 +3382,48 @@
   return CGF.EmitBlockLiteral(block);
 }
 
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+ Value *Src, unsigned NumElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+  SmallVector Args;
+  Args.push_back(Builder.getInt32(0));
+  Args.push_back(Builder.getInt32(1));
+  Args.push_back(Builder.getInt32(2));
+  if (NumElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
 Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   Value *Src  = CGF.EmitScalarExpr(E->getSrcExpr());
   llvm::Type *DstTy = ConvertType(E->getType());
 
-  // Going from vec4->vec3 or vec3->vec4 is a special case and requires
-  // a shuffle vector instead of a bitcast.
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && 

Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-03 Thread Yaxun Liu via cfe-commits
yaxunl added inline comments.


Comment at: lib/CodeGen/CGExprScalar.cpp:3394
@@ +3393,3 @@
+  if (NumElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);

Anastasia wrote:
> Should this be 3 instead of undef?
3 or undef are both OK here since the 4th component is undefined. undef may be 
better since if optimizer checks mask first, it knows that the 4th component is 
undefined without further checking the second operand.


Comment at: test/CodeGenOpenCL/as_type.cl:8
@@ +7,3 @@
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x 
i32> 

Anastasia wrote:
> So what happens if the number of bytes doesn't match?
There will be compilation error and diag msg. I will add a sema test.


Comment at: test/CodeGenOpenCL/as_type.cl:46
@@ +45,3 @@
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK: ret i32 %[[astype]]

Anastasia wrote:
> Would it make sense to check that shufflevector is not generated?
will fix


Comment at: test/CodeGenOpenCL/as_type.cl:53
@@ +52,3 @@
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {

Anastasia wrote:
> Could we add CHECK-NOT bitcast here?
will fix


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-06-03 Thread Anastasia Stulova via cfe-commits
Anastasia added inline comments.


Comment at: lib/CodeGen/CGExprScalar.cpp:3394
@@ +3393,3 @@
+  if (NumElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);

Should this be 3 instead of undef?


Comment at: lib/CodeGen/CGExprScalar.cpp:3428
@@ -3429,3 +3427,3 @@
   }
 
   return Builder.CreateBitCast(Src, DstTy, "astype");

I see. Not related to your change, but I was just wondering if it would be 
better to change this to a Clang builtin with a custom check at some point. It 
would be easier to understand and we can avoid all this parsing/AST handling 
complications.


Comment at: test/CodeGenOpenCL/as_type.cl:8
@@ +7,3 @@
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x 
i32> 

So what happens if the number of bytes doesn't match?


Comment at: test/CodeGenOpenCL/as_type.cl:46
@@ +45,3 @@
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK: ret i32 %[[astype]]

Would it make sense to check that shufflevector is not generated?


Comment at: test/CodeGenOpenCL/as_type.cl:53
@@ +52,3 @@
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {

Could we add CHECK-NOT bitcast here?


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-30 Thread Yaxun Liu via cfe-commits
yaxunl updated this revision to Diff 58991.
yaxunl marked 5 inline comments as done.
yaxunl added a comment.

Fix variable capitalization and indentation.


http://reviews.llvm.org/D20133

Files:
  lib/CodeGen/CGExprScalar.cpp
  test/CodeGenOpenCL/as_type.cl

Index: test/CodeGenOpenCL/as_type.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/as_type.cl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+typedef __attribute__(( ext_vector_type(16) )) char char16;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f1(char4 x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f2(char3 x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f3(int x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
+//CHECK: ret i32 %[[astype]]
+int f5(char3 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <3 x i32> @f8(<16 x i8> %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast <16 x i8> %[[x]] to <4 x i32>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> undef, <3 x i32> 
+//CHECK: ret <3 x i32> %[[astype]]
+int3 f8(char16 x) {
+  return __builtin_astype(x, int3);
+}
Index: lib/CodeGen/CGExprScalar.cpp
===
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -3382,50 +3382,48 @@
   return CGF.EmitBlockLiteral(block);
 }
 
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+ Value *Src, unsigned NumElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+  SmallVector Args;
+  Args.push_back(Builder.getInt32(0));
+  Args.push_back(Builder.getInt32(1));
+  Args.push_back(Builder.getInt32(2));
+  if (NumElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
 Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   Value *Src  = CGF.EmitScalarExpr(E->getSrcExpr());
   llvm::Type *DstTy = ConvertType(E->getType());
 
-  // Going from vec4->vec3 or vec3->vec4 is a special case and requires
-  // a shuffle vector instead of a bitcast.
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  

Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-30 Thread Yaxun Liu via cfe-commits
yaxunl marked 7 inline comments as done.


Comment at: lib/CodeGen/CGExprScalar.cpp:3420
@@ -3407,35 +3419,3 @@
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- 
numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  Args.push_back(Builder.getInt32(0));
-  Args.push_back(Builder.getInt32(1));
-  Args.push_back(Builder.getInt32(2));
-
-  if (numElementsDst == 4)
-Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
-
-  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  unsigned numElementsSrc = isa(SrcTy) ?
+cast(SrcTy)->getNumElements() : 0;

Anastasia wrote:
> So this code no longer applies to just vectors?
Right. The issue happens when converting vec3 type to/from non-vector types.


Comment at: lib/CodeGen/CGExprScalar.cpp:3427
@@ +3426,3 @@
+  // vector to get a vec4, then a bitcast if the target type is different.
+  if (numElementsSrc == 3 && numElementsDst != 3) {
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);

Anastasia wrote:
> Should we check numElementsDst == 4 (the same above)?
No. Special handling is needed not just for conversion between vec3/vec4 types, 
but also between vec3 and non-vector types.


Comment at: lib/CodeGen/CGExprScalar.cpp:3428
@@ +3427,3 @@
+  if (numElementsSrc == 3 && numElementsDst != 3) {
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
+Src = Builder.CreateBitCast(Src, DstTy);

Anastasia wrote:
> I am not sure why this was chosen to be this way.
> 
> If I check the OpenCL spec for type reinterpreting it doesn't seem to require 
> shuffle vector. Also s6.2.4.2 says: 
>   "It is an error to use as_type() or as_typen() operator to reinterpret data 
> to a type of a different number of bytes."
> 
> The only valid conversion according to the spec seems to be vec4->vec3:
> 
>   "When the operand and result type contain a different number of elements, 
> the result shall be implementation-defined except if the operand is a 
> 4-component vector and the result is a 3-component vector."
> 
> This change is affecting non-OpenCL code too. Is this reasonable approach for 
> other vector implementations?
This change only affects OpenCL code since __builtin_astype is defined as a 
keyword for OpenCL only.

According to OpenCL spec v1.1 s6.1.5, vec3 type has the same size as vec4 type, 
so it is allowed to be converted to any other types which have the same size as 
vec4 type.


Comment at: lib/CodeGen/CGExprScalar.cpp:3429
@@ +3428,3 @@
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
+Src = Builder.CreateBitCast(Src, DstTy);
+Src->setName("astype");

Anastasia wrote:
> I think we only need a bitcast if the types don't match?
Builder.CreateBitCast automatically handles this.


Comment at: test/CodeGenOpenCL/as_type.cl:7
@@ +6,3 @@
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])

Anastasia wrote:
> Should this be disallowed by the frontend? According to the spec s6.2.4.2:
> 
>   "It is an error to use as_type() or as_typen() operator to reinterpret data 
> to a type of a different number of bytes."
According to the spec v1.1 s6.1.5, vec3 type has the same size as vec4 type, so 
it is allowed to be converted to other types which have the same size as vec4 
type.


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-23 Thread Anastasia Stulova via cfe-commits
Anastasia added inline comments.


Comment at: lib/CodeGen/CGExprScalar.cpp:3403
@@ +3402,3 @@
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+  Value *Src, unsigned numElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());

numElementsDst -> NumElementsDst

Also I would expect formatter to align to the parameter list area...


Comment at: lib/CodeGen/CGExprScalar.cpp:3420
@@ -3407,35 +3419,3 @@
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- 
numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  Args.push_back(Builder.getInt32(0));
-  Args.push_back(Builder.getInt32(1));
-  Args.push_back(Builder.getInt32(2));
-
-  if (numElementsDst == 4)
-Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
-
-  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  unsigned numElementsSrc = isa(SrcTy) ?
+cast(SrcTy)->getNumElements() : 0;

So this code no longer applies to just vectors?


Comment at: lib/CodeGen/CGExprScalar.cpp:3422
@@ +3421,3 @@
+cast(SrcTy)->getNumElements() : 0;
+  unsigned numElementsDst = isa(DstTy) ?
+cast(DstTy)->getNumElements() : 0;

numElementsSrc -> NumElementsSrc

The same below!


Comment at: lib/CodeGen/CGExprScalar.cpp:3427
@@ +3426,3 @@
+  // vector to get a vec4, then a bitcast if the target type is different.
+  if (numElementsSrc == 3 && numElementsDst != 3) {
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);

Should we check numElementsDst == 4 (the same above)?


Comment at: lib/CodeGen/CGExprScalar.cpp:3428
@@ +3427,3 @@
+  if (numElementsSrc == 3 && numElementsDst != 3) {
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
+Src = Builder.CreateBitCast(Src, DstTy);

I am not sure why this was chosen to be this way.

If I check the OpenCL spec for type reinterpreting it doesn't seem to require 
shuffle vector. Also s6.2.4.2 says: 
  "It is an error to use as_type() or as_typen() operator to reinterpret data 
to a type of a different number of bytes."

The only valid conversion according to the spec seems to be vec4->vec3:

  "When the operand and result type contain a different number of elements, the 
result shall be implementation-defined except if the operand is a 4-component 
vector and the result is a 3-component vector."

This change is affecting non-OpenCL code too. Is this reasonable approach for 
other vector implementations?


Comment at: lib/CodeGen/CGExprScalar.cpp:3429
@@ +3428,3 @@
+Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
+Src = Builder.CreateBitCast(Src, DstTy);
+Src->setName("astype");

I think we only need a bitcast if the types don't match?


Comment at: test/CodeGenOpenCL/as_type.cl:7
@@ +6,3 @@
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])

Should this be disallowed by the frontend? According to the spec s6.2.4.2:

  "It is an error to use as_type() or as_typen() operator to reinterpret data 
to a type of a different number of bytes."


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-16 Thread Yaxun Liu via cfe-commits
yaxunl updated this revision to Diff 57393.
yaxunl added a comment.

Update test.


http://reviews.llvm.org/D20133

Files:
  lib/CodeGen/CGExprScalar.cpp
  test/CodeGenOpenCL/as_type.cl

Index: test/CodeGenOpenCL/as_type.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/as_type.cl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+typedef __attribute__(( ext_vector_type(16) )) char char16;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f1(char4 x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f2(char3 x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f3(int x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
+//CHECK: ret i32 %[[astype]]
+int f5(char3 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <3 x i32> @f8(<16 x i8> %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast <16 x i8> %[[x]] to <4 x i32>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> undef, <3 x i32> 
+//CHECK: ret <3 x i32> %[[astype]]
+int3 f8(char16 x) {
+  return __builtin_astype(x, int3);
+}
Index: lib/CodeGen/CGExprScalar.cpp
===
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -3398,50 +3398,48 @@
   return CGF.EmitBlockLiteral(block);
 }
 
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+  Value *Src, unsigned numElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+  SmallVector Args;
+  Args.push_back(Builder.getInt32(0));
+  Args.push_back(Builder.getInt32(1));
+  Args.push_back(Builder.getInt32(2));
+  if (numElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
 Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   Value *Src  = CGF.EmitScalarExpr(E->getSrcExpr());
   llvm::Type *DstTy = ConvertType(E->getType());
 
-  // Going from vec4->vec3 or vec3->vec4 is a special case and requires
-  // a shuffle vector instead of a bitcast.
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  Args.push_back(Builder.getInt32(0));
-  Args.push_back(Builder.getInt32(1));
-  Args.push_back(Builder.getInt32(2));
-
-  if 

Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-16 Thread Yaxun Liu via cfe-commits
yaxunl updated this revision to Diff 57392.
yaxunl added a comment.

Add a test for casting char16 to int3 as Alexey suggested.


http://reviews.llvm.org/D20133

Files:
  lib/CodeGen/CGExprScalar.cpp
  test/CodeGenOpenCL/as_type.cl

Index: test/CodeGenOpenCL/as_type.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/as_type.cl
@@ -0,0 +1,63 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f1(char4 x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f2(char3 x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f3(int x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
+//CHECK: ret i32 %[[astype]]
+int f5(char3 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <3 x i32> @f3(<16 x i8> %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast <16 x i8> %[[x]] to <4 x i32>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> undef, <3 x i32> 
+//CHECK: ret <3 x i32> %[[astype]]
+int3 f8(char16 x) {
+  return __builtin_astype(x, int3);
+}
+}
Index: lib/CodeGen/CGExprScalar.cpp
===
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -3398,50 +3398,48 @@
   return CGF.EmitBlockLiteral(block);
 }
 
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+  Value *Src, unsigned numElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+  SmallVector Args;
+  Args.push_back(Builder.getInt32(0));
+  Args.push_back(Builder.getInt32(1));
+  Args.push_back(Builder.getInt32(2));
+  if (numElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
 Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   Value *Src  = CGF.EmitScalarExpr(E->getSrcExpr());
   llvm::Type *DstTy = ConvertType(E->getType());
 
-  // Going from vec4->vec3 or vec3->vec4 is a special case and requires
-  // a shuffle vector instead of a bitcast.
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  Args.push_back(Builder.getInt32(0));
-  Args.push_back(Builder.getInt32(1));
-  Args.push_back(Builder.getInt32(2));
-
-  if (numElementsDst == 4)
-

Re: [PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-12 Thread Alexey Bader via cfe-commits
bader accepted this revision.
bader added a comment.
This revision is now accepted and ready to land.

LGTM.



Comment at: test/CodeGenOpenCL/as_type.cl:6
@@ +5,3 @@
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x 
i32> 

Could you also add one test case that requires a bitcast from a vector type to a 
4-element vector type, please?
E.g. short2 <-> char3 or char16 <-> int3.


http://reviews.llvm.org/D20133



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20133: [OpenCL] Fix __builtin_astype for vec3 types.

2016-05-10 Thread Yaxun Liu via cfe-commits
yaxunl created this revision.
yaxunl added reviewers: Anastasia, pxli168, bader.
yaxunl added subscribers: cfe-commits, tstellarAMD.

__builtin_astype does not generate correct LLVM IR for vec3 types. This patch 
inserts bitcasts to/from vec4 when necessary, in addition to generating a vector 
shuffle. A codegen test is added.

http://reviews.llvm.org/D20133

Files:
  lib/CodeGen/CGExprScalar.cpp
  test/CodeGenOpenCL/as_type.cl

Index: test/CodeGenOpenCL/as_type.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/as_type.cl
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f1(char4 x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f2(char3 x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> 
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f3(int x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> 
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
+//CHECK: ret i32 %[[astype]]
+int f5(char3 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> %[[x:.*]])
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {
+  return __builtin_astype(x, char3);
+}
Index: lib/CodeGen/CGExprScalar.cpp
===
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -3394,50 +3394,48 @@
   return CGF.EmitBlockLiteral(block);
 }
 
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy , CodeGenFunction ,
+  Value *Src, unsigned numElementsDst) {
+  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+  SmallVector Args;
+  Args.push_back(Builder.getInt32(0));
+  Args.push_back(Builder.getInt32(1));
+  Args.push_back(Builder.getInt32(2));
+  if (numElementsDst == 4)
+Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
 Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   Value *Src  = CGF.EmitScalarExpr(E->getSrcExpr());
   llvm::Type *DstTy = ConvertType(E->getType());
 
-  // Going from vec4->vec3 or vec3->vec4 is a special case and requires
-  // a shuffle vector instead of a bitcast.
   llvm::Type *SrcTy = Src->getType();
-  if (isa(DstTy) && isa(SrcTy)) {
-unsigned numElementsDst = cast(DstTy)->getNumElements();
-unsigned numElementsSrc = cast(SrcTy)->getNumElements();
-if ((numElementsDst == 3 && numElementsSrc == 4)
-|| (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
-  // In the case of going from int4->float3, a bitcast is needed before
-  // doing a shuffle.
-  llvm::Type *srcElemTy =
-  cast(SrcTy)->getElementType();
-  llvm::Type *dstElemTy =
-  cast(DstTy)->getElementType();
-
-  if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
-  || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
-// Create a float type of the same size as the source or destination.
-llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- numElementsSrc);
-
-Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
-  }
-
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
-  SmallVector Args;
-  Args.push_back(Builder.getInt32(0));
-  Args.push_back(Builder.getInt32(1));
-  Args.push_back(Builder.getInt32(2));
-
-  if (numElementsDst == 4)
-Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
-
-  llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+  unsigned numElementsSrc = isa(SrcTy) ?