Re: [PATCH] D20794: [CUDA] Fix order of vectorized ldg intrinsics' elements.

2016-05-30 Thread Justin Lebar via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL271215: [CUDA] Fix order of vectorized ldg intrinsics' 
elements. (authored by jlebar).

Changed prior to commit:
  http://reviews.llvm.org/D20794?vs=58972&id=58976#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20794

Files:
  cfe/trunk/lib/Headers/__clang_cuda_intrinsics.h

Index: cfe/trunk/lib/Headers/__clang_cuda_intrinsics.h
===
--- cfe/trunk/lib/Headers/__clang_cuda_intrinsics.h
+++ cfe/trunk/lib/Headers/__clang_cuda_intrinsics.h
@@ -74,10 +74,10 @@
   typedef char c4 __attribute__((ext_vector_type(4)));
   c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));
   char4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ short2 __ldg(const short2 *ptr) {
@@ -92,10 +92,10 @@
   typedef short s4 __attribute__((ext_vector_type(4)));
   s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));
   short4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ int2 __ldg(const int2 *ptr) {
@@ -110,10 +110,10 @@
   typedef int i4 __attribute__((ext_vector_type(4)));
   i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));
   int4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ longlong2 __ldg(const longlong2 *ptr) {
@@ -137,10 +137,10 @@
   typedef unsigned char uc4 __attribute__((ext_vector_type(4)));
   uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));
   uchar4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ ushort2 __ldg(const ushort2 *ptr) {
@@ -155,10 +155,10 @@
   typedef unsigned short us4 __attribute__((ext_vector_type(4)));
   us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));
   ushort4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ uint2 __ldg(const uint2 *ptr) {
@@ -173,10 +173,10 @@
   typedef unsigned int ui4 __attribute__((ext_vector_type(4)));
   ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));
   uint4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {
@@ -200,10 +200,10 @@
   typedef float f4 __attribute__((ext_vector_type(4)));
   f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));
   float4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ double2 __ldg(const double2 *ptr) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20794: [CUDA] Fix order of vectorized ldg intrinsics' elements.

2016-05-30 Thread Justin Lebar via cfe-commits
jlebar created this revision.
jlebar added subscribers: tra, cfe-commits.

The order is [x, y, z, w], not [w, x, y, z].

http://reviews.llvm.org/D20794

Files:
  lib/Headers/__clang_cuda_intrinsics.h

Index: lib/Headers/__clang_cuda_intrinsics.h
===
--- lib/Headers/__clang_cuda_intrinsics.h
+++ lib/Headers/__clang_cuda_intrinsics.h
@@ -74,10 +74,10 @@
   typedef char c4 __attribute__((ext_vector_type(4)));
   c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));
   char4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ short2 __ldg(const short2 *ptr) {
@@ -92,10 +92,10 @@
   typedef short s4 __attribute__((ext_vector_type(4)));
   s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));
   short4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ int2 __ldg(const int2 *ptr) {
@@ -110,10 +110,10 @@
   typedef int i4 __attribute__((ext_vector_type(4)));
   i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));
   int4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ longlong2 __ldg(const longlong2 *ptr) {
@@ -137,10 +137,10 @@
   typedef unsigned char uc4 __attribute__((ext_vector_type(4)));
   uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));
   uchar4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ ushort2 __ldg(const ushort2 *ptr) {
@@ -155,10 +155,10 @@
   typedef unsigned short us4 __attribute__((ext_vector_type(4)));
   us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));
   ushort4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ uint2 __ldg(const uint2 *ptr) {
@@ -173,10 +173,10 @@
   typedef unsigned int ui4 __attribute__((ext_vector_type(4)));
   ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));
   uint4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {
@@ -200,10 +200,10 @@
   typedef float f4 __attribute__((ext_vector_type(4)));
   f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));
   float4 ret;
-  ret.w = rv[0];
-  ret.x = rv[1];
-  ret.y = rv[2];
-  ret.z = rv[3];
+  ret.x = rv[0];
+  ret.y = rv[1];
+  ret.z = rv[2];
+  ret.w = rv[3];
   return ret;
 }
 inline __device__ double2 __ldg(const double2 *ptr) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits