[PATCH] D42811: [CodeGen][va_args] Correct Vector Struct va-arg 'in_reg' code gen

2018-02-02 Thread Erich Keane via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL324098: [CodeGen][va_args] Correct Vector Struct va-arg 
in_reg code gen (authored by erichkeane, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D42811?vs=132452=132594#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D42811

Files:
  cfe/trunk/lib/CodeGen/TargetInfo.cpp
  cfe/trunk/test/CodeGen/x86_64-floatvectors.c

Index: cfe/trunk/test/CodeGen/x86_64-floatvectors.c
===
--- cfe/trunk/test/CodeGen/x86_64-floatvectors.c
+++ cfe/trunk/test/CodeGen/x86_64-floatvectors.c
@@ -0,0 +1,131 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
+// RUN:   FileCheck %s
+
+// This test validates that the inreg branch generation for __builtin_va_arg 
+// does not exceed the alloca size of the type, which can cause the SROA pass to
+// eliminate the assignment.
+
+typedef struct { float x, y, z; } vec3f;
+
+double Vec3FTest(__builtin_va_list ap) {
+  vec3f vec = __builtin_va_arg(ap, vec3f);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec3FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec3FLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec3FGEP1:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec3FLoad1]], <2 x float>* [[Vec3FGEP1]]
+// CHECK: [[Vec3FLoad2:%.*]] = load float, float*
+// CHECK: [[Vec3FGEP2:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 1
+// CHECK: store float [[Vec3FLoad2]], float* [[Vec3FGEP2]]
+// CHECK: vaarg.in_mem:
+
+
+typedef struct { float x, y, z, q; } vec4f;
+
+double Vec4FTest(__builtin_va_list ap) {
+  vec4f vec = __builtin_va_arg(ap, vec4f);
+  return vec.x + vec.y + vec.z + vec.q;
+}
+// CHECK: define double @Vec4FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec4FLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec4FGEP1:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec4FLoad1]], <2 x float>* [[Vec4FGEP1]]
+// CHECK: [[Vec4FLoad2:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec4FGEP2:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 1
+// CHECK: store <2 x float> [[Vec4FLoad2]], <2 x float>* [[Vec4FGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct { double x, y; } vec2d;
+
+double Vec2DTest(__builtin_va_list ap) {
+  vec2d vec = __builtin_va_arg(ap, vec2d);
+  return vec.x + vec.y;
+}
+// CHECK: define double @Vec2DTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec2DLoad1:%.*]] = load double, double*
+// CHECK: [[Vec2DGEP1:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 0
+// CHECK: store double [[Vec2DLoad1]], double* [[Vec2DGEP1]]
+// CHECK: [[Vec2DLoad2:%.*]] = load double, double*
+// CHECK: [[Vec2DGEP2:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 1
+// CHECK: store double [[Vec2DLoad2]], double* [[Vec2DGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  float x, y;
+  double z;
+} vec2f1d;
+
+double Vec2F1DTest(__builtin_va_list ap) {
+  vec2f1d vec = __builtin_va_arg(ap, vec2f1d);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec2F1DTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec2F1DLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec2F1DGEP1:%.*]] = getelementptr inbounds { <2 x float>, double }, { <2 x float>, double }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec2F1DLoad1]], <2 x float>* [[Vec2F1DGEP1]]
+// CHECK: [[Vec2F1DLoad2:%.*]] = load double, double*
+// CHECK: [[Vec2F1DGEP2:%.*]] = getelementptr inbounds { <2 x float>, double }, { <2 x float>, double }* {{%.*}}, i32 0, i32 1
+// CHECK: store double [[Vec2F1DLoad2]], double* [[Vec2F1DGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  double x;
+  float y, z;
+} vec1d2f;
+
+double Vec1D2FTest(__builtin_va_list ap) {
+  vec1d2f vec = __builtin_va_arg(ap, vec1d2f);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec1D2FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec1D2FLoad1:%.*]] = load double, double*
+// CHECK: [[Vec1D2FGEP1:%.*]] = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* {{%.*}}, i32 0, i32 0
+// CHECK: store double [[Vec1D2FLoad1]], double* [[Vec1D2FGEP1]]
+// CHECK: [[Vec1D2FLoad2:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec1D2FGEP2:%.*]] = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* {{%.*}}, i32 0, i32 1
+// CHECK: store <2 x float> [[Vec1D2FLoad2]], <2 x float>* [[Vec1D2FGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct {
+  float x;
+  double z;
+} vec1f1d;
+
+double Vec1F1DTest(__builtin_va_list ap) {
+  vec1f1d vec = 

[PATCH] D42811: [CodeGen][va_args] Correct Vector Struct va-arg 'in_reg' code gen

2018-02-01 Thread John McCall via Phabricator via cfe-commits
rjmccall accepted this revision.
rjmccall added a comment.
This revision is now accepted and ready to land.

Okay.  LGTM.


Repository:
  rC Clang

https://reviews.llvm.org/D42811



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42811: [CodeGen][va_args] Correct Vector Struct va-arg 'in_reg' code gen

2018-02-01 Thread Erich Keane via Phabricator via cfe-commits
erichkeane created this revision.
erichkeane added reviewers: rsmith, rjmccall.

When trying to track down a different bug, we discovered
that calling `__builtin_va_arg` on a vec3f type caused
the SROA pass to issue a warning that there was an illegal
access.

Further research showed that the vec3f type is
alloca'ed as size '12', but the `_builtin_va_arg` code
on x86_64 was always loading this out of registers as 
{double, double}. Thus, the 2nd store into the vec3f
was storing in bytes 12-15!

This patch alters the original implementation which always
assumed {double, double} to use the actual coerced type 
instead, so the LLVM-IR generated is a load/GEP/store of
a <2 x float> and a float, rather than a double and a double.

Tests were added for all combinations I could think of that
would fit in 2 FP registers, and all work exactly as expected.


Repository:
  rC Clang

https://reviews.llvm.org/D42811

Files:
  lib/CodeGen/TargetInfo.cpp
  test/CodeGen/x86_64-floatvectors.c

Index: lib/CodeGen/TargetInfo.cpp
===
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -3790,17 +3790,18 @@
 Address RegAddrHi =
   CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
  CharUnits::fromQuantity(16));
-llvm::Type *DoubleTy = CGF.DoubleTy;
-llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
+llvm::Type *ST = AI.canHaveCoerceToType()
+ ? AI.getCoerceToType()
+ : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
 llvm::Value *V;
 Address Tmp = CGF.CreateMemTemp(Ty);
 Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
-V = CGF.Builder.CreateLoad(
-   CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy));
+V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
+RegAddrLo, ST->getStructElementType(0)));
 CGF.Builder.CreateStore(V,
CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero()));
-V = CGF.Builder.CreateLoad(
-   CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy));
+V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
+RegAddrHi, ST->getStructElementType(1)));
 CGF.Builder.CreateStore(V,
   CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8)));
 
Index: test/CodeGen/x86_64-floatvectors.c
===
--- test/CodeGen/x86_64-floatvectors.c
+++ test/CodeGen/x86_64-floatvectors.c
@@ -0,0 +1,131 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
+// RUN:   FileCheck %s
+
+// This test validates that the inreg branch generation for __builtin_va_arg 
+// does not exceed the alloca size of the type, which can cause the SROA pass to
+// eliminate the assignment.
+
+typedef struct { float x, y, z; } vec3f;
+
+double Vec3FTest(__builtin_va_list ap) {
+  vec3f vec = __builtin_va_arg(ap, vec3f);
+  return vec.x + vec.y + vec.z;
+}
+// CHECK: define double @Vec3FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec3FLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec3FGEP1:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec3FLoad1]], <2 x float>* [[Vec3FGEP1]]
+// CHECK: [[Vec3FLoad2:%.*]] = load float, float*
+// CHECK: [[Vec3FGEP2:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 1
+// CHECK: store float [[Vec3FLoad2]], float* [[Vec3FGEP2]]
+// CHECK: vaarg.in_mem:
+
+
+typedef struct { float x, y, z, q; } vec4f;
+
+double Vec4FTest(__builtin_va_list ap) {
+  vec4f vec = __builtin_va_arg(ap, vec4f);
+  return vec.x + vec.y + vec.z + vec.q;
+}
+// CHECK: define double @Vec4FTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec4FLoad1:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec4FGEP1:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 0
+// CHECK: store <2 x float> [[Vec4FLoad1]], <2 x float>* [[Vec4FGEP1]]
+// CHECK: [[Vec4FLoad2:%.*]] = load <2 x float>, <2 x float>*
+// CHECK: [[Vec4FGEP2:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 1
+// CHECK: store <2 x float> [[Vec4FLoad2]], <2 x float>* [[Vec4FGEP2]]
+// CHECK: vaarg.in_mem:
+
+typedef struct { double x, y; } vec2d;
+
+double Vec2DTest(__builtin_va_list ap) {
+  vec2d vec = __builtin_va_arg(ap, vec2d);
+  return vec.x + vec.y;
+}
+// CHECK: define double @Vec2DTest
+// CHECK: vaarg.in_reg:
+// CHECK: [[Vec2DLoad1:%.*]] = load double, double*
+// CHECK: [[Vec2DGEP1:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 0
+// CHECK: store double [[Vec2DLoad1]], double* [[Vec2DGEP1]]
+// CHECK: [[Vec2DLoad2:%.*]] = load double, double*
+// CHECK: