[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-14 Thread David Majnemer via cfe-commits
majnemer accepted this revision.
majnemer added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D25334



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-13 Thread Albert Gutowski via cfe-commits
agutowski updated this revision to Diff 74603.
agutowski added a comment.

force the alignment to 1


https://reviews.llvm.org/D25334

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/intrin.h
  test/CodeGen/ms-intrinsics.c
  test/Headers/ms-intrin.cpp


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1024,11 +1024,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7770,7 +7770,12 @@
 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
 return Builder.CreateCall(F);
   }
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2041,6 +2041,8 @@
 
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ test/CodeGen/ms-intrinsics.c
@@ -14,6 +14,22 @@
 
 #include 
 
+#if defined(__i386__) || defined(__x86_64__)
+void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) {
+  return __stosb(Dest, Data, Count);
+}
+
+// CHECK-I386: define{{.*}}void @test__stosb
+// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 
%Count, i32 1, i1 true)
+// CHECK-I386:   ret void
+// CHECK-I386: }
+
+// CHECK-X64: define{{.*}}void @test__stosb
+// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 
%Count, i32 1, i1 true)
+// CHECK-X64:   ret void
+// CHECK-X64: }
+#endif
+
 void *test_ReturnAddress() {
   return _ReturnAddress();
 }
Index: test/Headers/ms-intrin.cpp
===
--- test/Headers/ms-intrin.cpp
+++ test/Headers/ms-intrin.cpp
@@ -38,7 +38,6 @@
   __movsd(0, 0, 0);
   __movsw(0, 0, 0);
 
-  __stosb(0, 0, 0);
   __stosd(0, 0, 0);
   __stosw(0, 0, 0);
 


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1024,11 +1024,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7770,7 +7770,12 @@
 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
 return Builder.CreateCall(F);
   }
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2041,6 +2041,8 @@
 
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ test/CodeGen/ms-intrinsics.c
@@ -14,6 +14,22 @@
 
 #include 
 
+#if defined(__i386__) || 

[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-13 Thread David Majnemer via cfe-commits
majnemer added inline comments.



Comment at: lib/CodeGen/CGBuiltin.cpp:
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true);
   }

I think we should manually force the alignment to 1 instead of trying to be 
clever.


https://reviews.llvm.org/D25334



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-13 Thread Albert Gutowski via cfe-commits
agutowski updated this revision to Diff 74596.
agutowski added a comment.

rebase


https://reviews.llvm.org/D25334

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/intrin.h
  test/CodeGen/ms-intrinsics.c
  test/Headers/ms-intrin.cpp


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1024,11 +1024,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7770,7 +7770,13 @@
 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
 return Builder.CreateCall(F);
   }
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2041,6 +2041,8 @@
 
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ test/CodeGen/ms-intrinsics.c
@@ -14,6 +14,22 @@
 
 #include 
 
+#if defined(__i386__) || defined(__x86_64__)
+void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) {
+  return __stosb(Dest, Data, Count);
+}
+
+// CHECK-I386: define{{.*}}void @test__stosb
+// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 
%Count, i32 1, i1 true)
+// CHECK-I386:   ret void
+// CHECK-I386: }
+
+// CHECK-X64: define{{.*}}void @test__stosb
+// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 
%Count, i32 1, i1 true)
+// CHECK-X64:   ret void
+// CHECK-X64: }
+#endif
+
 void *test_ReturnAddress() {
   return _ReturnAddress();
 }
Index: test/Headers/ms-intrin.cpp
===
--- test/Headers/ms-intrin.cpp
+++ test/Headers/ms-intrin.cpp
@@ -38,7 +38,6 @@
   __movsd(0, 0, 0);
   __movsw(0, 0, 0);
 
-  __stosb(0, 0, 0);
   __stosd(0, 0, 0);
   __stosw(0, 0, 0);
 


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1024,11 +1024,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7770,7 +7770,13 @@
 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
 return Builder.CreateCall(F);
   }
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2041,6 +2041,8 @@
 
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ 

[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-10 Thread David Majnemer via cfe-commits
majnemer added inline comments.



Comment at: lib/CodeGen/CGBuiltin.cpp:7610
+Value *SizeVal = EmitScalarExpr(E->getArg(2));
+EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
+E->getArg(0)->getExprLoc(), FD, 0);

agutowski wrote:
> hans wrote:
> > Hmm, does the __stosb intrinsic require Dest to be non-null (e.g. would 
> > Dest=NULL, Count=0 be OK?) I'm not even sure what llvm's memset requires 
> > actually.
> I can't find any guarantee that memset accepts Dest=NULL and Count=0. So I 
> guess we can either add a branch here, checking if the pointer is NULL (and 
> that Count=0?) or assume that memset won't do anything strange. I vote for 
> the latter (the current code does it).
I think the approach you've taken here is fine, we shouldn't need a branch.


https://reviews.llvm.org/D25334



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-10 Thread Albert Gutowski via cfe-commits
agutowski marked 2 inline comments as done.
agutowski added inline comments.



Comment at: lib/CodeGen/CGBuiltin.cpp:7610
+Value *SizeVal = EmitScalarExpr(E->getArg(2));
+EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
+E->getArg(0)->getExprLoc(), FD, 0);

hans wrote:
> Hmm, does the __stosb intrinsic require Dest to be non-null (e.g. would 
> Dest=NULL, Count=0 be OK?) I'm not even sure what llvm's memset requires 
> actually.
I can't find any guarantee that memset accepts Dest=NULL and Count=0. So I 
guess we can either add a branch here, checking if the pointer is NULL (and 
that Count=0?) or assume that memset won't do anything strange. I vote for the 
latter (the current code does it).


https://reviews.llvm.org/D25334



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-10 Thread Albert Gutowski via cfe-commits
agutowski updated this revision to Diff 74157.
agutowski added a comment.

remove nullptr check; don't repeat EmitScalarExpr


https://reviews.llvm.org/D25334

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/intrin.h
  test/CodeGen/ms-intrinsics.c
  test/Headers/ms-intrin.cpp


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1148,11 +1148,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7599,7 +7599,14 @@
 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
 return HigherBits;
   }
+
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2071,6 +2071,8 @@
 TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx")
 TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ test/CodeGen/ms-intrinsics.c
@@ -14,6 +14,22 @@
 
 #include 
 
+#if defined(__i386__) || defined(__x86_64__)
+void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) {
+  return __stosb(Dest, Data, Count);
+}
+
+// CHECK-I386: define{{.*}}void @test__stosb
+// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 
%Count, i32 1, i1 true)
+// CHECK-I386:   ret void
+// CHECK-I386: }
+
+// CHECK-X64: define{{.*}}void @test__stosb
+// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 
%Count, i32 1, i1 true)
+// CHECK-X64:   ret void
+// CHECK-X64: }
+#endif
+
 void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) {
   return _InterlockedExchangePointer(Target, Value);
 }
Index: test/Headers/ms-intrin.cpp
===
--- test/Headers/ms-intrin.cpp
+++ test/Headers/ms-intrin.cpp
@@ -38,7 +38,6 @@
   __movsd(0, 0, 0);
   __movsw(0, 0, 0);
 
-  __stosb(0, 0, 0);
   __stosd(0, 0, 0);
   __stosw(0, 0, 0);
 


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1148,11 +1148,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7599,7 +7599,14 @@
 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
 return HigherBits;
   }
+
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2071,6 +2071,8 @@
 TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx")
 TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 

[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-06 Thread Albert Gutowski via cfe-commits
agutowski marked an inline comment as done.
agutowski added inline comments.


> hans wrote in CGBuiltin.cpp:7613
> Why is it returning Dest here, and not the result of Builder.CreateMemSet?

My mistake (that's what memset returns), thanks!

https://reviews.llvm.org/D25334



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-06 Thread Albert Gutowski via cfe-commits
agutowski updated this revision to Diff 73828.
agutowski marked an inline comment as done.
agutowski added a comment.

fix return value and comment


https://reviews.llvm.org/D25334

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/intrin.h
  test/CodeGen/ms-intrinsics.c
  test/Headers/ms-intrin.cpp


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1148,11 +1148,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7599,7 +7599,19 @@
 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
 return HigherBits;
   }
+
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+const FunctionDecl *FD = E->getDirectCallee();
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+Value *ByteVal = EmitScalarExpr(E->getArg(1));
+Value *SizeVal = EmitScalarExpr(E->getArg(2));
+EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
+E->getArg(0)->getExprLoc(), FD, 0);
+return Builder.CreateMemSet(Dest, ByteVal, SizeVal, true);
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2071,6 +2071,8 @@
 TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx")
 TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ test/CodeGen/ms-intrinsics.c
@@ -14,6 +14,22 @@
 
 #include 
 
+#if defined(__i386__) || defined(__x86_64__)
+void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) {
+  return __stosb(Dest, Data, Count);
+}
+
+// CHECK-I386: define{{.*}}void @test__stosb
+// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 
%Count, i32 1, i1 true)
+// CHECK-I386:   ret void
+// CHECK-I386: }
+
+// CHECK-X64: define{{.*}}void @test__stosb
+// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 
%Count, i32 1, i1 true)
+// CHECK-X64:   ret void
+// CHECK-X64: }
+#endif
+
 void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) {
   return _InterlockedExchangePointer(Target, Value);
 }
Index: test/Headers/ms-intrin.cpp
===
--- test/Headers/ms-intrin.cpp
+++ test/Headers/ms-intrin.cpp
@@ -38,7 +38,6 @@
   __movsd(0, 0, 0);
   __movsw(0, 0, 0);
 
-  __stosb(0, 0, 0);
   __stosd(0, 0, 0);
   __stosw(0, 0, 0);
 


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1148,11 +1148,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7599,7 +7599,19 @@
 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
 return HigherBits;
   }
+
+  case X86::BI__stosb: {
+// We treat __stosb as a volatile memset - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away.
+const FunctionDecl *FD = E->getDirectCallee();
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+Value *ByteVal = EmitScalarExpr(E->getArg(1));
+Value *SizeVal = EmitScalarExpr(E->getArg(2));
+EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 

[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-06 Thread Hans Wennborg via cfe-commits
hans added inline comments.


> CGBuiltin.cpp:7604
> +  case X86::BI__stosb: {
> +// we treat __stosb as volatile memset  - it may not generate "rep stosb"
> +// instruction, but it will create a memset that won't be optimized away

Nit: I'd suggest capital w for "We" and ending the sentence with a period. I 
see many other comments in the file don't do this, but we can at least set a 
good example :-)

> CGBuiltin.cpp:7610
> +Value *SizeVal = EmitScalarExpr(E->getArg(2));
> +EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
> E->getArg(0)->getType(),
> +E->getArg(0)->getExprLoc(), FD, 0);

Hmm, does the __stosb intrinsic require Dest to be non-null (e.g. would 
Dest=NULL, Count=0 be OK?) I'm not even sure what llvm's memset requires 
actually.

> CGBuiltin.cpp:7613
> +Builder.CreateMemSet(Dest, ByteVal, SizeVal, true);
> +return Dest.getPointer();
>}

Why is it returning Dest here, and not the result of Builder.CreateMemSet?

https://reviews.llvm.org/D25334



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25334: Implement __stosb intrinsic as a volatile memset

2016-10-06 Thread Albert Gutowski via cfe-commits
agutowski created this revision.
agutowski added reviewers: rnk, hans, thakis, majnemer.
agutowski added a subscriber: cfe-commits.

We need __stosb to be an intrinsic, because SecureZeroMemory function uses it 
without including intrin.h. Implementing it as a volatile memset is not 
consistent with MSDN specification, but it gives us target-independent IR while 
keeping the most important properties of __stosb.


https://reviews.llvm.org/D25334

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/intrin.h
  test/CodeGen/ms-intrinsics.c
  test/Headers/ms-intrin.cpp


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1148,11 +1148,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7599,7 +7599,20 @@
 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
 return HigherBits;
   }
+
+  case X86::BI__stosb: {
+// we treat __stosb as volatile memset  - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized away
+const FunctionDecl *FD = E->getDirectCallee();
+Address Dest = EmitPointerWithAlignment(E->getArg(0));
+Value *ByteVal = EmitScalarExpr(E->getArg(1));
+Value *SizeVal = EmitScalarExpr(E->getArg(2));
+EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
+E->getArg(0)->getExprLoc(), FD, 0);
+Builder.CreateMemSet(Dest, ByteVal, SizeVal, true);
+return Dest.getPointer();
   }
+  }
 }
 
 
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -2071,6 +2071,8 @@
 TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx")
 TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx")
 
+TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
Index: test/CodeGen/ms-intrinsics.c
===
--- test/CodeGen/ms-intrinsics.c
+++ test/CodeGen/ms-intrinsics.c
@@ -14,6 +14,22 @@
 
 #include 
 
+#if defined(__i386__) || defined(__x86_64__)
+void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) {
+  return __stosb(Dest, Data, Count);
+}
+
+// CHECK-I386: define{{.*}}void @test__stosb
+// CHECK-I386:   tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 
%Count, i32 1, i1 true)
+// CHECK-I386:   ret void
+// CHECK-I386: }
+
+// CHECK-X64: define{{.*}}void @test__stosb
+// CHECK-X64:   tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 
%Count, i32 1, i1 true)
+// CHECK-X64:   ret void
+// CHECK-X64: }
+#endif
+
 void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) {
   return _InterlockedExchangePointer(Target, Value);
 }
Index: test/Headers/ms-intrin.cpp
===
--- test/Headers/ms-intrin.cpp
+++ test/Headers/ms-intrin.cpp
@@ -38,7 +38,6 @@
   __movsd(0, 0, 0);
   __movsw(0, 0, 0);
 
-  __stosb(0, 0, 0);
   __stosd(0, 0, 0);
   __stosw(0, 0, 0);
 


Index: lib/Headers/intrin.h
===
--- lib/Headers/intrin.h
+++ lib/Headers/intrin.h
@@ -1148,11 +1148,6 @@
 : "%edi", "%esi", "%ecx");
 }
 static __inline__ void __DEFAULT_FN_ATTRS
-__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
-  __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
-: "%edi", "%ecx");
-}
-static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
 : "%edi", "%ecx");
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7599,7 +7599,20 @@
 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
 return HigherBits;
   }
+
+  case X86::BI__stosb: {
+// we treat __stosb as volatile memset  - it may not generate "rep stosb"
+// instruction, but it will create a memset that won't be optimized