[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
majnemer accepted this revision. majnemer added a comment. This revision is now accepted and ready to land. LGTM https://reviews.llvm.org/D25334 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski updated this revision to Diff 74603. agutowski added a comment. force the alignment to 1 https://reviews.llvm.org/D25334 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/intrin.h test/CodeGen/ms-intrinsics.c test/Headers/ms-intrin.cpp Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1024,11 +1024,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7770,7 +7770,12 @@ Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2041,6 +2041,8 @@ TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -14,6 +14,22 @@ #include +#if defined(__i386__) || defined(__x86_64__) +void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) { + return __stosb(Dest, Data, Count); +} + +// CHECK-I386: define{{.*}}void @test__stosb +// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true) +// CHECK-I386: ret void +// CHECK-I386: } + +// CHECK-X64: define{{.*}}void @test__stosb +// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true) +// CHECK-X64: ret void +// CHECK-X64: } +#endif + void *test_ReturnAddress() { return _ReturnAddress(); } Index: test/Headers/ms-intrin.cpp === --- test/Headers/ms-intrin.cpp +++ test/Headers/ms-intrin.cpp @@ -38,7 +38,6 @@ __movsd(0, 0, 0); __movsw(0, 0, 0); - __stosb(0, 0, 0); __stosd(0, 0, 0); __stosw(0, 0, 0); Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1024,11 +1024,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7770,7 +7770,12 @@ Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2041,6 +2041,8 @@ TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -14,6 +14,22 @@ #include +#if defined(__i386__) ||
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
majnemer added inline comments. Comment at: lib/CodeGen/CGBuiltin.cpp: +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true); } I think we should manually force the alignment to 1 instead of trying to be clever. https://reviews.llvm.org/D25334 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski updated this revision to Diff 74596. agutowski added a comment. rebase https://reviews.llvm.org/D25334 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/intrin.h test/CodeGen/ms-intrinsics.c test/Headers/ms-intrin.cpp Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1024,11 +1024,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7770,7 +7770,13 @@ Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2041,6 +2041,8 @@ TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -14,6 +14,22 @@ #include +#if defined(__i386__) || defined(__x86_64__) +void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) { + return __stosb(Dest, Data, Count); +} + +// CHECK-I386: define{{.*}}void @test__stosb +// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true) +// CHECK-I386: ret void +// CHECK-I386: } + +// CHECK-X64: define{{.*}}void @test__stosb +// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true) +// CHECK-X64: ret void +// CHECK-X64: } +#endif + void *test_ReturnAddress() { return _ReturnAddress(); } Index: test/Headers/ms-intrin.cpp === --- test/Headers/ms-intrin.cpp +++ test/Headers/ms-intrin.cpp @@ -38,7 +38,6 @@ __movsd(0, 0, 0); __movsw(0, 0, 0); - __stosb(0, 0, 0); __stosd(0, 0, 0); __stosw(0, 0, 0); Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1024,11 +1024,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7770,7 +7770,13 @@ Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2041,6 +2041,8 @@ TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
majnemer added inline comments. Comment at: lib/CodeGen/CGBuiltin.cpp:7610 +Value *SizeVal = EmitScalarExpr(E->getArg(2)); +EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), +E->getArg(0)->getExprLoc(), FD, 0); agutowski wrote: > hans wrote: > > Hmm, does the __stosb intrinsic require Dest to be non-null (e.g. would > > Dest=NULL, Count=0 be OK?) I'm not even sure what llvm's memset requires > > actually. > I can't find any guarantee that memset accepts Dest=NULL and Count=0. So I > guess we can either add a branch here, checking if the pointer is NULL (and > that Count=0?) or assume that memset won't do anything strange. I vote for > the latter (the current code does it). I think the approach you've taken here is fine, we shouldn't need a branch. https://reviews.llvm.org/D25334 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski marked 2 inline comments as done. agutowski added inline comments. Comment at: lib/CodeGen/CGBuiltin.cpp:7610 +Value *SizeVal = EmitScalarExpr(E->getArg(2)); +EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), +E->getArg(0)->getExprLoc(), FD, 0); hans wrote: > Hmm, does the __stosb intrinsic require Dest to be non-null (e.g. would > Dest=NULL, Count=0 be OK?) I'm not even sure what llvm's memset requires > actually. I can't find any guarantee that memset accepts Dest=NULL and Count=0. So I guess we can either add a branch here, checking if the pointer is NULL (and that Count=0?) or assume that memset won't do anything strange. I vote for the latter (the current code does it). https://reviews.llvm.org/D25334 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski updated this revision to Diff 74157. agutowski added a comment. remove nullptr check; don't repeat EmitScalarExpr https://reviews.llvm.org/D25334 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/intrin.h test/CodeGen/ms-intrinsics.c test/Headers/ms-intrin.cpp Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1148,11 +1148,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7599,7 +7599,14 @@ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } + + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2071,6 +2071,8 @@ TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -14,6 +14,22 @@ #include +#if defined(__i386__) || defined(__x86_64__) +void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) { + return __stosb(Dest, Data, Count); +} + +// CHECK-I386: define{{.*}}void @test__stosb +// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true) +// CHECK-I386: ret void +// CHECK-I386: } + +// CHECK-X64: define{{.*}}void @test__stosb +// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true) +// CHECK-X64: ret void +// CHECK-X64: } +#endif + void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) { return _InterlockedExchangePointer(Target, Value); } Index: test/Headers/ms-intrin.cpp === --- test/Headers/ms-intrin.cpp +++ test/Headers/ms-intrin.cpp @@ -38,7 +38,6 @@ __movsd(0, 0, 0); __movsw(0, 0, 0); - __stosb(0, 0, 0); __stosd(0, 0, 0); __stosw(0, 0, 0); Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1148,11 +1148,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7599,7 +7599,14 @@ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } + + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +return Builder.CreateMemSet(Dest, Ops[1], Ops[2], true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2071,6 +2071,8 @@ TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski marked an inline comment as done. agutowski added inline comments. > hans wrote in CGBuiltin.cpp:7613 > Why is it returning Dest here, and not the result of Builder.CreateMemSet? My mistake (that's what memset returns), thanks! https://reviews.llvm.org/D25334 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski updated this revision to Diff 73828. agutowski marked an inline comment as done. agutowski added a comment. fix return value and comment https://reviews.llvm.org/D25334 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/intrin.h test/CodeGen/ms-intrinsics.c test/Headers/ms-intrin.cpp Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1148,11 +1148,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7599,7 +7599,19 @@ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } + + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +const FunctionDecl *FD = E->getDirectCallee(); +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +Value *ByteVal = EmitScalarExpr(E->getArg(1)); +Value *SizeVal = EmitScalarExpr(E->getArg(2)); +EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), +E->getArg(0)->getExprLoc(), FD, 0); +return Builder.CreateMemSet(Dest, ByteVal, SizeVal, true); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2071,6 +2071,8 @@ TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -14,6 +14,22 @@ #include +#if defined(__i386__) || defined(__x86_64__) +void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) { + return __stosb(Dest, Data, Count); +} + +// CHECK-I386: define{{.*}}void @test__stosb +// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true) +// CHECK-I386: ret void +// CHECK-I386: } + +// CHECK-X64: define{{.*}}void @test__stosb +// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true) +// CHECK-X64: ret void +// CHECK-X64: } +#endif + void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) { return _InterlockedExchangePointer(Target, Value); } Index: test/Headers/ms-intrin.cpp === --- test/Headers/ms-intrin.cpp +++ test/Headers/ms-intrin.cpp @@ -38,7 +38,6 @@ __movsd(0, 0, 0); __movsw(0, 0, 0); - __stosb(0, 0, 0); __stosd(0, 0, 0); __stosw(0, 0, 0); Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1148,11 +1148,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7599,7 +7599,19 @@ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } + + case X86::BI__stosb: { +// We treat __stosb as a volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away. +const FunctionDecl *FD = E->getDirectCallee(); +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +Value *ByteVal = EmitScalarExpr(E->getArg(1)); +Value *SizeVal = EmitScalarExpr(E->getArg(2)); +EmitNonNullArgCheck(RValue::get(Dest.getPointer()),
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
hans added inline comments. > CGBuiltin.cpp:7604 > + case X86::BI__stosb: { > +// we treat __stosb as volatile memset - it may not generate "rep stosb" > +// instruction, but it will create a memset that won't be optimized away Nit: I'd suggest capital w for "We" and ending the sentence with a period. I see many other comments in the file don't do this, but we can at least set a good example :-) > CGBuiltin.cpp:7610 > +Value *SizeVal = EmitScalarExpr(E->getArg(2)); > +EmitNonNullArgCheck(RValue::get(Dest.getPointer()), > E->getArg(0)->getType(), > +E->getArg(0)->getExprLoc(), FD, 0); Hmm, does the __stosb intrinsic require Dest to be non-null (e.g. would Dest=NULL, Count=0 be OK?) I'm not even sure what llvm's memset requires actually. > CGBuiltin.cpp:7613 > +Builder.CreateMemSet(Dest, ByteVal, SizeVal, true); > +return Dest.getPointer(); >} Why is it returning Dest here, and not the result of Builder.CreateMemSet? https://reviews.llvm.org/D25334 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25334: Implement __stosb intrinsic as a volatile memset
agutowski created this revision. agutowski added reviewers: rnk, hans, thakis, majnemer. agutowski added a subscriber: cfe-commits. We need __stosb to be an intrinsic, because SecureZeroMemory function uses it without including intrin.h. Implementing it as a volatile memset is not consistent with MSDN specification, but it gives us target-independent IR while keeping the most important properties of __stosb. https://reviews.llvm.org/D25334 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/intrin.h test/CodeGen/ms-intrinsics.c test/Headers/ms-intrin.cpp Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1148,11 +1148,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7599,7 +7599,20 @@ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } + + case X86::BI__stosb: { +// we treat __stosb as volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized away +const FunctionDecl *FD = E->getDirectCallee(); +Address Dest = EmitPointerWithAlignment(E->getArg(0)); +Value *ByteVal = EmitScalarExpr(E->getArg(1)); +Value *SizeVal = EmitScalarExpr(E->getArg(2)); +EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), +E->getArg(0)->getExprLoc(), FD, 0); +Builder.CreateMemSet(Dest, ByteVal, SizeVal, true); +return Dest.getPointer(); } + } } Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -2071,6 +2071,8 @@ TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx") +TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: test/CodeGen/ms-intrinsics.c === --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -14,6 +14,22 @@ #include +#if defined(__i386__) || defined(__x86_64__) +void test__stosb(unsigned char *Dest, unsigned char Data, size_t Count) { + return __stosb(Dest, Data, Count); +} + +// CHECK-I386: define{{.*}}void @test__stosb +// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true) +// CHECK-I386: ret void +// CHECK-I386: } + +// CHECK-X64: define{{.*}}void @test__stosb +// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true) +// CHECK-X64: ret void +// CHECK-X64: } +#endif + void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) { return _InterlockedExchangePointer(Target, Value); } Index: test/Headers/ms-intrin.cpp === --- test/Headers/ms-intrin.cpp +++ test/Headers/ms-intrin.cpp @@ -38,7 +38,6 @@ __movsd(0, 0, 0); __movsw(0, 0, 0); - __stosb(0, 0, 0); __stosd(0, 0, 0); __stosw(0, 0, 0); Index: lib/Headers/intrin.h === --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -1148,11 +1148,6 @@ : "%edi", "%esi", "%ecx"); } static __inline__ void __DEFAULT_FN_ATTRS -__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { - __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) -: "%edi", "%ecx"); -} -static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) : "%edi", "%ecx"); Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7599,7 +7599,20 @@ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } + + case X86::BI__stosb: { +// we treat __stosb as volatile memset - it may not generate "rep stosb" +// instruction, but it will create a memset that won't be optimized