[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG6d8ddf53cc80: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit (authored by yubing). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -17,14 +17,61 @@ // CHECK: store i32 } -#if __x86_64__ int rdrand64(unsigned long long *p) { return _rdrand64_step(p); // X64: @rdrand64 // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 + +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// 
X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP12]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: 
_rdrand64_step.exit: +// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP13]] } -#endif int rdseed16(unsigned short *p) { return _rdseed16_step(p); Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -287,6 +287,23 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int __lo, __hi; + unsigned int
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing updated this revision to Diff 455041. yubing added a comment. address sign-conversion issue Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -17,14 +17,61 @@ // CHECK: store i32 } -#if __x86_64__ int rdrand64(unsigned long long *p) { return _rdrand64_step(p); // X64: @rdrand64 // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 + +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = 
extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP12]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: _rdrand64_step.exit: +// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP13]] } -#endif int rdseed16(unsigned short *p) { return 
_rdseed16_step(p); Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -287,6 +287,23 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int __lo, __hi; + unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); + unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); + if (__res_lo && __res_hi) { +
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG07e34763b027: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit (authored by yubing). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -17,14 +17,61 @@ // CHECK: store i32 } -#if __x86_64__ int rdrand64(unsigned long long *p) { return _rdrand64_step(p); // X64: @rdrand64 // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 + +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// 
X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP12]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: 
_rdrand64_step.exit: +// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP13]] } -#endif int rdseed16(unsigned short *p) { return _rdseed16_step(p); Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -287,6 +287,23 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int __lo, __hi; + int __res_lo =
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
RKSimon accepted this revision. RKSimon added a comment. This revision is now accepted and ready to land. LGTM - cheers Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing updated this revision to Diff 454738. yubing added a comment. Execute the second rdrand32 regardless of whether the first one fails. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include <immintrin.h> @@ -17,14 +17,61 @@ // CHECK: store i32 } -#if __x86_64__ int rdrand64(unsigned long long *p) { return _rdrand64_step(p); // X64: @rdrand64 // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 + +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 }
@llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP12]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: _rdrand64_step.exit: +// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP13]] 
} -#endif int rdseed16(unsigned short *p) { return _rdseed16_step(p); Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,23 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int __lo, __hi; + int __res_lo = __builtin_ia32_rdrand32_step(&__lo); + int __res_hi = __builtin_ia32_rdrand32_step(&__hi); + if
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
RKSimon added inline comments. Comment at: clang/lib/Headers/immintrin.h:301 + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { yubing wrote: > RKSimon wrote: > > RKSimon wrote: > > > craig.topper wrote: > > > > craig.topper wrote: > > > > > Should `&` be `&&`? > > > > Can we avoid the pointer cast here? Use two unsigned ints and manually > > > > concatenate them to a 64-bit value. > > > +1 > > > ``` > > > unsigned int lo, hi; > > > if (__builtin_ia32_rdrand32_step(&lo) && > > > __builtin_ia32_rdrand32_step(&hi)) { > > > *p = ((unsigned long)hi << 32) | lo; > > > return 1; > > > } > > > ``` > > Are there any side effects that we might encounter by not always performing > > both __builtin_ia32_rdrand32_step calls? > > ``` > > unsigned int __lo, __hi; > > int __res_lo = __builtin_ia32_rdrand32_step(&__lo); > > int __res_hi = __builtin_ia32_rdrand32_step(&__hi); > > if (__res_lo && __res_hi) { > > *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; > > return 1; > > } else { > > *__p = 0; > > return 0; > > } > > ``` > however, if the first rdrand32 failed, then we don't need to execute the > second one. I understand that - but given randomizers are often used for sensitive applications (crypto) - my question was whether not always calling this twice was going to affect things. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing added inline comments. Comment at: clang/lib/Headers/immintrin.h:301 + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { RKSimon wrote: > RKSimon wrote: > > craig.topper wrote: > > > craig.topper wrote: > > > > Should `&` be `&&`? > > > Can we avoid the pointer cast here? Use two unsigned ints and manually > > > concatenate them to a 64-bit value. > > +1 > > ``` > > unsigned int lo, hi; > > if (__builtin_ia32_rdrand32_step(&lo) && > > __builtin_ia32_rdrand32_step(&hi)) { > > *p = ((unsigned long)hi << 32) | lo; > > return 1; > > } > > ``` > Are there any side effects that we might encounter by not always performing > both __builtin_ia32_rdrand32_step calls? > ``` > unsigned int __lo, __hi; > int __res_lo = __builtin_ia32_rdrand32_step(&__lo); > int __res_hi = __builtin_ia32_rdrand32_step(&__hi); > if (__res_lo && __res_hi) { > *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; > return 1; > } else { > *__p = 0; > return 0; > } > ``` However, if the first rdrand32 failed, then we don't need to execute the second one. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
RKSimon added inline comments. Comment at: clang/lib/Headers/immintrin.h:301 + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { RKSimon wrote: > craig.topper wrote: > > craig.topper wrote: > > > Should `&` be `&&`? > > Can we avoid the pointer cast here? Use two unsigned ints and manually > > concatenate them to a 64-bit value. > +1 > ``` > unsigned int lo, hi; > if (__builtin_ia32_rdrand32_step(&lo) && > __builtin_ia32_rdrand32_step(&hi)) { > *p = ((unsigned long)hi << 32) | lo; > return 1; > } > ``` Are there any side effects that we might encounter by not always performing both __builtin_ia32_rdrand32_step calls? ``` unsigned int __lo, __hi; int __res_lo = __builtin_ia32_rdrand32_step(&__lo); int __res_hi = __builtin_ia32_rdrand32_step(&__hi); if (__res_lo && __res_hi) { *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; return 1; } else { *__p = 0; return 0; } ``` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing updated this revision to Diff 454357. yubing added a comment. address craig's comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -17,14 +17,55 @@ // CHECK: store i32 } -#if __x86_64__ int rdrand64(unsigned long long *p) { return _rdrand64_step(p); // X64: @rdrand64 // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 + +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] 
= extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP3]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP10]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: _rdrand64_step.exit: +// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP11]] } -#endif int rdseed16(unsigned short *p) { return _rdseed16_step(p); Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,21 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. 
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int __lo, __hi; + if (__builtin_ia32_rdrand32_step(&__lo) && __builtin_ia32_rdrand32_step(&__hi)) { +*__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; +return 1; + } else { +*__p = 0; +return 0; + } +} #endif #endif /* __RDRND__ */ Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
craig.topper added inline comments. Comment at: clang/lib/Headers/immintrin.h:300 +{ + unsigned int lo, hi; + if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) { craig.topper wrote: > variable names in intrinsic headers must start with 2 underscores. What about this comment? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing updated this revision to Diff 454356. yubing added a comment. address simon's comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -17,14 +17,55 @@ // CHECK: store i32 } -#if __x86_64__ int rdrand64(unsigned long long *p) { return _rdrand64_step(p); // X64: @rdrand64 // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 + +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] 
= extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP3]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP10]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: _rdrand64_step.exit: +// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP11]] } -#endif int rdseed16(unsigned short *p) { return _rdseed16_step(p); Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,21 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. 
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int lo, hi; + if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) { +*__p = ((unsigned long long)hi << 32) | (unsigned long long)lo; +return 1; + } else { +*__p = 0; +return 0; + } +} #endif #endif /* __RDRND__ */ Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
craig.topper added inline comments. Comment at: clang/lib/Headers/immintrin.h:300 +{ + unsigned int lo, hi; + if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) { Variable names in intrinsic headers must start with 2 underscores. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
RKSimon added inline comments. Comment at: clang/test/CodeGen/X86/rdrand-builtins.c:20 #if __x86_64__ int rdrand64(unsigned long long *p) { why do you still need the #if-else-endif? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing updated this revision to Diff 453866. yubing added a comment. fix a small issue Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -24,6 +24,52 @@ // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 } +#else +int rdrand64(unsigned long long *p) { +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne 
i32 [[TMP3]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP10]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: _rdrand64_step.exit: +// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP11]] + return _rdrand64_step(p); +} #endif int rdseed16(unsigned short *p) { Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,21 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. 
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int lo, hi; + if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) { +*__p = ((unsigned long long)hi << 32) | (unsigned long long)lo; +return 1; + } else { +*__p = 0; +return 0; + } +} #endif #endif /* __RDRND__ */ Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing updated this revision to Diff 453865. yubing added a comment. Address comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -24,6 +24,53 @@ // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 } +#else +int rdrand64(unsigned long long *p) { +// X86-LABEL: @rdrand64( +// X86-NEXT: entry: +// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4 +// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4 +// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4 +// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4 +// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4 +// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4 +// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne 
i32 [[TMP3]], 0 +// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]] +// X86: land.lhs.true.i: +// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32() +// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0 +// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4 +// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1 +// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0 +// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]] +// X86: if.then.i: +// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4 +// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64 +// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32 +// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4 +// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64 +// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]] +// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4 +// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]] +// X86: if.else.i: +// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4 +// X86-NEXT:store i64 0, i64* [[TMP10]], align 4 +// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4 +// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]] +// X86: _rdrand64_step.exit: +// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4 +// X86-NEXT:ret i32 [[TMP11]] +; + return _rdrand64_step(p); +} #endif int rdseed16(unsigned short *p) { Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,21 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. 
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned int lo, hi; + if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) { +*__p = ((unsigned long long)hi << 32) | (unsigned long long)lo; +return 1; + } else { +*__p = 0; +return 0; + } +} #endif #endif /* __RDRND__ */ Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
RKSimon added inline comments. Comment at: clang/lib/Headers/immintrin.h:301 + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { craig.topper wrote: > craig.topper wrote: > > Should `&` be `&&`? > Can we avoid the pointer cast here? Use two unsigned ints and manually > concatenate them to a 64-bit value. +1 ``` unsigned int lo, hi; if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) { *p = ((unsigned long)hi << 32) | lo; return 1; } ``` Comment at: clang/test/CodeGen/X86/rdrand-builtins.c:2 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X32 X86 not X32 :) Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
craig.topper added inline comments. Comment at: clang/lib/Headers/immintrin.h:301 + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { Should `&` be `&&`? Comment at: clang/lib/Headers/immintrin.h:301 + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { craig.topper wrote: > Should `&` be `&&`? Can we avoid the pointer cast here? Use two unsigned ints and manually concatenate them to a 64-bit value. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D132141/new/ https://reviews.llvm.org/D132141 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit
yubing created this revision. Herald added a subscriber: pengfei. Herald added a project: All. yubing requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D132141 Files: clang/lib/Headers/immintrin.h clang/test/CodeGen/X86/rdrand-builtins.c Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X32 #include @@ -24,6 +24,29 @@ // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 } +#else +int rdrand64(unsigned long long *p) { +// X32: @rdrand64 +// X32: [[RETVAL_I:%.*]] = alloca i32, align 4 +// X32: call { i32, i32 } @llvm.x86.rdrand.32 +// X32: store i32 +// X32: call { i32, i32 } @llvm.x86.rdrand.32 +// X32: store i32 +// X32: [[AND_I:%.*]] = and i32 +// X32: [[TOBOOL_I:%.*]] = icmp ne i32 [[AND_I]], 0 +// X32: br i1 [[TOBOOL_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] +// X32: if.then.i: +// X32: store i64 +// X32: store i32 1, i32* [[RETVAL_I]], align 4 +// X32: br label [[_RDRAND64_STEP_EXIT:%.*]] +// X32: if.else.i: +// X32: store i64 0 +// X32: store i32 0, i32* [[RETVAL_I]], align 4 +// X32: br label [[_RDRAND64_STEP_EXIT]] +// X32: _rdrand64_step.exit: +// X32: %{{.*}} = load i32, i32* [[RETVAL_I]], align 4 + return 
_rdrand64_step(p); +} #endif int rdseed16(unsigned short *p) { Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,22 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit rdrand with 2 32-bit +// rdrand instructions. +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) +_rdrand64_step(unsigned long long *__p) +{ + unsigned long long tmp; + if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) & + __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) { +*__p = tmp; +return 1; + } else { +*__p = 0; +return 0; + } +} #endif #endif /* __RDRND__ */ Index: clang/test/CodeGen/X86/rdrand-builtins.c === --- clang/test/CodeGen/X86/rdrand-builtins.c +++ clang/test/CodeGen/X86/rdrand-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X32 #include <immintrin.h> @@ -24,6 +24,29 @@ // X64: call { i64, i32 } @llvm.x86.rdrand.64 // X64: store i64 } +#else +int rdrand64(unsigned long long *p) { +// X32: @rdrand64 +// X32: [[RETVAL_I:%.*]] = alloca i32, align 4 +// X32: call { i32, i32 } @llvm.x86.rdrand.32 +// X32: store i32 +// X32: call { i32, i32 } @llvm.x86.rdrand.32 +// X32: store i32 +// X32: [[AND_I:%.*]] = and i32 +// X32: [[TOBOOL_I:%.*]] = icmp ne i32 [[AND_I]], 0 +// X32: br i1 [[TOBOOL_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] +//
X32: if.then.i: +// X32: store i64 +// X32: store i32 1, i32* [[RETVAL_I]], align 4 +// X32: br label [[_RDRAND64_STEP_EXIT:%.*]] +// X32: if.else.i: +// X32: store i64 0 +// X32: store i32 0, i32* [[RETVAL_I]], align 4 +// X32: br label [[_RDRAND64_STEP_EXIT]] +// X32: _rdrand64_step.exit: +// X32: %{{.*}} = load i32, i32* [[RETVAL_I]], align 4 + return _rdrand64_step(p); +} #endif int rdseed16(unsigned short *p) { Index: clang/lib/Headers/immintrin.h === --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -291,6 +291,22 @@ { return (int)__builtin_ia32_rdrand64_step(__p); } +#else +// We need to emulate the functionality of 64-bit