[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-23 Thread Bing Yu via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6d8ddf53cc80: [X86] Emulate _rdrand64_step with two rdrand32 
if it is 32bit (authored by yubing).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c

Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -17,14 +17,61 @@
 // CHECK: store i32
 }
 
-#if __x86_64__
 int rdrand64(unsigned long long *p) {
   return _rdrand64_step(p);
 // X64: @rdrand64
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
+
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP12]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP13]]
 }
-#endif
 
 int rdseed16(unsigned short *p) {
   return _rdseed16_step(p);
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -287,6 +287,23 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int __lo, __hi;
+  unsigned int 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-23 Thread Bing Yu via Phabricator via cfe-commits
yubing updated this revision to Diff 455041.
yubing added a comment.

address sign-conversion issue


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c

Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -17,14 +17,61 @@
 // CHECK: store i32
 }
 
-#if __x86_64__
 int rdrand64(unsigned long long *p) {
   return _rdrand64_step(p);
 // X64: @rdrand64
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
+
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP12]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP13]]
 }
-#endif
 
 int rdseed16(unsigned short *p) {
   return _rdseed16_step(p);
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -287,6 +287,23 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int __lo, __hi;
+  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
+  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
+  if (__res_lo && __res_hi) {
+

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-23 Thread Bing Yu via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG07e34763b027: [X86] Emulate _rdrand64_step with two rdrand32 
if it is 32bit (authored by yubing).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c

Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -17,14 +17,61 @@
 // CHECK: store i32
 }
 
-#if __x86_64__
 int rdrand64(unsigned long long *p) {
   return _rdrand64_step(p);
 // X64: @rdrand64
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
+
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP12]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP13]]
 }
-#endif
 
 int rdseed16(unsigned short *p) {
   return _rdseed16_step(p);
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -287,6 +287,23 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int __lo, __hi;
+  int __res_lo = 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-23 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - cheers


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-23 Thread Bing Yu via Phabricator via cfe-commits
yubing updated this revision to Diff 454738.
yubing added a comment.

Execute the second rdrand32 regardless of whether the first one fails


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c

Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -17,14 +17,61 @@
 // CHECK: store i32
 }
 
-#if __x86_64__
 int rdrand64(unsigned long long *p) {
   return _rdrand64_step(p);
 // X64: @rdrand64
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
+
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[__LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__RES_HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[__LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:store i32 [[TMP3]], i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[__HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:store i32 [[TMP6]], i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[__RES_LO_I]], align 4
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP7]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[__RES_HI_I]], align 4
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP8]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP9:%.*]] = load i32, i32* [[__HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP9]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP10:%.*]] = load i32, i32* [[__LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP10]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP11:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP11]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP12:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP12]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP13:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP13]]
 }
-#endif
 
 int rdseed16(unsigned short *p) {
   return _rdseed16_step(p);
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,23 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int __lo, __hi;
+  int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
+  int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
+  if 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: clang/lib/Headers/immintrin.h:301
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {

yubing wrote:
> RKSimon wrote:
> > RKSimon wrote:
> > > craig.topper wrote:
> > > > craig.topper wrote:
> > > > > Should `&` be `&&`?
> > > > Can we avoid the pointer cast here? Use two unsigned ints and manually 
> > > > concatenate them to a 64-bit value.
> > > +1
> > > ```
> > > unsigned int lo, hi;
> > > if (__builtin_ia32_rdrand32_step(&lo) &&
> > > __builtin_ia32_rdrand32_step(&hi)) {
> > >   *p = ((unsigned long)hi << 32) | lo;
> > >   return 1;
> > > }
> > > ```
> > Are there any side effects that we might encounter by not always performing 
> > both __builtin_ia32_rdrand32_step calls?
> > ```
> >   unsigned int __lo, __hi;
> >   int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
> >   int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
> >   if (__res_lo && __res_hi) {
> > *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
> > return 1;
> >   } else {
> > *__p = 0;
> > return 0;
> >   }
> > ```
> however, if the first rdrand32 failed, then we don't need to execute the 
> second one.
I understand that - but given randomizers are often used for sensitive 
applications (crypto) - my question was whether not always calling this twice 
was going to affect things.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-22 Thread Bing Yu via Phabricator via cfe-commits
yubing added inline comments.



Comment at: clang/lib/Headers/immintrin.h:301
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {

RKSimon wrote:
> RKSimon wrote:
> > craig.topper wrote:
> > > craig.topper wrote:
> > > > Should `&` be `&&`?
> > > Can we avoid the pointer cast here? Use two unsigned ints and manually 
> > > concatenate them to a 64-bit value.
> > +1
> > ```
> > unsigned int lo, hi;
> > if (__builtin_ia32_rdrand32_step(&lo) &&
> > __builtin_ia32_rdrand32_step(&hi)) {
> >   *p = ((unsigned long)hi << 32) | lo;
> >   return 1;
> > }
> > ```
> Are there any side effects that we might encounter by not always performing 
> both __builtin_ia32_rdrand32_step calls?
> ```
>   unsigned int __lo, __hi;
>   int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
>   int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
>   if (__res_lo && __res_hi) {
> *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
> return 1;
>   } else {
> *__p = 0;
> return 0;
>   }
> ```
however, if the first rdrand32 failed, then we don't need to execute the second 
one.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: clang/lib/Headers/immintrin.h:301
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {

RKSimon wrote:
> craig.topper wrote:
> > craig.topper wrote:
> > > Should `&` be `&&`?
> > Can we avoid the pointer cast here? Use two unsigned ints and manually 
> > concatenate them to a 64-bit value.
> +1
> ```
> unsigned int lo, hi;
> if (__builtin_ia32_rdrand32_step(&lo) &&
> __builtin_ia32_rdrand32_step(&hi)) {
>   *p = ((unsigned long)hi << 32) | lo;
>   return 1;
> }
> ```
Are there any side effects that we might encounter by not always performing both 
__builtin_ia32_rdrand32_step calls?
```
  unsigned int __lo, __hi;
  int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
*__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
return 1;
  } else {
*__p = 0;
return 0;
  }
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-21 Thread Bing Yu via Phabricator via cfe-commits
yubing updated this revision to Diff 454357.
yubing added a comment.

address craig's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -17,14 +17,55 @@
 // CHECK: store i32
 }
 
-#if __x86_64__
 int rdrand64(unsigned long long *p) {
   return _rdrand64_step(p);
 // X64: @rdrand64
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
+
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP3]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label 
[[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label 
[[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP10]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP11]]
 }
-#endif
 
 int rdseed16(unsigned short *p) {
   return _rdseed16_step(p);
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,21 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, 
__target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int __lo, __hi;
+  if (__builtin_ia32_rdrand32_step(&__lo) && 
__builtin_ia32_rdrand32_step(&__hi)) {
+*__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
+return 1;
+  } else {
+*__p = 0;
+return 0;
+  }
+}
 #endif
 #endif /* __RDRND__ */
 


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-21 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/Headers/immintrin.h:300
+{
+  unsigned int lo, hi;
+  if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) {

craig.topper wrote:
> variable names in intrinsic headers must start with 2 underscores.
What about this comment?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-21 Thread Bing Yu via Phabricator via cfe-commits
yubing updated this revision to Diff 454356.
yubing added a comment.

address simon's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -17,14 +17,55 @@
 // CHECK: store i32
 }
 
-#if __x86_64__
 int rdrand64(unsigned long long *p) {
   return _rdrand64_step(p);
 // X64: @rdrand64
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
+
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP3]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label 
[[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label 
[[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP10]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP11]]
 }
-#endif
 
 int rdseed16(unsigned short *p) {
   return _rdseed16_step(p);
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,21 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, 
__target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int lo, hi;
+  if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) {
+*__p = ((unsigned long long)hi << 32) | (unsigned long long)lo;
+return 1;
+  } else {
+*__p = 0;
+return 0;
+  }
+}
 #endif
 #endif /* __RDRND__ */
 


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-19 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/Headers/immintrin.h:300
+{
+  unsigned int lo, hi;
+  if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) {

variable names in intrinsic headers must start with 2 underscores.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: clang/test/CodeGen/X86/rdrand-builtins.c:20
 
 #if __x86_64__
 int rdrand64(unsigned long long *p) {

why do you still need the #if-else-endif?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-18 Thread Bing Yu via Phabricator via cfe-commits
yubing updated this revision to Diff 453866.
yubing added a comment.

fix a small issue


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -24,6 +24,52 @@
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
 }
+#else
+int rdrand64(unsigned long long *p) {
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP3]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label 
[[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label 
[[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP10]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP11]]
+  return _rdrand64_step(p);
+}
 #endif
 
 int rdseed16(unsigned short *p) {
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,21 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, 
__target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int lo, hi;
+  if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) {
+*__p = ((unsigned long long)hi << 32) | (unsigned long long)lo;
+return 1;
+  } else {
+*__p = 0;
+return 0;
+  }
+}
 #endif
 #endif /* __RDRND__ */
 


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-18 Thread Bing Yu via Phabricator via cfe-commits
yubing updated this revision to Diff 453865.
yubing added a comment.

Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
 
 #include <immintrin.h>
 
@@ -24,6 +24,53 @@
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
 }
+#else
+int rdrand64(unsigned long long *p) {
+// X86-LABEL: @rdrand64(
+// X86-NEXT:  entry:
+// X86-NEXT:[[RETVAL_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[__P_ADDR_I:%.*]] = alloca i64*, align 4
+// X86-NEXT:[[LO_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[HI_I:%.*]] = alloca i32, align 4
+// X86-NEXT:[[P_ADDR:%.*]] = alloca i64*, align 4
+// X86-NEXT:store i64* [[P:%.*]], i64** [[P_ADDR]], align 4
+// X86-NEXT:[[TMP0:%.*]] = load i64*, i64** [[P_ADDR]], align 4
+// X86-NEXT:store i64* [[TMP0]], i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:[[TMP1:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// X86-NEXT:store i32 [[TMP2]], i32* [[LO_I]], align 4
+// X86-NEXT:[[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// X86-NEXT:[[TOBOOL_I:%.*]] = icmp ne i32 [[TMP3]], 0
+// X86-NEXT:br i1 [[TOBOOL_I]], label [[LAND_LHS_TRUE_I:%.*]], label 
[[IF_ELSE_I:%.*]]
+// X86:   land.lhs.true.i:
+// X86-NEXT:[[TMP4:%.*]] = call { i32, i32 } @llvm.x86.rdrand.32()
+// X86-NEXT:[[TMP5:%.*]] = extractvalue { i32, i32 } [[TMP4]], 0
+// X86-NEXT:store i32 [[TMP5]], i32* [[HI_I]], align 4
+// X86-NEXT:[[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP4]], 1
+// X86-NEXT:[[TOBOOL1_I:%.*]] = icmp ne i32 [[TMP6]], 0
+// X86-NEXT:br i1 [[TOBOOL1_I]], label [[IF_THEN_I:%.*]], label 
[[IF_ELSE_I]]
+// X86:   if.then.i:
+// X86-NEXT:[[TMP7:%.*]] = load i32, i32* [[HI_I]], align 4
+// X86-NEXT:[[CONV_I:%.*]] = zext i32 [[TMP7]] to i64
+// X86-NEXT:[[SHL_I:%.*]] = shl i64 [[CONV_I]], 32
+// X86-NEXT:[[TMP8:%.*]] = load i32, i32* [[LO_I]], align 4
+// X86-NEXT:[[CONV2_I:%.*]] = zext i32 [[TMP8]] to i64
+// X86-NEXT:[[OR_I:%.*]] = or i64 [[SHL_I]], [[CONV2_I]]
+// X86-NEXT:[[TMP9:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 [[OR_I]], i64* [[TMP9]], align 4
+// X86-NEXT:store i32 1, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X86:   if.else.i:
+// X86-NEXT:[[TMP10:%.*]] = load i64*, i64** [[__P_ADDR_I]], align 4
+// X86-NEXT:store i64 0, i64* [[TMP10]], align 4
+// X86-NEXT:store i32 0, i32* [[RETVAL_I]], align 4
+// X86-NEXT:br label [[_RDRAND64_STEP_EXIT]]
+// X86:   _rdrand64_step.exit:
+// X86-NEXT:[[TMP11:%.*]] = load i32, i32* [[RETVAL_I]], align 4
+// X86-NEXT:ret i32 [[TMP11]]
+;
+  return _rdrand64_step(p);
+}
 #endif
 
 int rdseed16(unsigned short *p) {
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,21 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, 
__target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned int lo, hi;
+  if (__builtin_ia32_rdrand32_step(&lo) && __builtin_ia32_rdrand32_step(&hi)) {
+*__p = ((unsigned long long)hi << 32) | (unsigned long long)lo;
+return 1;
+  } else {
+*__p = 0;
+return 0;
+  }
+}
 #endif
 #endif /* __RDRND__ */
 


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s 

[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-18 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: clang/lib/Headers/immintrin.h:301
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {

craig.topper wrote:
> craig.topper wrote:
> > Should `&` be `&&`?
> Can we avoid the pointer cast here? Use two unsigned ints and manually 
> concatenate them to a 64-bit value.
+1
```
unsigned int lo, hi;
if (__builtin_ia32_rdrand32_step(&lo) &&
__builtin_ia32_rdrand32_step(&hi)) {
  *p = ((unsigned long)hi << 32) | lo;
  return 1;
}
```



Comment at: clang/test/CodeGen/X86/rdrand-builtins.c:2
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X32
 

X86 not X32 :) 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-18 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/Headers/immintrin.h:301
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {

Should `&` be `&&`?



Comment at: clang/lib/Headers/immintrin.h:301
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {

craig.topper wrote:
> Should `&` be `&&`?
Can we avoid the pointer cast here? Use two unsigned ints and manually 
concatenate them to a 64-bit value.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132141/new/

https://reviews.llvm.org/D132141

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D132141: [X86] Emulate _rdrand64_step with two rdrand32 if it is 32bit

2022-08-18 Thread Bing Yu via Phabricator via cfe-commits
yubing created this revision.
Herald added a subscriber: pengfei.
Herald added a project: All.
yubing requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132141

Files:
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/X86/rdrand-builtins.c


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s 
-triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X32
 
 #include <immintrin.h>
 
@@ -24,6 +24,29 @@
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
 }
+#else
+int rdrand64(unsigned long long *p) {
+// X32: @rdrand64
+// X32: [[RETVAL_I:%.*]] = alloca i32, align 4
+// X32: call { i32, i32 } @llvm.x86.rdrand.32
+// X32: store i32
+// X32: call { i32, i32 } @llvm.x86.rdrand.32
+// X32: store i32
+// X32: [[AND_I:%.*]] = and i32
+// X32: [[TOBOOL_I:%.*]] = icmp ne i32 [[AND_I]], 0
+// X32: br i1 [[TOBOOL_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]]
+// X32: if.then.i:
+// X32: store i64
+// X32: store i32 1, i32* [[RETVAL_I]], align 4
+// X32: br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X32: if.else.i:
+// X32: store i64 0
+// X32: store i32 0, i32* [[RETVAL_I]], align 4
+// X32: br label [[_RDRAND64_STEP_EXIT]]
+// X32: _rdrand64_step.exit:
+// X32: %{{.*}} = load i32, i32* [[RETVAL_I]], align 4
+  return _rdrand64_step(p);
+}
 #endif
 
 int rdseed16(unsigned short *p) {
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,22 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit rdrand with 2 32-bit
+// rdrand instructions.
+static __inline__ int __attribute__((__always_inline__, __nodebug__, 
__target__("rdrnd")))
+_rdrand64_step(unsigned long long *__p)
+{
+  unsigned long long tmp;
+  if (__builtin_ia32_rdrand32_step((unsigned int *)&tmp) &
+  __builtin_ia32_rdrand32_step(((unsigned int *)&tmp) + 1)) {
+*__p = tmp;
+return 1;
+  } else {
+*__p = 0;
+return 0;
+  }
+}
 #endif
 #endif /* __RDRND__ */
 


Index: clang/test/CodeGen/X86/rdrand-builtins.c
===
--- clang/test/CodeGen/X86/rdrand-builtins.c
+++ clang/test/CodeGen/X86/rdrand-builtins.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -no-opaque-pointers -ffreestanding %s -triple=i386-unknown-unknown -target-feature +rdrnd -target-feature +rdseed -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X32
 
 #include <immintrin.h>
 
@@ -24,6 +24,29 @@
 // X64: call { i64, i32 } @llvm.x86.rdrand.64
 // X64: store i64
 }
+#else
+int rdrand64(unsigned long long *p) {
+// X32: @rdrand64
+// X32: [[RETVAL_I:%.*]] = alloca i32, align 4
+// X32: call { i32, i32 } @llvm.x86.rdrand.32
+// X32: store i32
+// X32: call { i32, i32 } @llvm.x86.rdrand.32
+// X32: store i32
+// X32: [[AND_I:%.*]] = and i32
+// X32: [[TOBOOL_I:%.*]] = icmp ne i32 [[AND_I]], 0
+// X32: br i1 [[TOBOOL_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]]
+// X32: if.then.i:
+// X32: store i64
+// X32: store i32 1, i32* [[RETVAL_I]], align 4
+// X32: br label [[_RDRAND64_STEP_EXIT:%.*]]
+// X32: if.else.i:
+// X32: store i64 0
+// X32: store i32 0, i32* [[RETVAL_I]], align 4
+// X32: br label [[_RDRAND64_STEP_EXIT]]
+// X32: _rdrand64_step.exit:
+// X32: %{{.*}} = load i32, i32* [[RETVAL_I]], align 4
+  return _rdrand64_step(p);
+}
 #endif
 
 int rdseed16(unsigned short *p) {
Index: clang/lib/Headers/immintrin.h
===
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -291,6 +291,22 @@
 {
   return (int)__builtin_ia32_rdrand64_step(__p);
 }
+#else
+// We need to emulate the functionality of 64-bit