zahiraam updated this revision to Diff 441169.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113107/new/

https://reviews.llvm.org/D113107

Files:
  clang/docs/LanguageExtensions.rst
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/CGExprComplex.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/X86/Float16-arithmetic.c
  clang/test/CodeGen/X86/Float16-complex.c
  clang/test/Sema/Float16.c
  clang/test/SemaCXX/Float16.cpp

Index: clang/test/SemaCXX/Float16.cpp
===================================================================
--- clang/test/SemaCXX/Float16.cpp
+++ clang/test/SemaCXX/Float16.cpp
@@ -1,20 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifdef HAVE
 // expected-no-diagnostics
-#endif // HAVE
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // !HAVE
 _Float16 f;
 
-#ifndef HAVE
-// expected-error@+2{{invalid suffix 'F16' on floating constant}}
-#endif // !HAVE
 const auto g = 1.1F16;
Index: clang/test/Sema/Float16.c
===================================================================
--- clang/test/Sema/Float16.c
+++ clang/test/Sema/Float16.c
@@ -1,19 +1,11 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // HAVE
-_Float16 f;
-
-#ifdef HAVE
 _Complex _Float16 a;
 void builtin_complex(void) {
   _Float16 a = 0;
   (void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}}
 }
-#endif
Index: clang/test/CodeGen/X86/Float16-complex.c
===================================================================
--- clang/test/CodeGen/X86/Float16-complex.c
+++ clang/test/CodeGen/X86/Float16-complex.c
@@ -1,134 +1,407 @@
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefixes=CHECK,AVX
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=CHECK,X86
 
 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @add_half_rr(
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
+  // CHECK-LABEL: @add_half_rr(
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX-NEXT: [[AB_ADD:%.*]] = fadd half [[A_LOAD]], [[B_LOAD]]
+  // AVX: store half [[AB_ADD]], {{.*}}
+
+  // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+  // X86: store half [[AB_ADD_TRUNC]], {{.*}}
   return a + b;
 }
+
 _Float16 _Complex add_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @add_half_cr(
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
+  // CHECK-LABEL: @add_half_cr(
+  // CHECK: [[B:%.*]] = alloca half
+  // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX: [[AB_ADD:%.*]] = fadd half [[AR]], [[A_LOAD]]
+  // AVX: store half [[AB_ADD]], ptr {{.*}}
+  // AVX: store half [[AI]], ptr {{.*}}
+
+  // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+  // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[AR_EXT]], [[B_EXT]]
+  // X86: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+  // X86-NEXT: [[AI_TRUNC:%.*]] = fptrunc float [[AI_EXT]] to half
+  // X86: store half [[AB_ADD_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AI_TRUNC]], ptr {{.*}}
   return a + b;
 }
+
 _Float16 _Complex add_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @add_half_rc(
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
+  // CHECK-LABEL: @add_half_rc(
+
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK: store half %a, ptr [[A]]
+  // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[AB_ADD:%.*]] = fadd half [[A_LOAD]], [[BR_LOAD]]
+  // AVX:  store half [[AB_ADD]], ptr {{.*}}
+  // AVX-NEXT:  store half [[BI_LOAD]], ptr {{.*}}
+
+  // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[BR_EXT]]
+  // X86-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+  // X86-NEXT: [[BI_TRUNC:%.*]] = fptrunc float [[BI_EXT]] to half
+  // X86: store half [[AB_ADD_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[BI_TRUNC]], ptr {{.*}}
   return a + b;
 }
+
 _Float16 _Complex add_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @add_half_cc(
-  // X86: fadd
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
+  // CHECK-LABEL: @add_half_cc(
+
+  // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX-NEXT: [[AB_ADDR:%.*]] = fadd half [[AR_LOAD]], [[BR_LOAD]]
+  // AVX-NEXT: [[AB_ADDI:%.*]] = fadd half [[AI_LOAD]], [[BI_LOAD]]
+  // AVX: store half [[AB_ADDR]], ptr {{.*}}
+  // AVX-NEXT: store half [[AB_ADDI]], ptr {{.*}}
+
+  // X86: [[AR_LOAD_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+  // X86: [[AI_LOAD_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-NEXT: [[AB_ADDR:%.*]] = fadd float [[AR_LOAD_EXT]], [[BR_EXT]]
+  // X86-NEXT: [[AB_ADDI:%.*]] = fadd float [[AI_LOAD_EXT]], [[BI_EXT]]
+  // X86-NEXT: [[AB_ADDR_TRUNC:%.*]] = fptrunc float [[AB_ADDR]] to half
+  // X86-NEXT: [[AB_ADDI_TRUNC:%.*]] = fptrunc float [[AB_ADDI]] to half
+  // X86: store half [[AB_ADDR_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AB_ADDI_TRUNC]], ptr {{.*}}
   return a + b;
 }
 
 _Float16 _Complex sub_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @sub_half_rr(
-  // X86: fsub
-  // X86-NOT: fsub
-  // X86: ret
+  // CHECK-LABEL: @sub_half_rr(
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX-NEXT: [[AB_SUB:%.*]] = fsub half [[A_LOAD]], [[B_LOAD]]
+  // AVX: store half [[AB_SUB]], {{.*}}
+  // AVX-NEXT: store half 0xH0000, {{.*}}
+
+  // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_SUB:%.*]] = fsub float [[A_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_SUB_TRUNC:%.*]] = fptrunc float [[AB_SUB]] to half
+  // X86: store half [[AB_SUB_TRUNC]], {{.*}}
+  // X86-NEXT: store half 0xH0000, {{.*}}
   return a - b;
 }
+
 _Float16 _Complex sub_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @sub_half_cr(
-  // X86: fsub
-  // X86-NOT: fsub
-  // X86: ret
+  // CHECK-LABEL: @sub_half_cr(
+  // CHECK: [[B:%.*]] = alloca half
+  // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX: [[AB_SUB:%.*]] = fsub half [[AR]], [[A_LOAD]]
+  // AVX: store half [[AB_SUB]], ptr {{.*}}
+  // AVX: store half [[AI]], ptr {{.*}}
+
+  // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+  // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_SUB:%.*]] = fsub float [[AR_EXT]], [[B_EXT]]
+  // X86-NEXT: fptrunc float [[AB_SUB]] to half
+  // X86-NEXT: fptrunc float [[AI_EXT]] to half
   return a - b;
 }
+
 _Float16 _Complex sub_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @sub_half_rc(
-  // X86: fsub
-  // X86: fneg
-  // X86-NOT: fsub
-  // X86: ret
+  // CHECK-LABEL: @sub_half_rc(
+
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK: store half {{.*}}, ptr [[A]]
+  // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[AB_SUBR:%.*]] = fsub half [[A_LOAD]], [[BR_LOAD]]
+  // AVX: [[AB_SUBI:%.*]] = fneg half {{.*}}
+  // AVX:  store half [[AB_SUBR]], ptr {{.*}}
+  // AVX-NEXT: store half [[AB_SUBI]], ptr {{.*}}
+
+  // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-NEXT: [[AB_SUBR:%.*]] = fsub float [[A_EXT]], [[BR_EXT]]
+  // X86-NEXT: [[AB_SUBI:%.*]] = fneg float [[BI_EXT]]
+  // X86-NEXT: [[AB_SUBR_TRUNC:%.*]] = fptrunc float [[AB_SUBR]] to half
+  // X86-NEXT: [[AB_SUBI_TRUNC:%.*]] = fptrunc float [[AB_SUBI]] to half
+  // X86: store half [[AB_SUBR_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AB_SUBI_TRUNC]], ptr {{.*}}
   return a - b;
 }
+
 _Float16 _Complex sub_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @sub_half_cc(
-  // X86: fsub
-  // X86: fsub
-  // X86-NOT: fsub
-  // X86: ret
+  // CHECK-LABEL: @sub_half_cc(
+
+  // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX-NEXT: [[AB_SUBR:%.*]] = fsub half [[AR_LOAD]], [[BR_LOAD]]
+  // AVX-NEXT: [[AB_SUBI:%.*]] = fsub half [[AI_LOAD]], [[BI_LOAD]]
+  // AVX: store half [[AB_SUBR]], ptr {{.*}}
+  // AVX-NEXT: store half [[AB_SUBI]], ptr {{.*}}
+
+  // X86: [[AR_LOAD_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+  // X86: [[AI_LOAD_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-NEXT: [[AB_SUBR:%.*]] = fsub float [[AR_LOAD_EXT]], [[BR_EXT]]
+  // X86-NEXT: [[AB_SUBI:%.*]] = fsub float [[AI_LOAD_EXT]], [[BI_EXT]]
+  // X86-NEXT: [[AB_SUBR_TRUNC:%.*]] = fptrunc float [[AB_SUBR]] to half
+  // X86-NEXT: [[AB_SUBI_TRUNC:%.*]] = fptrunc float [[AB_SUBI]] to half
+  // X86: store half [[AB_SUBR_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AB_SUBI_TRUNC]], ptr {{.*}}
   return a - b;
 }
 
 _Float16 _Complex mul_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @mul_half_rr(
-  // X86: fmul
-  // X86-NOT: fmul
-  // X86: ret
+  // CHECK-LABEL: @mul_half_rr(
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX-NEXT: [[AB_MUL:%.*]] = fmul half [[A_LOAD]], [[B_LOAD]]
+  // AVX: store half [[AB_MUL]], {{.*}}
+  // AVX-NEXT: store half 0xH0000, {{.*}}
+
+  // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_MUL_TRUNC:%.*]] = fptrunc float [[AB_MUL]] to half
+  // X86: store half [[AB_MUL_TRUNC]], {{.*}}
+  // X86-NEXT: store half 0xH0000, {{.*}}
   return a * b;
 }
+
 _Float16 _Complex mul_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @mul_half_cr(
-  // X86: fmul
-  // X86: fmul
-  // X86-NOT: fmul
-  // X86: ret
+  // CHECK-LABEL: @mul_half_cr(
+  // CHECK: [[B:%.*]] = alloca half
+  // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX: [[AB_MULR:%.*]] = fmul half [[AR]], [[A_LOAD]]
+  // AVX: [[AB_MULI:%.*]] = fmul half [[AI]], [[A_LOAD]]
+  // AVX: store half [[AB_MULR]], ptr {{.*}}
+  // AVX: store half [[AB_MULI]], ptr {{.*}}
+
+  // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+  // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_MULR:%.*]] = fmul float [[AR_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_MULI:%.*]] = fmul float [[AI_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_MULR_TRUNC:%.*]] = fptrunc float [[AB_MULR]] to half
+  // X86-NEXT: [[AB_MULI_TRUNC:%.*]] = fptrunc float [[AB_MULI]] to half
+  // X86: store half [[AB_MULR_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AB_MULI_TRUNC]], ptr {{.*}}
   return a * b;
 }
+
 _Float16 _Complex mul_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @mul_half_rc(
-  // X86: fmul
-  // X86: fmul
-  // X86-NOT: fmul
-  // X86: ret
+  // CHECK-LABEL: @mul_half_rc(
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK: store half %a, ptr [[A]]
+  // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[AB_MULR:%.*]] = fmul half [[A_LOAD]], [[BR_LOAD]]
+  // AVX: [[AB_MULI:%.*]] = fmul half [[A_LOAD]], [[BI_LOAD]]
+  // AVX:  store half [[AB_MULR]], ptr {{.*}}
+  // AVX-NEXT:  store half [[AB_MULI]], ptr {{.*}}
+
+  // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-NEXT: [[AB_MULR:%.*]] = fmul float [[A_EXT]], [[BR_EXT]]
+  // X86-NEXT: [[AB_MULI:%.*]] = fmul float [[A_EXT]], [[BI_EXT]]
+  // X86-NEXT: [[AB_MULR_TRUNC:%.*]] = fptrunc float [[AB_MULR]] to half
+  // X86-NEXT: [[AB_MULI_TRUNC:%.*]] = fptrunc float [[AB_MULI]] to half
+  // X86: store half [[AB_MULR_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AB_MULI_TRUNC]], ptr {{.*}}
   return a * b;
 }
+
 _Float16 _Complex mul_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @mul_half_cc(
-  // X86: %[[AC:[^ ]+]] = fmul
-  // X86: %[[BD:[^ ]+]] = fmul
-  // X86: %[[AD:[^ ]+]] = fmul
-  // X86: %[[BC:[^ ]+]] = fmul
-  // X86: %[[RR:[^ ]+]] = fsub half %[[AC]], %[[BD]]
-  // X86: %[[RI:[^ ]+]] = fadd half
-  // X86-DAG: %[[AD]]
-  // X86-DAG: ,
-  // X86-DAG: %[[BC]]
-  // X86: fcmp uno half %[[RR]]
-  // X86: fcmp uno half %[[RI]]
-  // X86: call {{.*}} @__mulhc3(
-  // X86: ret
+  // CHECK-LABEL: @mul_half_cc(
+  // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX-NEXT: [[AC:%.*]] = fmul half [[AR_LOAD]], [[BR_LOAD]]
+  // AVX-NEXT: [[BD:%.*]] = fmul half [[AI_LOAD]], [[BI_LOAD]]
+  // AVX-NEXT: [[AD:%.*]] = fmul half [[AR_LOAD]], [[BI_LOAD]]
+  // AVX-NEXT: [[BC:%.*]] = fmul half [[AI_LOAD]], [[BR_LOAD]]
+  // AVX:  call <2 x half> @__mulhc3(
+
+  // X86: [[AR_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+  // X86: [[AI_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-NEXT: [[AC:%.*]] = fmul float [[AR_EXT]], [[BR_EXT]]
+  // X86-NEXT: [[BD:%.*]] = fmul float [[AI_EXT]], [[BI_EXT]]
+  // X86-NEXT: [[AD:%.*]] = fmul float [[AR_EXT]], [[BI_EXT]]
+  // X86-NEXT: [[BC:%.*]] = fmul float [[AI_EXT]], [[BR_EXT]]
+  // X86: call <2 x float> @__mulsc3(
   return a * b;
 }
-
 _Float16 _Complex div_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @div_half_rr(
-  // X86: fdiv
-  // X86-NOT: fdiv
-  // X86: ret
+  // CHECK-LABEL: @div_half_rr(
+
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+  // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX-NEXT: [[AB_DIV:%.*]] = fdiv half [[A_LOAD]], [[B_LOAD]]
+  // AVX: store half [[AB_DIV]], {{.*}}
+
+  // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_DIV:%.*]] = fdiv float [[A_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_DIV_TRUNC:%.*]] = fptrunc float [[AB_DIV]] to half
+  // X86: store half [[AB_DIV_TRUNC]], {{.*}}
   return a / b;
 }
+
 _Float16 _Complex div_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @div_half_cr(
-  // X86: fdiv
-  // X86: fdiv
-  // X86-NOT: fdiv
-  // X86: ret
+  // CHECK-LABEL: @div_half_cr(
+  // CHECK: [[B:%.*]] = alloca half
+  // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+  // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+  // AVX: [[AB_DIVR:%.*]] = fdiv half [[AR]], [[A_LOAD]]
+  // AVX: [[AB_DIVI:%.*]] = fdiv half [[AI]], [[A_LOAD]]
+  // AVX: store half [[AB_DIVR]], ptr {{.*}}
+  // AVX-NEXT: store half [[AB_DIVI]], ptr {{.*}}
+
+  // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+  // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+  // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // X86-NEXT: [[AB_DIVR:%.*]] = fdiv float [[AR_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_DIVI:%.*]] = fdiv float [[AI_EXT]], [[B_EXT]]
+  // X86-NEXT: [[AB_DIVR_TRUNC:%.*]] = fptrunc float [[AB_DIVR]] to half
+  // X86-NEXT: [[AB_DIVI_TRUNC:%.*]] = fptrunc float [[AB_DIVI]] to half
+  // X86: store half [[AB_DIVR_TRUNC]], ptr {{.*}}
+  // X86-NEXT: store half [[AB_DIVI_TRUNC]], ptr {{.*}}
   return a / b;
 }
 _Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @div_half_rc(
-  // X86-NOT: fdiv
-  // X86: call {{.*}} @__divhc3(
-  // X86: ret
+  // CHECK-LABEL: @div_half_rc(
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK: store half %a, ptr [[A]]
+  // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: call <2 x half> @__divhc3(half {{.*}} [[A_LOAD]],
+  // AVX-DAG: half {{.*}} [[BR_LOAD]],
+  // AVX-DAG: half {{.*}} [[BI_LOAD]])
+
+  // X86: [[A_EXT:%.*]] = fpext half {{.*}} to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86-DAG:  call <2 x float> @__divsc3(float {{.*}} [[A_EXT]],
+  // X86-DAG: float {{.*}} [[BR_EXT]],
+  // X86-DAG: float {{.*}} [[BI_EXT]])
   return a / b;
 }
+
 _Float16 _Complex div_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @div_half_cc(
-  // X86-NOT: fdiv
-  // X86: call {{.*}} @__divhc3(
-  // X86: ret
+  // CHECK-LABEL: @div_half_cc(
+
+  // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+  // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+
+  // AVX: call <2 x half> @__divhc3(half {{.*}} [[AR_LOAD]],
+  // AVX-DAG: half {{.*}} [[AI_LOAD]],
+  // AVX-DAG: half {{.*}} [[BR_LOAD]],
+  // AVX-DAG: half {{.*}} [[BI_LOAD]])
+
+  // X86: [[AR_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+  // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+  // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+
+  // X86: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+  // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+  // X86: call <2 x float> @__divsc3(float {{.*}} [[AR_EXT]],
+  // X86-DAG: float {{.*}} [[AI_EXT]],
+  // X86-DAG: float {{.*}} [[BR_EXT]],
+  // X86-DAG: float {{.*}} [[BI_EXT]])
   return a / b;
 }
+
+_Float16 _Complex addcompound_half_rrr(_Float16 a, _Float16 c) {
+  // CHECK-LABEL: @addcompound_half_rrr
+
+  // AVX: [[A_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}}
+  // AVX-NEXT: [[AC_ADD:%.*]] = fadd half [[C_LOAD]], [[A_LOAD]]
+  // AVX-NEXT: store half [[AC_ADD]], ptr {{.*}}
+
+  // X86:  [[A_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // X86-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+  // X86-NEXT: [[AC_ADD:%.*]] = fadd float [[C_EXT]], [[A_EXT]]
+  // X86-NEXT: [[AC_ADD_TRUNC:%.*]] = fptrunc float [[AC_ADD]] to half
+  // X86-NEXT: store half [[AC_ADD_TRUNC]], ptr {{.*}}
+  c += a;
+  return c;
+}
Index: clang/test/CodeGen/X86/Float16-arithmetic.c
===================================================================
--- clang/test/CodeGen/X86/Float16-arithmetic.c
+++ clang/test/CodeGen/X86/Float16-arithmetic.c
@@ -1,29 +1,121 @@
 // RUN: %clang_cc1 -triple  x86_64-unknown-unknown \
 // RUN: -emit-llvm -o - %s  | FileCheck %s --check-prefixes=CHECK
 
-// CHECK-NOT: fpext
-// CHECK-NOT: fptrunc
-
 _Float16 add1(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define {{.*}} half @add1
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // CHECK-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]]
+  // CHECK-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+  // CHECK: ret half [[AB_ADD_TRUNC]]
   return a + b;
 }
 
 _Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
+  // CHECK-LABEL: define {{.*}} half @add2
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[C:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // CHECK-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]]
+  // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]]
+  // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+  // CHECK-NEXT: [[ABC_ADD:%.*]] = fadd float [[AB_ADD]], [[C_EXT]]
+  // CHECK-NEXT: [[ABC_ADD_TRUNC:%.*]] = fptrunc float [[ABC_ADD]] to half
+  // CHECK: ret half [[ABC_ADD_TRUNC]]
   return a + b + c;
 }
 
 _Float16 div(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define dso_local half @div
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // CHECK-NEXT: [[AB_DIV:%.*]] = fdiv float [[A_EXT]], [[B_EXT]]
+  // CHECK-NEXT: [[AB_DIV_TRUNC:%.*]] = fptrunc float [[AB_DIV]] to half
+  // CHECK: ret half [[AB_DIV_TRUNC]]
   return a / b;
 }
 
 _Float16 mul(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define dso_local half @mul
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // CHECK-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]]
+  // CHECK-NEXT: [[AB_MUL_TRUNC:%.*]] = fptrunc float [[AB_MUL]] to half
+  // CHECK: ret half [[AB_MUL_TRUNC]]
   return a * b;
 }
 
 _Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+  // CHECK-LABEL: define dso_local half @add_and_mul1
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK-NEXT: [[C:%.*]] = alloca half
+  // CHECK: [[D:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // CHECK-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]]
+  // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]]
+  // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+  // CHECK-NEXT: [[D_LOAD:%.*]] = load half, ptr [[D]]
+  // CHECK-NEXT: [[D_EXT:%.*]] = fpext half [[D_LOAD]] to float
+  // CHECK-NEXT: [[CD_MUL:%.*]] = fmul float [[C_EXT]], [[D_EXT]]
+  // CHECK-NEXT: [[ADD:%.*]] = fadd float [[AB_MUL]], [[CD_MUL]]
+  // CHECK-NEXT: [[ADD_TRUNC:%.*]] = fptrunc float [[ADD]] to half
+  // CHECK: ret half [[ADD_TRUNC]]
   return a * b + c * d;
 }
 
 _Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+  // CHECK-LABEL: define dso_local half @add_and_mul2
+  // CHECK: [[A:%.*]] = alloca half
+  // CHECK-NEXT: [[B:%.*]] = alloca half
+  // CHECK-NEXT: [[C:%.*]] = alloca half
+  // CHECK: [[D:%.*]] = alloca half
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+  // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+  // CHECK-NEXT: [[MUL:%.*]] = fmul float 6.000000e+00, [[B_EXT]]
+  // CHECK-NEXT: [[SUB:%.*]] = fsub float [[A_EXT]], [[MUL]]
+  // CHECK-NEXT: [[SUB_TRUNC:%.*]] = fptrunc float [[SUB]] to half
+  // CHECK-NEXT: [[SUB_EXT:%.*]] = fpext half [[SUB_TRUNC]] to float
+  // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]]
+  // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+  // CHECK-NEXT: [[ADD:%.*]] = fadd float [[SUB_EXT]], [[C_EXT]]
+  // CHECK-NEXT: [[ADD_TRUNC:%.*]] = fptrunc float [[ADD]] to half
+  // CHECK: ret half [[ADD_TRUNC]]
   return (a - 6 * b) + c;
 }
+
+_Float16 addcompound(_Float16 a, _Float16 c) {
+  // CHECK-LABEL: dso_local half @addcompound
+  // CHECK: [[A_LOAD:%.*]] = load half, ptr {{.*}}
+  // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}}
+  // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+  // CHECK-NEXT: [[AC_ADD:%.*]] = fadd float [[C_EXT]], [[A_EXT]]
+  // CHECK-NEXT: [[AC_ADD_TRUNC:%.*]] = fptrunc float [[AC_ADD]] to half
+  // CHECK-NEXT:  store half [[AC_ADD_TRUNC]], ptr {{.*}}
+  // CHECK-NEXT: [[RES:%.*]] = load half, ptr {{.*}}
+  // CHECK:  ret half [[RES]]
+  c += a;
+  return c;
+}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4403,6 +4403,9 @@
   /// EmitLoadOfComplex - Load a complex number from the specified l-value.
   ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc);
 
+  ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType DstTy);
+  llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType DstType);
+
   Address emitAddrOfRealComponent(Address complex, QualType complexType);
   Address emitAddrOfImagComponent(Address complex, QualType complexType);
 
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -791,21 +791,47 @@
   // Helper functions for fixed point binary operations.
   Value *EmitFixedPointBinOp(const BinOpInfo &Ops);
 
-  BinOpInfo EmitBinOps(const BinaryOperator *E);
-  LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
-                            Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
-                                  Value *&Result);
+  BinOpInfo EmitBinOps(const BinaryOperator *E,
+                       QualType PromotionTy = QualType());
+
+  Value *EmitPromoted(const Expr *E, QualType PromotionTy);
+
+  LValue EmitCompoundAssignLValue(
+      const CompoundAssignOperator *E, QualType PromotionType,
+      Value *(ScalarExprEmitter::*F)(const BinOpInfo &), Value *&Result);
 
   Value *EmitCompoundAssign(const CompoundAssignOperator *E,
+                            QualType PromotionTy,
                             Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
 
+  QualType getPromotionType(const Expr *E) {
+    if (E->getType()->isFloat16Type()) {
+      if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision())
+        return CGF.getContext().FloatTy;
+    }
+    return QualType();
+  }
+
   // Binary operators and binary compound assignment operators.
-#define HANDLEBINOP(OP) \
-  Value *VisitBin ## OP(const BinaryOperator *E) {                         \
-    return Emit ## OP(EmitBinOps(E));                                      \
-  }                                                                        \
-  Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) {       \
-    return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP);          \
+#define HANDLEBINOP(OP)                                                        \
+  Value *VisitBin##OP(const BinaryOperator *E) {                               \
+    QualType promotionTy = getPromotionType(E);                                \
+    auto result = Emit##OP(EmitBinOps(E, promotionTy));                        \
+    if (result)                                                                \
+      if (!promotionTy.isNull())                                               \
+        result = Builder.CreateFPTrunc(result, ConvertType(E->getType()),      \
+                                       "unpromotion");                         \
+    return result;                                                             \
+  }                                                                            \
+  Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) {               \
+    QualType promotionTy = getPromotionType(E);                                \
+    auto result =                                                              \
+        EmitCompoundAssign(E, promotionTy, &ScalarExprEmitter::Emit##OP);      \
+    if (result)                                                                \
+      if (!promotionTy.isNull())                                               \
+        result = Builder.CreateFPTrunc(result, ConvertType(E->getType()),      \
+                                       "unpromotion");                         \
+    return result;                                                             \
   }
   HANDLEBINOP(Mul)
   HANDLEBINOP(Div)
@@ -3050,12 +3076,45 @@
 //                           Binary Operators
 //===----------------------------------------------------------------------===//
 
-BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
+Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) {
+  if (auto BO = dyn_cast<BinaryOperator>(E)) {
+    switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP)                                                       \
+  case BO_##OP:                                                                \
+    return Emit##OP(EmitBinOps(BO, PromotionType));
+      HANDLE_BINOP(Add)
+      HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
+      HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+    default:
+      break;
+    }
+  } else {
+    auto result = Visit(const_cast<Expr *>(E));
+    if (result)
+      return CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext");
+  }
+  // fallback path
+  auto result = Visit(const_cast<Expr *>(E));
+  if (result)
+    result = CGF.Builder.CreateFPExt(result, ConvertType(E->getType()));
+  return result;
+}
+
+BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E,
+                                        QualType PromotionType) {
   TestAndClearIgnoreResultAssign();
   BinOpInfo Result;
-  Result.LHS = Visit(E->getLHS());
-  Result.RHS = Visit(E->getRHS());
-  Result.Ty  = E->getType();
+  if (!PromotionType.isNull()) {
+    Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType);
+    Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+    Result.Ty = PromotionType;
+  } else {
+    Result.LHS = Visit(E->getLHS());
+    Result.RHS = Visit(E->getRHS());
+    Result.Ty  = E->getType();
+  }
   Result.Opcode = E->getOpcode();
   Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
   Result.E = E;
@@ -3063,9 +3122,8 @@
 }
 
 LValue ScalarExprEmitter::EmitCompoundAssignLValue(
-                                              const CompoundAssignOperator *E,
-                        Value *(ScalarExprEmitter::*Func)(const BinOpInfo &),
-                                                   Value *&Result) {
+    const CompoundAssignOperator *E, QualType PromotionType,
+    Value *(ScalarExprEmitter::*Func)(const BinOpInfo &), Value *&Result) {
   QualType LHSTy = E->getLHS()->getType();
   BinOpInfo OpInfo;
 
@@ -3074,8 +3132,13 @@
 
   // Emit the RHS first.  __block variables need to have the rhs evaluated
   // first, plus this should improve codegen a little.
-  OpInfo.RHS = Visit(E->getRHS());
-  OpInfo.Ty = E->getComputationResultType();
+  if (!PromotionType.isNull()) {
+    OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+    OpInfo.Ty = PromotionType;
+  } else {
+    OpInfo.RHS = Visit(E->getRHS());
+    OpInfo.Ty = E->getComputationResultType();
+  }
   OpInfo.Opcode = E->getOpcode();
   OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
   OpInfo.E = E;
@@ -3094,7 +3157,8 @@
       llvm::Instruction::BinaryOps Op;
       switch (OpInfo.Opcode) {
         // We don't have atomicrmw operands for *, %, /, <<, >>
-        case BO_MulAssign: case BO_DivAssign:
+        case BO_MulAssign:
+        case BO_DivAssign:
         case BO_RemAssign:
         case BO_ShlAssign:
         case BO_ShrAssign:
@@ -3154,16 +3218,24 @@
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures);
   SourceLocation Loc = E->getExprLoc();
-  OpInfo.LHS =
-      EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc);
+  if (!PromotionType.isNull())
+    OpInfo.LHS =
+        EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionType, E->getExprLoc());
+  else
+    OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
+                                      E->getComputationLHSType(), Loc);
 
   // Expand the binary operator.
   Result = (this->*Func)(OpInfo);
 
   // Convert the result back to the LHS type,
   // potentially with Implicit Conversion sanitizer check.
-  Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
-                                Loc, ScalarConversionOpts(CGF.SanOpts));
+  if (!PromotionType.isNull())
+    Result = EmitScalarConversion(Result, PromotionType, LHSTy, Loc,
+                                  ScalarConversionOpts(CGF.SanOpts));
+  else
+    Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
+                                  Loc, ScalarConversionOpts(CGF.SanOpts));
 
   if (atomicPHI) {
     llvm::BasicBlock *curBlock = Builder.GetInsertBlock();
@@ -3193,11 +3265,12 @@
   return LHSLV;
 }
 
-Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E,
-                      Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) {
+Value *ScalarExprEmitter::EmitCompoundAssign(
+    const CompoundAssignOperator *E, QualType PromotionTy,
+    Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) {
   bool Ignore = TestAndClearIgnoreResultAssign();
   Value *RHS = nullptr;
-  LValue LHS = EmitCompoundAssignLValue(E, Func, RHS);
+  LValue LHS = EmitCompoundAssignLValue(E, PromotionTy, Func, RHS);
 
   // If the result is clearly ignored, return now.
   if (Ignore)
@@ -4896,6 +4969,12 @@
       .EmitComplexToScalarConversion(Src, SrcTy, DstTy, Loc);
 }
 
+Value *
+CodeGenFunction::EmitPromotedScalarExpr(const Expr *E,
+                                        QualType DstType) {
+  return ScalarExprEmitter(*this).EmitPromoted(E, DstType);
+}
+
 
 llvm::Value *CodeGenFunction::
 EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
@@ -4930,8 +5009,8 @@
   switch (E->getOpcode()) {
 #define COMPOUND_OP(Op)                                                       \
     case BO_##Op##Assign:                                                     \
-      return Scalar.EmitCompoundAssignLValue(E, &ScalarExprEmitter::Emit##Op, \
-                                             Result)
+      return Scalar.EmitCompoundAssignLValue(                                 \
+          E, Scalar.getPromotionType(E), &ScalarExprEmitter::Emit##Op, Result)
   COMPOUND_OP(Mul);
   COMPOUND_OP(Div);
   COMPOUND_OP(Rem);
Index: clang/lib/CodeGen/CGExprComplex.cpp
===================================================================
--- clang/lib/CodeGen/CGExprComplex.cpp
+++ clang/lib/CodeGen/CGExprComplex.cpp
@@ -253,7 +253,9 @@
     QualType Ty;  // Computation Type.
   };
 
-  BinOpInfo EmitBinOps(const BinaryOperator *E);
+  BinOpInfo EmitBinOps(const BinaryOperator *E,
+                       QualType PromotionTy = QualType());
+  ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy);
   LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
                                   ComplexPairTy (ComplexExprEmitter::*Func)
                                   (const BinOpInfo &),
@@ -270,18 +272,43 @@
   ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
                                         const BinOpInfo &Op);
 
-  ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
-    return EmitBinAdd(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinSub(const BinaryOperator *E) {
-    return EmitBinSub(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinMul(const BinaryOperator *E) {
-    return EmitBinMul(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
-    return EmitBinDiv(EmitBinOps(E));
-  }
+ QualType getPromotionType(const Expr *E) {
+    assert(E->getType()->isAnyComplexType() &&
+           "Expecting to promote a complex type!");
+    QualType ElementType =
+        E->getType()->castAs<ComplexType>()->getElementType();
+    if (ElementType->isFloat16Type())
+      if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision())
+        return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+    return QualType();
+  }
+
+#define HANDLEBINOP(OP)                                                        \
+  ComplexPairTy VisitBin##OP(const BinaryOperator *E) {                        \
+    QualType promotionTy = getPromotionType(E);                                \
+    ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy));            \
+    if (!promotionTy.isNull()) {                                               \
+      if (result.first)                                                        \
+        result.first = Builder.CreateFPTrunc(                                  \
+            result.first,                                                      \
+            CGF.ConvertType(                                                   \
+                E->getType()->castAs<ComplexType>()->getElementType()),        \
+            "unpromotion");                                                    \
+      if (result.second)                                                       \
+        result.second = Builder.CreateFPTrunc(                                 \
+            result.second,                                                     \
+            CGF.ConvertType(                                                   \
+                E->getType()->castAs<ComplexType>()->getElementType()),        \
+            "unpromotion");                                                    \
+    }                                                                          \
+    return result;                                                             \
+  }
+
+  HANDLEBINOP(Mul)
+  HANDLEBINOP(Div)
+  HANDLEBINOP(Add)
+  HANDLEBINOP(Sub)
+#undef HANDLEBINOP
 
   ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
     return Visit(E->getSemanticForm());
@@ -876,25 +903,95 @@
   return ComplexPairTy(DSTr, DSTi);
 }
 
+ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E,
+                                               QualType PromotionType) {
+  if (auto BO = dyn_cast<BinaryOperator>(E)) {
+    switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP)                                                       \
+  case BO_##OP:                                                                \
+    return EmitBin##OP(EmitBinOps(BO, PromotionType));
+      HANDLE_BINOP(Add)
+      HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
+      HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+    default:
+      break;
+    }
+  } else {
+    ComplexPairTy Result = Visit(const_cast<Expr *>(E));
+    llvm::Value *Resultr = CGF.Builder.CreateFPExt(
+        Result.first,
+        CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType()),
+        "ext");
+    llvm::Value *Resulti = CGF.Builder.CreateFPExt(
+        Result.second,
+        CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType()),
+        "ext");
+    return ComplexPairTy(Resultr, Resulti);
+  }
+  // fallback path
+  ComplexPairTy Result = Visit(const_cast<Expr *>(E));
+  llvm::Value *Resultr = CGF.Builder.CreateFPExt(
+      Result.first,
+      CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType()));
+  llvm::Value *Resulti = CGF.Builder.CreateFPExt(
+      Result.second,
+      CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType()));
+  return ComplexPairTy(Resultr, Resulti);
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E,
+                                                       QualType DstTy) {
+  return ComplexExprEmitter(*this).EmitPromoted(E, DstTy);
+}
+
 ComplexExprEmitter::BinOpInfo
-ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) {
+ComplexExprEmitter::EmitBinOps(const BinaryOperator *E,
+                               QualType PromotionType) {
   TestAndClearIgnoreReal();
   TestAndClearIgnoreImag();
   BinOpInfo Ops;
-  if (E->getLHS()->getType()->isRealFloatingType())
-    Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
-  else
-    Ops.LHS = Visit(E->getLHS());
-  if (E->getRHS()->getType()->isRealFloatingType())
-    Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
-  else
-    Ops.RHS = Visit(E->getRHS());
 
-  Ops.Ty = E->getType();
+  if (E->getLHS()->getType()->isRealFloatingType()) {
+    if (!PromotionType.isNull())
+      Ops.LHS = ComplexPairTy(
+          CGF.EmitPromotedScalarExpr(
+              E->getLHS(),
+              PromotionType->castAs<ComplexType>()->getElementType()),
+          nullptr);
+    else
+      Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
+  } else {
+    if (!PromotionType.isNull())
+      Ops.LHS = ComplexPairTy(
+          CGF.EmitPromotedComplexExpr(E->getLHS(), PromotionType));
+    else
+      Ops.LHS = Visit(E->getLHS());
+  }
+  if (E->getRHS()->getType()->isRealFloatingType()) {
+    if (!PromotionType.isNull())
+      Ops.RHS = ComplexPairTy(
+          CGF.EmitPromotedScalarExpr(
+              E->getRHS(),
+              PromotionType->castAs<ComplexType>()->getElementType()),
+          nullptr);
+    else
+      Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+  } else {
+    if (!PromotionType.isNull())
+      Ops.RHS = ComplexPairTy(
+          CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionType));
+    else
+      Ops.RHS = Visit(E->getRHS());
+  }
+  if (!PromotionType.isNull())
+    Ops.Ty = PromotionType;
+  else
+    Ops.Ty = E->getType();
   return Ops;
 }
 
-
 LValue ComplexExprEmitter::
 EmitCompoundAssignLValue(const CompoundAssignOperator *E,
           ComplexPairTy (ComplexExprEmitter::*Func)(const BinOpInfo&),
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -286,6 +286,10 @@
     return false;
   }
 
+  bool shouldEmitFloat16WithExcessPrecision() const {
+    return HasFloat16 && !hasLegalHalfType();
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -239,6 +239,7 @@
       HasAVX512ER = true;
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
+      HasLegalHalfType = true;
     } else if (Feature == "+avx512pf") {
       HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
@@ -371,6 +372,8 @@
                          .Default(NoXOP);
     XOPLevel = std::max(XOPLevel, XLevel);
   }
+  // Turn on _Float16 for x86 (requires the SSE2 feature).
+  HasFloat16 = SSELevel >= SSE2;
 
   // LLVM doesn't have a separate switch for fpmath, so only accept it if it
   // matches the selected sse level.
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -909,6 +909,8 @@
     return true;
   }
 
+  virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }
+
   /// Specify if mangling based on address space map should be used or
   /// not for language specific address spaces
   bool useAddressSpaceMapMangling() const {
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -527,6 +527,8 @@
   handled incorrectly by some software (e.g. new failures with incorrect
   assertions).
 
+- Support for the ``_Float16`` type has been added on X86 targets with SSE2
+  enabled.
+
 Arm and AArch64 Support in Clang
 --------------------------------
 
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -749,7 +749,11 @@
 includes all 64-bit and all recent 32-bit processors. When the target supports
 AVX512-FP16, ``_Float16`` arithmetic is performed using that native support.
 Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``,
-performing the operation, and then truncating to ``_Float16``.
+performing the operation, and then truncating to ``_Float16``. When doing this
+emulation, Clang defaults to following the C standard's rules for excess
+precision arithmetic, which avoids intermediate truncations within statements
+and may generate different results from a strict operation-by-operation
+emulation.
 
 ``_Float16`` will be supported on more targets as they define ABIs for it.
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to