[PATCH] D137760: Add FP8 E4M3 support to APFloat.

2022-11-15 Thread Reed Wanderman-Milne via Phabricator via cfe-commits
reedwm added a comment.

In D137760#3928314, @stellaraccident wrote:

> Thanks for this. Patch lgtm. Has a couple of format issues but probably ok 
> as-is (this file is very inconsistently formatted and it looks like you 
> overrode clang-format a bit for consistency with adjacent code).

Yeah, I ran clang-format-diff.py, but reverted some changes when inconsistent 
with the rest of the file. In particular, clang-format-diff.py would unindent 
parts of structs within a namespace (since clang-format wants no indentation in 
a namespace but the file uses indentation), and in the test file I ensured 
added/modified lines in a test function were consistent with the unmodified 
lines in the function.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137760/new/

https://reviews.llvm.org/D137760

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137760: Add FP8 E4M3 support to APFloat.

2022-11-15 Thread Benjamin Kramer via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG88eb3c62f25d: Add FP8 E4M3 support to APFloat. (authored by 
reedwm, committed by bkramer).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137760/new/

https://reviews.llvm.org/D137760

Files:
  clang/include/clang/AST/Stmt.h
  clang/lib/AST/MicrosoftMangle.cpp
  llvm/include/llvm/ADT/APFloat.h
  llvm/lib/Support/APFloat.cpp
  llvm/unittests/ADT/APFloatTest.cpp

Index: llvm/unittests/ADT/APFloatTest.cpp
===
--- llvm/unittests/ADT/APFloatTest.cpp
+++ llvm/unittests/ADT/APFloatTest.cpp
@@ -1683,6 +1683,7 @@
 TEST(APFloatTest, getLargest) {
   EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle()).convertToFloat());
   EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble()).convertToDouble());
+  EXPECT_EQ(448, APFloat::getLargest(APFloat::Float8E4M3FN()).convertToDouble());
 }
 
 TEST(APFloatTest, getSmallest) {
@@ -1766,6 +1767,8 @@
   {::x87DoubleExtended(), true, {0, 0x8000ULL}, 2},
   {::Float8E5M2(), false, {0, 0}, 1},
   {::Float8E5M2(), true, {0x80ULL, 0}, 1},
+  {::Float8E4M3FN(), false, {0, 0}, 1},
+  {::Float8E4M3FN(), true, {0x80ULL, 0}, 1},
   };
   const unsigned NumGetZeroTests = 12;
   for (unsigned i = 0; i < NumGetZeroTests; ++i) {
@@ -3665,6 +3668,16 @@
 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
   }
+  {
+// Test E4M3FN mod where the LHS exponent is maxExponent (8) and the RHS is
+// the max value whose exponent is minExponent (-6). This requires special
+// logic in the mod implementation to prevent overflow to NaN.
+APFloat f1(APFloat::Float8E4M3FN(), "0x1p8");// 256
+APFloat f2(APFloat::Float8E4M3FN(), "0x1.ep-6"); // 0.029296875
+APFloat expected(APFloat::Float8E4M3FN(), "0x1p-8"); // 0.00390625
+EXPECT_EQ(f1.mod(f2), APFloat::opOK);
+EXPECT_TRUE(f1.bitwiseIsEqual(expected));
+  }
 }
 
 TEST(APFloatTest, remainder) {
@@ -4756,6 +4769,389 @@
   EXPECT_TRUE(ilogb(F) == -1);
 }
 
+TEST(APFloatTest, ConvertE4M3FNToE5M2) {
+  bool losesInfo;
+  APFloat test(APFloat::Float8E4M3FN(), "1.0");
+  APFloat::opStatus status = test.convert(
+      APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(1.0f, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  test = APFloat(APFloat::Float8E4M3FN(), "0.0");
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  test = APFloat(APFloat::Float8E4M3FN(), "0x1.2p0"); // 1.125
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+  EXPECT_EQ(status, APFloat::opInexact);
+
+  test = APFloat(APFloat::Float8E4M3FN(), "0x1.6p0"); // 1.375
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+  EXPECT_EQ(status, APFloat::opInexact);
+
+  // Convert E4M3 denormal to E5M2 normal. Should not be truncated, despite the
+  // destination format having one fewer significand bit
+  test = APFloat(APFloat::Float8E4M3FN(), "0x1.Cp-7");
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.Cp-7, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  // Test convert from NaN
+  test = APFloat(APFloat::Float8E4M3FN(), "nan");
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_TRUE(std::isnan(test.convertToFloat()));
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+}
+
+TEST(APFloatTest, ConvertE5M2ToE4M3FN) {
+  bool losesInfo;
+  APFloat test(APFloat::Float8E5M2(), "1.0");
+  APFloat::opStatus status = test.convert(
+      APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(1.0f, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  test = APFloat(APFloat::Float8E5M2(), "0.0");
+  status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  test = APFloat(APFloat::Float8E5M2(), "0x1.Cp8"); // 448
+  status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.Cp8 /* 448 */, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  

[PATCH] D137760: Add FP8 E4M3 support to APFloat.

2022-11-15 Thread Stella Laurenzo via Phabricator via cfe-commits
stellaraccident accepted this revision.
stellaraccident added a comment.

Thanks for this. Patch lgtm. Has a couple of format issues but probably ok 
as-is (this file is very inconsistently formatted and it looks like you 
overrode clang-format a bit for consistency with adjacent code).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137760/new/

https://reviews.llvm.org/D137760

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137760: Add FP8 E4M3 support to APFloat.

2022-11-15 Thread Benjamin Kramer via Phabricator via cfe-commits
bkramer accepted this revision.
bkramer added a comment.
This revision is now accepted and ready to land.

lgtm


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D137760/new/

https://reviews.llvm.org/D137760

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137760: Add FP8 E4M3 support to APFloat.

2022-11-09 Thread Reed Wanderman-Milne via Phabricator via cfe-commits
reedwm created this revision.
reedwm added reviewers: stellaraccident, bkramer, rengolin, jpienaar.
Herald added subscribers: bzcheeseman, rriddle, hiraditya, kristof.beyls.
Herald added a project: All.
reedwm requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, stephenneuendorffer.
Herald added projects: clang, LLVM.

NVIDIA, ARM, and Intel recently introduced two new FP8 formats, as described in 
the paper: https://arxiv.org/abs/2209.05433. The first of the two FP8 dtypes, 
E5M2, was added in https://reviews.llvm.org/D133823. This change adds the 
second of the two: E4M3.

There is an RFC for adding the FP8 dtypes here: 
https://discourse.llvm.org/t/rfc-add-apfloat-and-mlir-type-support-for-fp8-e5m2/65279.
I spoke with the RFC's author, Stella, and she gave me the go-ahead to 
implement the E4M3 type. The name of the E4M3 type in APFloat is Float8E4M3FN, 
as discussed in the RFC. The "FN" means only Finite and NaN values are 
supported.

Unlike E5M2, E4M3 has different behavior from IEEE types with regard to Inf and 
NaN values. There are no Inf values, and NaN is represented when the exponent 
and mantissa bits are all 1s. To represent these differences in APFloat, I 
added an enum field, fltNonfiniteBehavior, to the fltSemantics struct. The 
possible enum values are IEEE754 and NanOnly. Only Float8E4M3FN has the NanOnly 
behavior.

After this change is submitted, I plan on adding the Float8E4M3FN type to MLIR, 
in the same way as E5M2 was added in https://reviews.llvm.org/D133823.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D137760

Files:
  clang/include/clang/AST/Stmt.h
  clang/lib/AST/MicrosoftMangle.cpp
  llvm/include/llvm/ADT/APFloat.h
  llvm/lib/Support/APFloat.cpp
  llvm/unittests/ADT/APFloatTest.cpp

Index: llvm/unittests/ADT/APFloatTest.cpp
===
--- llvm/unittests/ADT/APFloatTest.cpp
+++ llvm/unittests/ADT/APFloatTest.cpp
@@ -1683,6 +1683,7 @@
 TEST(APFloatTest, getLargest) {
   EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle()).convertToFloat());
   EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble()).convertToDouble());
+  EXPECT_EQ(448, APFloat::getLargest(APFloat::Float8E4M3FN()).convertToDouble());
 }
 
 TEST(APFloatTest, getSmallest) {
@@ -1766,6 +1767,8 @@
   {::x87DoubleExtended(), true, {0, 0x8000ULL}, 2},
   {::Float8E5M2(), false, {0, 0}, 1},
   {::Float8E5M2(), true, {0x80ULL, 0}, 1},
+  {::Float8E4M3FN(), false, {0, 0}, 1},
+  {::Float8E4M3FN(), true, {0x80ULL, 0}, 1},
   };
   const unsigned NumGetZeroTests = 12;
   for (unsigned i = 0; i < NumGetZeroTests; ++i) {
@@ -3665,6 +3668,16 @@
 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
   }
+  {
+// Test E4M3FN mod where the LHS exponent is maxExponent (8) and the RHS is
+// the max value whose exponent is minExponent (-6). This requires special
+// logic in the mod implementation to prevent overflow to NaN.
+APFloat f1(APFloat::Float8E4M3FN(), "0x1p8");// 256
+APFloat f2(APFloat::Float8E4M3FN(), "0x1.ep-6"); // 0.029296875
+APFloat expected(APFloat::Float8E4M3FN(), "0x1p-8"); // 0.00390625
+EXPECT_EQ(f1.mod(f2), APFloat::opOK);
+EXPECT_TRUE(f1.bitwiseIsEqual(expected));
+  }
 }
 
 TEST(APFloatTest, remainder) {
@@ -4756,6 +4769,389 @@
   EXPECT_TRUE(ilogb(F) == -1);
 }
 
+TEST(APFloatTest, ConvertE4M3FNToE5M2) {
+  bool losesInfo;
+  APFloat test(APFloat::Float8E4M3FN(), "1.0");
+  APFloat::opStatus status = test.convert(
+      APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(1.0f, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  test = APFloat(APFloat::Float8E4M3FN(), "0.0");
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_FALSE(losesInfo);
+  EXPECT_EQ(status, APFloat::opOK);
+
+  test = APFloat(APFloat::Float8E4M3FN(), "0x1.2p0"); // 1.125
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+  EXPECT_EQ(status, APFloat::opInexact);
+
+  test = APFloat(APFloat::Float8E4M3FN(), "0x1.6p0"); // 1.375
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+  EXPECT_EQ(status, APFloat::opInexact);
+
+  // Convert E4M3 denormal to E5M2 normal. Should not be truncated, despite the
+  // destination format having one fewer significand bit
+  test = APFloat(APFloat::Float8E4M3FN(), "0x1.Cp-7");
+  status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
+                        &losesInfo);
+  EXPECT_EQ(0x1.Cp-7,