On 13 December 2014 at 05:06, Yangfei (Felix) <felix.y...@huawei.com> wrote: > Thanks for reviewing the patch. See my comments inlined: > > >> > This patch fix this two issues. Three changes: >> > 1. vfma_f32, vfmaq_f32, vfms_f32, vfmsq_f32 are only available for >> arm*-*-* target with the FMA feature, we take care of this through the macro >> __ARM_FEATURE_FMA. >> > 2. vfma_n_f32 and vfmaq_n_f32 are only available for aarch64 target, we >> take care of this through the macro __aarch64__. >> > 3. vfmaq_f64, vfmaq_n_f64 and vfmsq_f64 are only available for aarch64 >> target, we just exclude test for them to keep the testcases clean. (Note: >> They >> also pass on aarch64 & aarch64_be target and we can add test for them if >> needed). >> I would prefer to have all the available variants tested. > > OK, the v2 patch attached have all the available variants added. > >> > +#ifdef __aarch64__ >> > /* Expected results. */ >> > VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d }; >> > VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, >> > 0x4486deb8, 0x4486feb8 }; >> > -VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, >> > 0x40890ee1532b8520 }; >> >> Why do you remove this one? > > We need to make some changes to the header files for this test. > Initially, I don't want to touch the header files, so I reduced this testcase > to a minimal one. > >> > >> > int main (void) >> > { >> > +#ifdef __ARM_FEATURE_FMA >> > exec_vfms (); >> > +#endif >> > return 0; >> > } >> >> In the other tests, I try to put as much code in common as possible, between >> the >> 'a' and 's' variants (e.g. vmla/vmls). Maybe you can do that as a follow-up? > > Yes, I think we can handle this with a follow-on patch. > The v2 patch is tested on armeb-linux-gnueabi, arm-linux-gnueabi, > aarch64-linux-gnu and aarch64_be-linux-gnu. > How about this one? Thanks. > It looks better, thanks. Minor comment below.
> > Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h > =================================================================== > --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h > (revision 218582) > +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h > (working copy) > @@ -142,6 +142,10 @@ VECT_VAR_DECL_INIT(buffer, poly, 16, 8); > PAD(buffer_pad, poly, 16, 8); > VECT_VAR_DECL_INIT(buffer, float, 32, 4); > PAD(buffer_pad, float, 32, 4); > +#ifdef __aarch64__ > +VECT_VAR_DECL_INIT(buffer, float, 64, 2); > +PAD(buffer_pad, float, 64, 2); > +#endif > > /* The tests for vld1_dup and vdup expect at least 4 entries in the > input buffer, so force 1- and 2-elements initializers to have 4 > Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma_n.c > =================================================================== > --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma_n.c > (revision 218582) > +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma_n.c > (working copy) > @@ -2,6 +2,7 @@ > #include "arm-neon-ref.h" > #include "compute-ref-data.h" > > +#if defined(__aarch64__) && defined(__ARM_FEATURE_FMA) > /* Expected results. */ > VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d }; > VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, > 0x4486deb8, 0x4486feb8 }; > @@ -9,28 +10,29 @@ VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x40890 > > #define VECT_VAR_ASSIGN(S,Q,T1,W) S##Q##_##T1##W > #define ASSIGN(S, Q, T, W, V) T##W##_t S##Q##_##T##W = V > -#define TEST_MSG "VFMA/VFMAQ" > +#define TEST_MSG "VFMA_N/VFMAQ_N" > + > void exec_vfma_n (void) > { > /* Basic test: v4=vfma_n(v1,v2), then store the result. */ > -#define TEST_VFMA(Q, T1, T2, W, N) \ > +#define TEST_VFMA_N(Q, T1, T2, W, N) \ > VECT_VAR(vector_res, T1, W, N) = \ > vfma##Q##_n_##T2##W(VECT_VAR(vector1, T1, W, N), \ > - VECT_VAR(vector2, T1, W, N), \ > - VECT_VAR_ASSIGN(Scalar, Q, T1, W)); > \ > + VECT_VAR(vector2, T1, W, N), \ > + VECT_VAR_ASSIGN(scalar, Q, T1, W)); \ > vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, > N)) > > -#define CHECK_VFMA_RESULTS(test_name,comment) \ > +#define CHECK_VFMA_N_RESULTS(test_name,comment) > \ > { \ > CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \ > CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \ > - CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ > - } > + CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ > + } > > #define DECL_VABD_VAR(VAR) \ be careful with your cut and paste. VABD should probably be VFMA_N here, although it's purely a naming convention :-) It's OK for me with that change, but I'm not a maintainer. One more question: are there any corner-cases we would want to check? (for instance, rounding, nan, infinity, ...) > DECL_VARIABLE(VAR, float, 32, 2); \ > DECL_VARIABLE(VAR, float, 32, 4); \ > - DECL_VARIABLE(VAR, float, 64, 2); > + DECL_VARIABLE(VAR, float, 64, 2); > > DECL_VABD_VAR(vector1); > DECL_VABD_VAR(vector2); > @@ -50,20 +52,23 @@ void exec_vfma_n (void) > VDUP(vector2, q, float, f, 64, 2, 15.8f); > > /* Choose init value arbitrarily. */ > - ASSIGN(Scalar, , float, 32, 81.2f); > - ASSIGN(Scalar, q, float, 32, 36.8f); > - ASSIGN(Scalar, q, float, 64, 51.7f); > + ASSIGN(scalar, , float, 32, 81.2f); > + ASSIGN(scalar, q, float, 32, 36.8f); > + ASSIGN(scalar, q, float, 64, 51.7f); > > /* Execute the tests. */ > - TEST_VFMA(, float, f, 32, 2); > - TEST_VFMA(q, float, f, 32, 4); > - TEST_VFMA(q, float, f, 64, 2); > + TEST_VFMA_N(, float, f, 32, 2); > + TEST_VFMA_N(q, float, f, 32, 4); > + TEST_VFMA_N(q, float, f, 64, 2); > > - CHECK_VFMA_RESULTS (TEST_MSG, ""); > + CHECK_VFMA_N_RESULTS (TEST_MSG, ""); > } > +#endif > > int main (void) > { > +#if defined(__aarch64__) && defined(__ARM_FEATURE_FMA) > exec_vfma_n (); > +#endif > return 0; > } > Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c > =================================================================== > --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c (revision > 218582) > +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c (working copy) > @@ -2,12 +2,16 @@ > #include "arm-neon-ref.h" > #include "compute-ref-data.h" > > +#ifdef __ARM_FEATURE_FMA > /* Expected results. */ > VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d }; > VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, > 0x4486deb8, 0x4486feb8 }; > +#ifdef __aarch64__ > VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, > 0x40890ee1532b8520 }; > +#endif > > #define TEST_MSG "VFMA/VFMAQ" > + > void exec_vfma (void) > { > /* Basic test: v4=vfma(v1,v2), then store the result. */ > @@ -15,20 +19,30 @@ void exec_vfma (void) > VECT_VAR(vector_res, T1, W, N) = \ > vfma##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ > VECT_VAR(vector2, T1, W, N), \ > - VECT_VAR(vector3, T1, W, N)); \ > + VECT_VAR(vector3, T1, W, N)); \ > vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, > N)) > > +#ifdef __aarch64__ > #define CHECK_VFMA_RESULTS(test_name,comment) \ > { \ > CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \ > CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \ > - CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ > - } > - > + CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ > + } > #define DECL_VABD_VAR(VAR) \ > DECL_VARIABLE(VAR, float, 32, 2); \ > DECL_VARIABLE(VAR, float, 32, 4); \ > - DECL_VARIABLE(VAR, float, 64, 2); > + DECL_VARIABLE(VAR, float, 64, 2); > +#else > +#define CHECK_VFMA_RESULTS(test_name,comment) \ > + { \ > + CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \ > + CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \ > + } > +#define DECL_VABD_VAR(VAR) \ > + DECL_VARIABLE(VAR, float, 32, 2); \ > + DECL_VARIABLE(VAR, float, 32, 4); > +#endif > > DECL_VABD_VAR(vector1); > DECL_VABD_VAR(vector2); > @@ -40,28 +54,39 @@ void exec_vfma (void) > /* Initialize input "vector1" from "buffer". */ > VLOAD(vector1, buffer, , float, f, 32, 2); > VLOAD(vector1, buffer, q, float, f, 32, 4); > +#ifdef __aarch64__ > VLOAD(vector1, buffer, q, float, f, 64, 2); > +#endif > > /* Choose init value arbitrarily. */ > VDUP(vector2, , float, f, 32, 2, 9.3f); > VDUP(vector2, q, float, f, 32, 4, 29.7f); > +#ifdef __aarch64__ > VDUP(vector2, q, float, f, 64, 2, 15.8f); > +#endif > > /* Choose init value arbitrarily. */ > VDUP(vector3, , float, f, 32, 2, 81.2f); > VDUP(vector3, q, float, f, 32, 4, 36.8f); > +#ifdef __aarch64__ > VDUP(vector3, q, float, f, 64, 2, 51.7f); > +#endif > > /* Execute the tests. */ > TEST_VFMA(, float, f, 32, 2); > TEST_VFMA(q, float, f, 32, 4); > +#ifdef __aarch64__ > TEST_VFMA(q, float, f, 64, 2); > +#endif > > CHECK_VFMA_RESULTS (TEST_MSG, ""); > } > +#endif > > int main (void) > { > +#ifdef __ARM_FEATURE_FMA > exec_vfma (); > +#endif > return 0; > } > Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c > =================================================================== > --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c (revision > 218582) > +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c (working copy) > @@ -2,33 +2,47 @@ > #include "arm-neon-ref.h" > #include "compute-ref-data.h" > > +#ifdef __ARM_FEATURE_FMA > /* Expected results. */ > VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc440ca3d, 0xc4408a3d }; > VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8, > 0xc48a5eb8, 0xc48a3eb8 }; > +#ifdef __aarch64__ > VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520, > 0xc089fee1532b8520 }; > +#endif > > -#define TEST_MSG "VFMA/VFMAQ" > +#define TEST_MSG "VFMS/VFMSQ" > + > void exec_vfms (void) > { > /* Basic test: v4=vfms(v1,v2), then store the result. */ > -#define TEST_VFMA(Q, T1, T2, W, N) \ > +#define TEST_VFMS(Q, T1, T2, W, N) \ > VECT_VAR(vector_res, T1, W, N) = \ > vfms##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ > VECT_VAR(vector2, T1, W, N), \ > - VECT_VAR(vector3, T1, W, N)); \ > + VECT_VAR(vector3, T1, W, N)); \ > vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, > N)) > > -#define CHECK_VFMA_RESULTS(test_name,comment) \ > +#ifdef __aarch64__ > +#define CHECK_VFMS_RESULTS(test_name,comment) \ > { \ > CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \ > CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \ > - CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ > - } > - > + CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \ > + } > #define DECL_VABD_VAR(VAR) \ > DECL_VARIABLE(VAR, float, 32, 2); \ > DECL_VARIABLE(VAR, float, 32, 4); \ > - DECL_VARIABLE(VAR, float, 64, 2); > + DECL_VARIABLE(VAR, float, 64, 2); > +#else > +#define CHECK_VFMS_RESULTS(test_name,comment) \ > + { \ > + CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \ > + CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \ > + } > +#define DECL_VABD_VAR(VAR) \ > + DECL_VARIABLE(VAR, float, 32, 2); \ > + DECL_VARIABLE(VAR, float, 32, 4); > +#endif > > DECL_VABD_VAR(vector1); > DECL_VABD_VAR(vector2); > @@ -40,28 +54,39 @@ void exec_vfms (void) > /* Initialize input "vector1" from "buffer". */ > VLOAD(vector1, buffer, , float, f, 32, 2); > VLOAD(vector1, buffer, q, float, f, 32, 4); > +#ifdef __aarch64__ > VLOAD(vector1, buffer, q, float, f, 64, 2); > +#endif > > /* Choose init value arbitrarily. */ > VDUP(vector2, , float, f, 32, 2, 9.3f); > VDUP(vector2, q, float, f, 32, 4, 29.7f); > +#ifdef __aarch64__ > VDUP(vector2, q, float, f, 64, 2, 15.8f); > +#endif > > /* Choose init value arbitrarily. */ > VDUP(vector3, , float, f, 32, 2, 81.2f); > VDUP(vector3, q, float, f, 32, 4, 36.8f); > +#ifdef __aarch64__ > VDUP(vector3, q, float, f, 64, 2, 51.7f); > +#endif > > /* Execute the tests. */ > - TEST_VFMA(, float, f, 32, 2); > - TEST_VFMA(q, float, f, 32, 4); > - TEST_VFMA(q, float, f, 64, 2); > + TEST_VFMS(, float, f, 32, 2); > + TEST_VFMS(q, float, f, 32, 4); > +#ifdef __aarch64__ > + TEST_VFMS(q, float, f, 64, 2); > +#endif > > - CHECK_VFMA_RESULTS (TEST_MSG, ""); > + CHECK_VFMS_RESULTS (TEST_MSG, ""); > } > +#endif > > int main (void) > { > +#ifdef __ARM_FEATURE_FMA > exec_vfms (); > +#endif > return 0; > } > Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h > =================================================================== > --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h > (revision 218582) > +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h > (working copy) > @@ -8,6 +8,7 @@ > > /* helper type, to help write floating point results in integer form. */ > typedef uint32_t hfloat32_t; > +typedef uint64_t hfloat64_t; > > extern void abort(void); > extern void *memset(void *, int, size_t); > @@ -141,6 +142,9 @@ static ARRAY(result, uint, 64, 2); > static ARRAY(result, poly, 8, 16); > static ARRAY(result, poly, 16, 8); > static ARRAY(result, float, 32, 4); > +#ifdef __aarch64__ > +static ARRAY(result, float, 64, 2); > +#endif > > /* Declare expected results, one of each size. They are defined and > initialized in each test file. */ > @@ -166,6 +170,7 @@ extern ARRAY(expected, uint, 64, 2); > extern ARRAY(expected, poly, 8, 16); > extern ARRAY(expected, poly, 16, 8); > extern ARRAY(expected, hfloat, 32, 4); > +extern ARRAY(expected, hfloat, 64, 2); > > /* Check results. Operates on all possible vector types. */ > #define CHECK_RESULTS(test_name,comment) \ > Index: gcc/testsuite/ChangeLog > =================================================================== > --- gcc/testsuite/ChangeLog (revision 218582) > +++ gcc/testsuite/ChangeLog (working copy) > @@ -1,3 +1,19 @@ > +2014-12-13 Felix Yang <felix.y...@huawei.com> > + Haijian Zhang <z.zhanghaij...@huawei.com> > + > + * gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h > + (buffer_float64x2, buffer_pad_float64x2): New helper variables. > + * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (hfloat64_t, > + result_float64x2, expected_hfloat64x2): New helper type, variable and > + declaration. > + (buffer_float64x2, buffer_pad_float64x2): New helper variables. > + * gcc.target/aarch64/advsimd-intrinsics/vfma.c: Don't run on target > + without the FMA feature and exclude test for vfmaq_f64 on arm*-*-*. > + * gcc.target/aarch64/advsimd-intrinsics/vfms.c: Don't run on target > + without the FMA feature and exclude test for vfmsq_f64 on arm*-*-*. > + * gcc.target/aarch64/advsimd-intrinsics/vfma_n.c: Don't run on > arm*-*-* > + and target without the FMA feature. > + > 2014-12-10 Martin Liska <mli...@suse.cz> > > * gcc.dg/ipa/pr63909.c: New test.