Re: [PATCH v5 5/8] target/ppc: Implemented xvf16ger*

2022-05-23 Thread Daniel Henrique Barboza

checkpatch.pl didn't like this patch:



Checking v5-5-8-target-ppc-Implemented-xvf16ger.patch...
WARNING: line over 80 characters
#177: FILE: target/ppc/fpu_helper.c:3535:
+va = !(pmsk & 2) ? float64_zero : extract(a->VsrHF(2 * i), 
excp_ptr);

WARNING: line over 80 characters
#178: FILE: target/ppc/fpu_helper.c:3536:
+vb = !(pmsk & 2) ? float64_zero : extract(b->VsrHF(2 * j), 
excp_ptr);

WARNING: line over 80 characters
#179: FILE: target/ppc/fpu_helper.c:3537:
+vc = !(pmsk & 1) ? float64_zero : extract(a->VsrHF(2 * i + 1), 
excp_ptr);

WARNING: line over 80 characters
#180: FILE: target/ppc/fpu_helper.c:3538:
+vd = !(pmsk & 1) ? float64_zero : extract(b->VsrHF(2 * j + 1), 
excp_ptr);

total: 0 errors, 4 warnings, 165 lines checked

v5-5-8-target-ppc-Implemented-xvf16ger.patch has style problems, please review.
If any of these errors are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.




Thanks,


Daniel

On 5/20/22 16:54, Lucas Mateus Castro(alqotel) wrote:

From: "Lucas Mateus Castro (alqotel)" 

Implement the following PowerISA v3.1 instructions:
xvf16ger2:   VSX Vector 16-bit Floating-Point GER (rank-2 update)
xvf16ger2nn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative
multiply, Negative accumulate
xvf16ger2np: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative
multiply, Positive accumulate
xvf16ger2pn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive
multiply, Negative accumulate
xvf16ger2pp: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive
multiply, Positive accumulate

Signed-off-by: Lucas Mateus Castro (alqotel) 
Reviewed-by: Richard Henderson 
---
  target/ppc/cpu.h|  3 +
  target/ppc/fpu_helper.c | 91 +
  target/ppc/helper.h |  5 ++
  target/ppc/insn32.decode|  6 ++
  target/ppc/translate/vsx-impl.c.inc |  6 ++
  5 files changed, 111 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index bdedf4138e..46769a5647 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -227,6 +227,7 @@ typedef union _ppc_vsr_t {
  int16_t s16[8];
  int32_t s32[4];
  int64_t s64[2];
+float16 f16[8];
  float32 f32[4];
  float64 f64[2];
  float128 f128;
@@ -2641,6 +2642,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, 
int rx)
  #define VsrSW(i) s32[i]
  #define VsrD(i) u64[i]
  #define VsrSD(i) s64[i]
+#define VsrHF(i) f16[i]
  #define VsrSF(i) f32[i]
  #define VsrDF(i) f64[i]
  #else
@@ -2652,6 +2654,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, 
int rx)
  #define VsrSW(i) s32[3 - (i)]
  #define VsrD(i) u64[1 - (i)]
  #define VsrSD(i) s64[1 - (i)]
+#define VsrHF(i) f16[7 - (i)]
  #define VsrSF(i) f32[3 - (i)]
  #define VsrDF(i) f64[1 - (i)]
  #endif
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 1766da5bcf..7a7aa03ac4 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -36,6 +36,15 @@ static inline float128 float128_snan_to_qnan(float128 x)
  #define float32_snan_to_qnan(x) ((x) | 0x00400000)
  #define float16_snan_to_qnan(x) ((x) | 0x0200)
  
+static inline float32 bfp32_neg(float32 a)

+{
+if (unlikely(float32_is_any_nan(a))) {
+return a;
+} else {
+return float32_chs(a);
+}
+}
+
  static inline bool fp_exceptions_enabled(CPUPPCState *env)
  {
  #ifdef CONFIG_USER_ONLY
@@ -3502,6 +3511,53 @@ static inline void vsxger_excp(CPUPPCState *env, 
uintptr_t retaddr)
  do_fpscr_check_status(env, retaddr);
  }
  
+typedef float64 extract_f16(float16, float_status *);

+
+static float64 extract_hf16(float16 in, float_status *fp_status)
+{
+return float16_to_float64(in, true, fp_status);
+}
+
+static void vsxger16(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t  *at, uint32_t mask, bool acc,
+ bool neg_mul, bool neg_acc, extract_f16 extract)
+{
+float32 r, aux_acc;
+float64 psum, va, vb, vc, vd;
+int i, j, xmsk_bit, ymsk_bit;
+uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
+xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
+ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
+float_status *excp_ptr = &env->fp_status;
+for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+va = !(pmsk & 2) ? float64_zero : extract(a->VsrHF(2 * i), excp_ptr);
+vb = !(pmsk & 2) ? float64_zero : extract(b->VsrHF(2 * j), excp_ptr);
+vc = !(pmsk & 1) ? float64_zero : extract(a->VsrHF(2 * i + 1), excp_ptr);
+vd = !(pmsk & 1) ? float64_zero : extract(b->VsrHF(2 * j + 1), excp_ptr);
+psum = float64_mul(va, vb, excp_ptr);
+psum = float64r32_muladd(vc, vd, psum, 0, 

[PATCH v5 5/8] target/ppc: Implemented xvf16ger*

2022-05-20 Thread Lucas Mateus Castro(alqotel)
From: "Lucas Mateus Castro (alqotel)" 

Implement the following PowerISA v3.1 instructions:
xvf16ger2:   VSX Vector 16-bit Floating-Point GER (rank-2 update)
xvf16ger2nn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative
multiply, Negative accumulate
xvf16ger2np: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative
multiply, Positive accumulate
xvf16ger2pn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive
multiply, Negative accumulate
xvf16ger2pp: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive
multiply, Positive accumulate

Signed-off-by: Lucas Mateus Castro (alqotel) 
Reviewed-by: Richard Henderson 
---
 target/ppc/cpu.h|  3 +
 target/ppc/fpu_helper.c | 91 +
 target/ppc/helper.h |  5 ++
 target/ppc/insn32.decode|  6 ++
 target/ppc/translate/vsx-impl.c.inc |  6 ++
 5 files changed, 111 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index bdedf4138e..46769a5647 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -227,6 +227,7 @@ typedef union _ppc_vsr_t {
 int16_t s16[8];
 int32_t s32[4];
 int64_t s64[2];
+float16 f16[8];
 float32 f32[4];
 float64 f64[2];
 float128 f128;
@@ -2641,6 +2642,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, 
int rx)
 #define VsrSW(i) s32[i]
 #define VsrD(i) u64[i]
 #define VsrSD(i) s64[i]
+#define VsrHF(i) f16[i]
 #define VsrSF(i) f32[i]
 #define VsrDF(i) f64[i]
 #else
@@ -2652,6 +2654,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, 
int rx)
 #define VsrSW(i) s32[3 - (i)]
 #define VsrD(i) u64[1 - (i)]
 #define VsrSD(i) s64[1 - (i)]
+#define VsrHF(i) f16[7 - (i)]
 #define VsrSF(i) f32[3 - (i)]
 #define VsrDF(i) f64[1 - (i)]
 #endif
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 1766da5bcf..7a7aa03ac4 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -36,6 +36,15 @@ static inline float128 float128_snan_to_qnan(float128 x)
 #define float32_snan_to_qnan(x) ((x) | 0x00400000)
 #define float16_snan_to_qnan(x) ((x) | 0x0200)
 
+static inline float32 bfp32_neg(float32 a)
+{
+if (unlikely(float32_is_any_nan(a))) {
+return a;
+} else {
+return float32_chs(a);
+}
+}
+
 static inline bool fp_exceptions_enabled(CPUPPCState *env)
 {
 #ifdef CONFIG_USER_ONLY
@@ -3502,6 +3511,53 @@ static inline void vsxger_excp(CPUPPCState *env, 
uintptr_t retaddr)
 do_fpscr_check_status(env, retaddr);
 }
 
+typedef float64 extract_f16(float16, float_status *);
+
+static float64 extract_hf16(float16 in, float_status *fp_status)
+{
+return float16_to_float64(in, true, fp_status);
+}
+
+static void vsxger16(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t  *at, uint32_t mask, bool acc,
+ bool neg_mul, bool neg_acc, extract_f16 extract)
+{
+float32 r, aux_acc;
+float64 psum, va, vb, vc, vd;
+int i, j, xmsk_bit, ymsk_bit;
+uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
+xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
+ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
+float_status *excp_ptr = &env->fp_status;
+for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+va = !(pmsk & 2) ? float64_zero : extract(a->VsrHF(2 * i), excp_ptr);
+vb = !(pmsk & 2) ? float64_zero : extract(b->VsrHF(2 * j), excp_ptr);
+vc = !(pmsk & 1) ? float64_zero : extract(a->VsrHF(2 * i + 1), excp_ptr);
+vd = !(pmsk & 1) ? float64_zero : extract(b->VsrHF(2 * j + 1), excp_ptr);
+psum = float64_mul(va, vb, excp_ptr);
+psum = float64r32_muladd(vc, vd, psum, 0, excp_ptr);
+r = float64_to_float32(psum, excp_ptr);
+if (acc) {
+aux_acc = at[i].VsrSF(j);
+if (neg_mul) {
+r = bfp32_neg(r);
+}
+if (neg_acc) {
+aux_acc = bfp32_neg(aux_acc);
+}
+r = float32_add(r, aux_acc, excp_ptr);
+}
+at[i].VsrSF(j) = r;
+} else {
+at[i].VsrSF(j) = float32_zero;
+}
+}
+}
+vsxger_excp(env, GETPC());
+}
+
 typedef void vsxger_zero(ppc_vsr_t *at, int, int);
 
 typedef void vsxger_muladd_f(ppc_vsr_t *, ppc_vsr_t *, ppc_vsr_t *, int, int,
@@ -3579,6 +3635,41 @@ static void vsxger(CPUPPCState *env, ppc_vsr_t *a, 
ppc_vsr_t *b, ppc_acc_t  *at,
 vsxger_excp(env, GETPC());
 }
 
+QEMU_FLATTEN
+void helper_XVF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+vsxger16(env, a, b, at, mask, false, false, false, extract_hf16);
+}
+