Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coque...@redhat.com> > Sent: Thursday, September 15, 2022 12:37 AM > To: Vargas, Hernan <hernan.var...@intel.com>; dev@dpdk.org; > gak...@marvell.com; t...@redhat.com > Cc: Chautru, Nicolas <nicolas.chau...@intel.com>; Zhang, Qi Z > <qi.z.zh...@intel.com> > Subject: Re: [PATCH v2 13/37] baseband/acc10x: limit cases for HARQ > pruning > > > > On 8/20/22 04:31, Hernan Vargas wrote: > > Add flag ACC101_HARQ_PRUNING_OPTIMIZATION to limit cases when > HARQ > > pruning is valid. > > > > Signed-off-by: Hernan Vargas <hernan.var...@intel.com> > > --- > > drivers/baseband/acc100/rte_acc100_pmd.c | 52 > +++++++++++++++++++----- > > 1 file changed, 41 insertions(+), 11 deletions(-) > > > > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c > > b/drivers/baseband/acc100/rte_acc100_pmd.c > > index 81bae4d695..e47f7d68c2 100644 > > --- a/drivers/baseband/acc100/rte_acc100_pmd.c > > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c > > @@ -1370,17 +1370,23 @@ acc100_fcw_ld_fill(struct rte_bbdev_dec_op > *op, struct acc100_fcw_ld *fcw, > > harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset); > > #ifdef ACC100_EXT_MEM > > /* Limit cases when HARQ pruning is valid */ > > +#ifdef ACC100_HARQ_PRUNING_OPTIMIZATION > > harq_prun = ((op->ldpc_dec.harq_combined_output.offset % > > - ACC100_HARQ_OFFSET) == 0) && > > - (op->ldpc_dec.harq_combined_output.offset <= > UINT16_MAX > > - * ACC100_HARQ_OFFSET); > > + ACC100_HARQ_OFFSET) == 0); > > +#endif > > Optimizations should not be put under #ifdefs, it will become a testing hell > otherwise. CI will have to run as many builds as there are possible > combinations, which is not sustainable. > > Even if not part of this patch, the "#ifdef ACC100_EXT_MEM" should also be > removed.
With regard to ACC100_EXT_MEM, this compilation switch makes it possible to use the device with standard memory (not the dedicated memory on the card). I believe there is value, notably for debug purposes, in letting users rebuild with a different capability (it is more of a DEBUG option). I understand that only the default value is built by default. As you pointed out, this is not related to this patchset. > > > #endif > > if (fcw->hcin_en > 0) { > > harq_in_length = op->ldpc_dec.harq_combined_input.length; > > if (fcw->hcin_decomp_mode > 0) > > harq_in_length = harq_in_length * 8 / 6; > > - harq_in_length = RTE_ALIGN(harq_in_length, 64); > > - if ((harq_layout[harq_index].offset > 0) & harq_prun) { > > + harq_in_length = RTE_MIN(harq_in_length, op- > >ldpc_dec.n_cb > > + - op->ldpc_dec.n_filler); > > + /* Alignment on next 64B - Already enforced from HC output > */ > > + harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64); > > + /* Stronger alignment requirement when in decompression > mode */ > > + if (fcw->hcin_decomp_mode > 0) > > + harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, > 256); > > + if ((harq_layout[harq_index].offset > 0) && harq_prun) { > > rte_bbdev_log_debug("HARQ IN offset unexpected > for now\n"); > > fcw->hcin_size0 = harq_layout[harq_index].size0; > > fcw->hcin_offset = harq_layout[harq_index].offset; > @@ -1455,6 > > +1461,7 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct > acc100_fcw_ld *fcw, > > uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, > parity_offset; > > uint32_t harq_index; > > uint32_t l; > > + bool harq_prun = false; > > > > fcw->qm = op->ldpc_dec.q_m; > > fcw->nfiller = op->ldpc_dec.n_filler; @@ -1500,6 +1507,13 @@ > > acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld > *fcw, > > fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags, > > RTE_BBDEV_LDPC_LLR_COMPRESSION); > > harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset); > > + #ifdef ACC100_EXT_MEM > > + /* Limit cases 
when HARQ pruning is valid */ #ifdef > > +ACC101_HARQ_PRUNING_OPTIMIZATION > > + harq_prun = ((op->ldpc_dec.harq_combined_output.offset % > > + ACC101_HARQ_OFFSET) == 0); > > +#endif > > +#endif > > if (fcw->hcin_en > 0) { > > harq_in_length = op->ldpc_dec.harq_combined_input.length; > > if (fcw->hcin_decomp_mode > 0) > > @@ -1508,9 +1522,17 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op > *op, struct acc100_fcw_ld *fcw, > > - op->ldpc_dec.n_filler); > > /* Alignment on next 64B - Already enforced from HC output > */ > > harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64); > > - fcw->hcin_size0 = harq_in_length; > > - fcw->hcin_offset = 0; > > - fcw->hcin_size1 = 0; > > + if ((harq_layout[harq_index].offset > 0) && harq_prun) { > > + rte_bbdev_log_debug("HARQ IN offset unexpected > for now\n"); > > + fcw->hcin_size0 = harq_layout[harq_index].size0; > > + fcw->hcin_offset = harq_layout[harq_index].offset; > > + fcw->hcin_size1 = harq_in_length - > > + harq_layout[harq_index].offset; > > + } else { > > + fcw->hcin_size0 = harq_in_length; > > + fcw->hcin_offset = 0; > > + fcw->hcin_size1 = 0; > > + } > > } else { > > fcw->hcin_size0 = 0; > > fcw->hcin_offset = 0; > > @@ -1551,9 +1573,17 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op > *op, struct acc100_fcw_ld *fcw, > > harq_out_length = RTE_MIN(harq_out_length, ncb_p); > > /* Alignment on next 64B */ > > harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64); > > - fcw->hcout_size0 = harq_out_length; > > - fcw->hcout_size1 = 0; > > - fcw->hcout_offset = 0; > > + if ((k0_p > fcw->hcin_size0 + > ACC100_HARQ_OFFSET_THRESHOLD) && > > + harq_prun) { > > + fcw->hcout_size0 = (uint16_t) fcw->hcin_size0; > > + fcw->hcout_offset = k0_p & 0xFFC0; > > + fcw->hcout_size1 = harq_out_length - fcw- > >hcout_offset; > > + } else { > > + fcw->hcout_size0 = harq_out_length; > > + fcw->hcout_size1 = 0; > > + fcw->hcout_offset = 0; > > + } > > + > > harq_layout[harq_index].offset = fcw->hcout_offset; > > harq_layout[harq_index].size0 = 
fcw->hcout_size0; > > } else {