Here is the review.
Two minor comments.
Search for %%%.
On 07/28/2014 04:10 PM, Laurent Birtz wrote:
Add B-pyramid GOPs.
Improve bi-prediction logic.
Add switches to test QP and lambda adjustments.
diff --git a/doc/params.txt b/doc/params.txt
index b9e30df..2584650 100644
--- a/doc/params.txt
+++ b/doc/params.txt
@@ -110,6 +110,12 @@ HM compatibility: set to the HM GOP size minus 1,
regardless of the frame types
in the GOP.
+* bref=X. Default to 0.
+
+Maximum number of B reference frames on one side of the B pyramid. This
+controls the depth of the B pyramid.
+
+
* wpp=X. Default to 0.
Enable wavefront parallel processing to allow multi-threaded decoding.
diff --git a/f265/analyze.c b/f265/analyze.c
index c415cbf..84fa1f5 100644
--- a/f265/analyze.c
+++ b/f265/analyze.c
@@ -37,6 +37,9 @@
// Bit 12 forces the reconstruction of blocks in RDM.
// Bit 13 prevents the analysis of the 32x32 CBs (forced split).
// Bit 14 early terminates the transform tree exploration.
+// Bit 15 performs B refinement tests.
+// Bit 16 performs QP adjustment tests.
+// Bit 17 performs lambda adjustment tests.
// Description of some failed experiments.
//
@@ -2611,13 +2614,11 @@ static int fenc_analyze_inter_uni(f265_enc_thread *t,
f265_cb *cb, int part_idx,
// optimize the use of local variables.
static int fenc_analyze_inter_part(f265_enc_thread *t, f265_cb *cb, int
part_idx)
{
+ f265_cb_analysis *cba = t->cba;
f265_inter_block *ib = &t->inter_block;
f265_me_ctx *me = &t->me;
f265_inter_neighbour_mv cands[5];
- // Best prediction type (0 unidirectional, 1 bidirectional, 2-6 merge).
- int pred_type = 0;
-
// Set the partition data.
ib->part_idx = part_idx;
fenc_me_set_partition(t, cb, part_idx);
@@ -2633,38 +2634,57 @@ static int fenc_analyze_inter_part(f265_enc_thread *t,
f265_cb *cb, int part_idx
F265_COPY2_IF_LT(uni_cost[list], cost, uni_ref_idx[list], ref_idx,
int, int);
}
- int best_uni_list = uni_cost[1] < uni_cost[0];
- int part_cost = uni_cost[best_uni_list];
+ // Best prediction type (0 unidirectional, 1 bidirectional, 2-6 merge).
+ // Assume the P frame case initially.
+ int pred_type = 0;
+ int best_uni_list = 0;
+ int part_cost = uni_cost[0];
- // Test bidirectional prediction.
- // FIXME. Derive best_uni_list inside this branch (without the 4x8 test),
by
- // adding the second bin context value of inter_pred_idc. If not 4x8, add
- // the cost of the first bin context. Keep the best cost overall. The P
case
- // must fast track.
- if (t->nb_lists == 2 && (cb->lg_bs > 3 || cb->inter_part == F265_PART_UN))
+ // Handle the B frame case.
+ if (t->nb_lists == 2)
{
- // Use the mode comparison metric by default.
- me->dist_func_id = t->an.mode_metric;
+ // Bidirectional prediction occurs if the partition is not 8x4/4x8.
+ int bidir_flag = cb->lg_bs > 3 || cb->inter_part == F265_PART_UN;
+
+ // Update the unidirectional prediction costs. FIXME, check if is
+ // worthwhile to account for inter_pred_idc with 2 bypass bins.
+ #if 0
+ int inter_pred_idc_uni_cost = (1 + bidir_flag)*cba->rdm_bin_cost;
+ #else
+ int inter_pred_idc_uni_cost = 0;
+ #endif
- // Set the bidirectional prediction data. Assuming the reference
indices
- // and the motion vectors stay the same as the unidirectional
- // prediction.
- for (int list = 0; list < 2; list++)
+ uni_cost[0] += inter_pred_idc_uni_cost;
+ uni_cost[1] += inter_pred_idc_uni_cost;
+ best_uni_list = uni_cost[1] < uni_cost[0];
+ part_cost = uni_cost[best_uni_list];
+
+ // Test bidirectional prediction.
+ if (bidir_flag)
{
- int ref_idx = uni_ref_idx[list];
- int pmv_idx = ib->uni_pmv_idx[list][ref_idx];
+ // Use the mode comparison metric by default.
+ me->dist_func_id = t->an.mode_metric;
+
+ // Set the bidirectional prediction data. Assuming the reference
indices
+ // and the motion vectors stay the same as the unidirectional
+ // prediction.
+ for (int list = 0; list < 2; list++)
+ {
+ int ref_idx = uni_ref_idx[list];
+ int pmv_idx = ib->uni_pmv_idx[list][ref_idx];
- ib->bi_mv[list] = ib->uni_mv[list][ref_idx];
- ib->bi_pmv_idx[list] = pmv_idx;
- ib->bi_ref_idx[list] = ref_idx;
+ ib->bi_mv[list] = ib->uni_mv[list][ref_idx];
+ ib->bi_pmv_idx[list] = pmv_idx;
+ ib->bi_ref_idx[list] = ref_idx;
- fenc_me_set_ref(t, t->ref_ctx[list] + ref_idx, list);
- fenc_me_set_pmv(me, ib->uni_pmv[list][ref_idx][pmv_idx], t->qp[0],
list);
- }
+ fenc_me_set_ref(t, t->ref_ctx[list] + ref_idx, list);
+ fenc_me_set_pmv(me, ib->uni_pmv[list][ref_idx][pmv_idx],
t->qp[0], list);
+ }
- // Compute the cost and update the partition cost.
- int bi_cost = fenc_me_mv_total_cost_bi(ib->bi_mv, me);
- F265_COPY2_IF_LT(part_cost, bi_cost, pred_type, 1, int, int);
+ // Compute the cost and update the partition cost.
+ int bi_cost = cba->rdm_bin_cost +
fenc_me_mv_total_cost_bi(ib->bi_mv, me);
+ F265_COPY2_IF_LT(part_cost, bi_cost, pred_type, 1, int, int);
+ }
}
// Add the non-merge flag cost.
diff --git a/f265/bdi.c b/f265/bdi.c
index 38496be..c78fa2f 100644
--- a/f265/bdi.c
+++ b/f265/bdi.c
@@ -247,6 +247,7 @@ void f265_normalize_params(f265_enc_params *p)
CL(p->qg_log, -1, 6);
CL(p->nb_refs, 0, 16);
CL(p->nb_b_frames, 0, 16);
+ CL(p->nb_b_refs, 0, F265_MAX(F265_MIN(p->nb_refs, p->nb_b_frames) - 2, 0));
CL(p->chroma_qp_idx_off, -12, 12);
for (int i = 0; i < 2; i++) CL(p->deblock_off[i], -6, 6);
CL(p->merge_cand, 1, 5);
diff --git a/f265/enc.c b/f265/enc.c
index 6f68a4c..f1bfae2 100644
--- a/f265/enc.c
+++ b/f265/enc.c
@@ -665,6 +665,7 @@ static void fenc_init_enc_mem(f265_enc_params *p,
f265_enc_mem_data *d, char **e
gd->qg_log = p->qg_log;
gd->nb_refs = p->nb_refs;
gd->nb_b_frames = p->nb_b_frames;
+ gd->nb_b_refs = p->nb_b_refs;
gd->profile_idc = p->profile_idc;
gd->level_idc = p->level_idc;
gd->chroma_qp_idx_off = p->chroma_qp_idx_off;
@@ -674,7 +675,9 @@ static void fenc_init_enc_mem(f265_enc_params *p,
f265_enc_mem_data *d, char **e
gd->poc_bits = 8; // FIXME, hardcoded for HM compatibility.
gd->default_nb_ref_idx[0] = F265_MAX(p->nb_refs, 1);
gd->default_nb_ref_idx[1] = 1;
- gd->nb_reordered_frames = !!p->nb_b_frames;
%%% Replace by "The calculated number of re-ordered frames is too high when
"...
+ // FIXME, the bound is not tight enough when some B references are stored
+ // in-order.
+ gd->nb_reordered_frames = p->nb_b_frames ? p->nb_b_refs + 1 : 0;
gd->frame_rate_num = p->frame_rate_num;
gd->frame_rate_den = p->frame_rate_den;
gd->algo = p->algo;
@@ -2361,6 +2364,14 @@ static void fenc_init_enc_thread(f265_enc *e, f265_frame
*f, f265_enc_thread *t,
t->hm_lambda[0] = fenc_calc_lambda(&e->gd, f, &f->qp);
t->hm_lambda[1] = fenc_calc_lambda_chroma(t->qp[0],t->qp[1],
t->hm_lambda[0]);
%%% Committing test code? Or do you mean the lambda factors are temporary?
%%% In the first case, it should not be committed. In the second, refactor the
comment.
+ // Temporary lambda adjustment tests.
+ if (t->enc->gd.algo&(1<<17))
+ {
+ if (f->frame_type == F265_FRAME_I) t->hm_lambda[0] *= 0.74;
+ if (f->frame_type == F265_FRAME_P) t->hm_lambda[0] *= 0.74;
+ if (f->frame_type == F265_FRAME_B) t->hm_lambda[0] *= 0.74;
+ }
+
// Set the chroma weight.
t->hm_wcd = pow(2.0, (t->qp[0] - t->qp[1]) / 3.0);
diff --git a/f265/enc.h b/f265/enc.h
index 3af6647..bca2e55 100644
--- a/f265/enc.h
+++ b/f265/enc.h
@@ -2250,6 +2250,9 @@ typedef struct f265_gen_data
// Number of B frames.
int8_t nb_b_frames;
+ // Maximum number of B reference frames on one side of the B pyramid.
+ int8_t nb_b_refs;
+
// Profile indicator.
int8_t profile_idc;
diff --git a/f265/f265.h b/f265/f265.h
index 5fade9f..d1d0a2e 100644
--- a/f265/f265.h
+++ b/f265/f265.h
@@ -136,6 +136,9 @@ typedef struct f265_enc_params
// Number of B frames.
int8_t nb_b_frames;
+ // Maximum number of B reference frames on one side of the B pyramid.
+ int8_t nb_b_refs;
+
// Profile indicator. Set to 0 for automatic management.
int8_t profile_idc;
diff --git a/f265/la.c b/f265/la.c
index 8390390..448a4ff 100644
--- a/f265/la.c
+++ b/f265/la.c
@@ -88,6 +88,45 @@ f265_frame* fenc_la_make_frame(f265_enc *e)
return f;
}
+// Recursively store the B frames in coded order. The frames are stored in a
+// B-pyramid while the supply of B reference frames lasts.
+void fenc_reorder_b_frames(f265_frame **out, f265_frame **in, int nb_in, int
nb_b_refs)
+{
+ // No more B references or not enough frames to add another B-pyramid
level.
+ // Store frames in order, using any remaining B references.
+ if (!nb_b_refs || nb_in <= 2)
+ {
+ for (int i = 0; i < nb_in; i++)
+ {
+ f265_frame *f = in[i];
+ f->la_frame_type = F265_FRAME_B;
+ int b_ref_flag = nb_b_refs > i;
+ F265_SET_FLAG(f->gen_flags, F265_FF_REF, b_ref_flag);
+ out[i] = f;
+ }
+ }
+
+ // Add another B-pyramid level.
+ else
+ {
+ // Number of frames to the left and to the right of the middle B
+ // reference. If the number of frames is even, the left side gets more
+ // frames.
+ int nb_left = nb_in>>1;
+ int nb_right = nb_in - nb_left - 1;
+
+ // Middle B.
+ f265_frame *m = in[nb_left];
+ m->la_frame_type = F265_FRAME_B;
+ F265_SET_FLAG(m->gen_flags, F265_FF_REF, 1);
+ out[0] = m;
+
+ // Left and right.
+ fenc_reorder_b_frames(out + 1, in, nb_left, nb_b_refs - 1);
+ fenc_reorder_b_frames(out + nb_left + 1, in + nb_left + 1, nb_right,
nb_b_refs - 1);
+ }
+}
+
// Process the received frame in regular lookahead. Return the output frame, if
// any. We process the buffered frames as soon as possible. For real-time
// processing this can help to distribute the CPU load evenly.
@@ -175,13 +214,8 @@ f265_frame* fenc_la_process_frame_regular(f265_enc *enc,
f265_frame *in)
p[0]->la_frame_type = i_flag ? F265_FRAME_I : F265_FRAME_P;
F265_SET_FLAG(p[0]->gen_flags, F265_FF_REF, !!enc->gd.nb_refs);
- // B frames.
- for (int i = 0; i < nb_seq_b; i++)
- {
- p[1+i] = la->display[1+i];
- p[1+i]->la_frame_type = F265_FRAME_B;
- F265_SET_FLAG(p[1+i]->gen_flags, F265_FF_REF, 0);
- }
+ // Reorder the B frames.
+ fenc_reorder_b_frames(p + 1, la->display + 1, nb_seq_b,
enc->gd.nb_b_refs);
// Commit the current GOP if there are no committed frames and the
// lookahead is full or flushing.
diff --git a/f265/parse.c b/f265/parse.c
index 9642a7a..ab4cd0a 100644
--- a/f265/parse.c
+++ b/f265/parse.c
@@ -171,6 +171,11 @@ static void handle_param_bframes(f265_parse_ctx *ctx,
f265_enc_params *p, f265_p
p->nb_b_frames = a->i;
}
+static void handle_param_bref(f265_parse_ctx *ctx, f265_enc_params *p,
f265_parse_arg *a, int32_t nb_args)
+{
+ p->nb_b_refs = a->i;
+}
+
static void handle_param_wpp(f265_parse_ctx *ctx, f265_enc_params *p,
f265_parse_arg *a, int32_t nb_args)
{
p->wpp_flag = a->i;
@@ -509,6 +514,7 @@ static const f265_parse_entry f265_enc_params_table[] =
{ "qg", handle_param_qg, 1, 0 },
{ "ref", handle_param_ref, 1, 0 },
{ "bframes", handle_param_bframes, 1, 0 },
+ { "bref", handle_param_bref, 1, 0 },
{ "wpp", handle_param_wpp, 1, 0 },
{ "deblock", handle_param_deblock, 1, 0 },
{ "sao", handle_param_sao, 1, 0 },
diff --git a/f265/rc.c b/f265/rc.c
index d61b7cc..781794d 100644
--- a/f265/rc.c
+++ b/f265/rc.c
@@ -326,6 +326,14 @@ int8_t fenc_rc_frame_start(f265_enc_thread *t, f265_frame
*prev)
if (rc->method == F265_RCM_CQP)
{
+ // Temporary QP adjustment tests.
+ if (t->enc->gd.algo&(1<<16) && frame->frame_type == F265_FRAME_I)
+ return F265_MAX(t->enc->gd.init_qp - 3, 0);
+ if (t->enc->gd.algo&(1<<16) && frame->frame_type == F265_FRAME_B &&
frame->gen_flags&F265_FF_REF)
+ return F265_MAX(t->enc->gd.init_qp + 1, 0);
+ if (t->enc->gd.algo&(1<<16) && frame->frame_type == F265_FRAME_B &&
!(frame->gen_flags&F265_FF_REF))
+ return F265_MAX(t->enc->gd.init_qp + 2, 0);
+
return t->enc->gd.init_qp;
}