Here is the review.
Two minor comments.
Search for %%%.
On 07/28/2014 04:10 PM, Laurent Birtz wrote:
Add B-pyramid GOPs.
Improve bi-prediction logic.
Add switches to test QP and lambda adjustments.
diff --git a/doc/params.txt b/doc/params.txt
index b9e30df..2584650 100644
--- a/doc/params.txt
+++ b/doc/params.txt
@@ -110,6 +110,12 @@ HM compatibility: set to the HM GOP size minus 1,
regardless of the frame types
in the GOP.
+* bref=X. Default to 0.
+
+Maximum number of B reference frames on one side of the B pyramid. This
+controls the depth of the B pyramid.
+
+
* wpp=X. Default to 0.
Enable wavefront parallel processing to allow multi-threaded decoding.
diff --git a/f265/analyze.c b/f265/analyze.c
index c415cbf..84fa1f5 100644
--- a/f265/analyze.c
+++ b/f265/analyze.c
@@ -37,6 +37,9 @@
// Bit 12 forces the reconstruction of blocks in RDM.
// Bit 13 prevents the analysis of the 32x32 CBs (forced split).
// Bit 14 early terminates the transform tree exploration.
+// Bit 15 performs B refinement tests.
+// Bit 16 performs QP adjustment tests.
+// Bit 17 performs lambda adjustment tests.
// Description of some failed experiments.
//
@@ -2611,13 +2614,11 @@ static int fenc_analyze_inter_uni(f265_enc_thread *t,
f265_cb *cb, int part_idx,
// optimize the use of local variables.
static int fenc_analyze_inter_part(f265_enc_thread *t, f265_cb *cb, int
part_idx)
{
+ f265_cb_analysis *cba = t->cba;
f265_inter_block *ib = &t->inter_block;
f265_me_ctx *me = &t->me;
f265_inter_neighbour_mv cands[5];
- // Best prediction type (0 unidirectional, 1 bidirectional, 2-6 merge).
- int pred_type = 0;
-
// Set the partition data.
ib->part_idx = part_idx;
fenc_me_set_partition(t, cb, part_idx);
@@ -2633,38 +2634,57 @@ static int fenc_analyze_inter_part(f265_enc_thread *t,
f265_cb *cb, int part_idx
F265_COPY2_IF_LT(uni_cost[list], cost, uni_ref_idx[list], ref_idx,
int, int);
}
- int best_uni_list = uni_cost[1] < uni_cost[0];
- int part_cost = uni_cost[best_uni_list];
+ // Best prediction type (0 unidirectional, 1 bidirectional, 2-6 merge).
+ // Assume the P frame case initially.
+ int pred_type = 0;
+ int best_uni_list = 0;
+ int part_cost = uni_cost[0];
- // Test bidirectional prediction.
- // FIXME. Derive best_uni_list inside this branch (without the 4x8 test),
by
- // adding the second bin context value of inter_pred_idc. If not 4x8, add
- // the cost of the first bin context. Keep the best cost overall. The P
case
- // must fast track.
- if (t->nb_lists == 2 && (cb->lg_bs > 3 || cb->inter_part == F265_PART_UN))
+ // Handle the B frame case.
+ if (t->nb_lists == 2)
{
- // Use the mode comparison metric by default.
- me->dist_func_id = t->an.mode_metric;
+ // Bidirectional prediction occurs if the partition is not 8x4/4x8.
+ int bidir_flag = cb->lg_bs > 3 || cb->inter_part == F265_PART_UN;
+
+ // Update the unidirectional prediction costs. FIXME, check if is
+ // worthwhile to account for inter_pred_idc with 2 bypass bins.
+ #if 0
+ int inter_pred_idc_uni_cost = (1 + bidir_flag)*cba->rdm_bin_cost;
+ #else
+ int inter_pred_idc_uni_cost = 0;
+ #endif
- // Set the bidirectional prediction data. Assuming the reference
indices
- // and the motion vectors stay the same as the unidirectional
- // prediction.
- for (int list = 0; list < 2; list++)
+ uni_cost[0] += inter_pred_idc_uni_cost;
+ uni_cost[1] += inter_pred_idc_uni_cost;
+ best_uni_list = uni_cost[1] < uni_cost[0];
+ part_cost = uni_cost[best_uni_list];
+
+ // Test bidirectional prediction.
+ if (bidir_flag)
{
- int ref_idx = uni_ref_idx[list];
- int pmv_idx = ib->uni_pmv_idx[list][ref_idx];
+ // Use the mode comparison metric by default.
+ me->dist_func_id = t->an.mode_metric;
+
+ // Set the bidirectional prediction data. Assuming the reference
indices
+ // and the motion vectors stay the same as the unidirectional
+ // prediction.
+ for (int list = 0; list < 2; list++)
+ {
+ int ref_idx = uni_ref_idx[list];
+ int pmv_idx = ib->uni_pmv_idx[list][ref_idx];
- ib->bi_mv[list] = ib->uni_mv[list][ref_idx];
- ib->bi_pmv_idx[list] = pmv_idx;
- ib->bi_ref_idx[list] = ref_idx;
+ ib->bi_mv[list] = ib->uni_mv[list][ref_idx];
+ ib->bi_pmv_idx[list] = pmv_idx;
+ ib->bi_ref_idx[list] = ref_idx;
- fenc_me_set_ref(t, t->ref_ctx[list] + ref_idx, list);
- fenc_me_set_pmv(me, ib->uni_pmv[list][ref_idx][pmv_idx], t->qp[0],
list);
- }
+ fenc_me_set_ref(t, t->ref_ctx[list] + ref_idx, list);
+ fenc_me_set_pmv(me, ib->uni_pmv[list][ref_idx][pmv_idx],
t->qp[0], list);
+ }
- // Compute the cost and update the partition cost.
- int bi_cost = fenc_me_mv_total_cost_bi(ib->bi_mv, me);
- F265_COPY2_IF_LT(part_cost, bi_cost, pred_type, 1, int, int);
+ // Compute the cost and update the partition cost.
+ int bi_cost = cba->rdm_bin_cost +
fenc_me_mv_total_cost_bi(ib->bi_mv, me);
+ F265_COPY2_IF_LT(part_cost, bi_cost, pred_type, 1, int, int);
+ }
}
// Add the non-merge flag cost.
diff --git a/f265/bdi.c b/f265/bdi.c
index 38496be..c78fa2f 100644
--- a/f265/bdi.c
+++ b/f265/bdi.c
@@ -247,6 +247,7 @@ void f265_normalize_params(f265_enc_params *p)
CL(p->qg_log, -1, 6);
CL(p->nb_refs, 0, 16);
CL(p->nb_b_frames, 0, 16);
+ CL(p->nb_b_refs, 0, F265_MAX(F265_MIN(p->nb_refs, p->nb_b_frames) - 2, 0));
CL(p->chroma_qp_idx_off, -12, 12);
for (int i = 0; i < 2; i++) CL(p->deblock_off[i], -6, 6);
CL(p->merge_cand, 1, 5);
diff --git a/f265/enc.c b/f265/enc.c
index 6f68a4c..f1bfae2 100644
--- a/f265/enc.c
+++ b/f265/enc.c
@@ -665,6 +665,7 @@ static void fenc_init_enc_mem(f265_enc_params *p,
f265_enc_mem_data *d, char **e
gd->qg_log = p->qg_log;
gd->nb_refs = p->nb_refs;
gd->nb_b_frames = p->nb_b_frames;
+ gd->nb_b_refs = p->nb_b_refs;
gd->profile_idc = p->profile_idc;
gd->level_idc = p->level_idc;
gd->chroma_qp_idx_off = p->chroma_qp_idx_off;
@@ -674,7 +675,9 @@ static void fenc_init_enc_mem(f265_enc_params *p,
f265_enc_mem_data *d, char **e
gd->poc_bits = 8; // FIXME, hardcoded for HM compatibility.
gd->default_nb_ref_idx[0] = F265_MAX(p->nb_refs, 1);
gd->default_nb_ref_idx[1] = 1;
- gd->nb_reordered_frames = !!p->nb_b_frames;
%%% Replace by "The calculated number of re-ordered frames is too high when
"...
+ // FIXME, the bound is not tight enough when some B references are stored
+ // in-order.
+ gd->nb_reordered_frames = p->nb_b_frames ? p->nb_b_refs + 1 : 0;
gd->frame_rate_num = p->frame_rate_num;
gd->frame_rate_den = p->frame_rate_den;
gd->algo = p->algo;
@@ -2361,6 +2364,14 @@ static void fenc_init_enc_thread(f265_enc *e, f265_frame
*f, f265_enc_thread *t,
t->hm_lambda[0] = fenc_calc_lambda(&e->gd, f, &f->qp);
t->hm_lambda[1] = fenc_calc_lambda_chroma(t->qp[0],t->qp[1],
t->hm_lambda[0]);
%%% Committing test code? Or do you mean the lambda factors are temporary?
%%% In the first case, it should not be committed. In the second, refactor the
comment.
+ // Temporary lambda adjustment tests.
+ if (t->enc->gd.algo&(1<<17))
+ {
+ if (f->frame_type == F265_FRAME_I) t->hm_lambda[0] *= 0.74;
+ if (f->frame_type == F265_FRAME_P) t->hm_lambda[0] *= 0.74;
+ if (f->frame_type == F265_FRAME_B) t->hm_lambda[0] *= 0.74;
+ }
+
// Set the chroma weight.
t->hm_wcd = pow(2.0, (t->qp[0] - t->qp[1]) / 3.0);
diff --git a/f265/enc.h b/f265/enc.h
index 3af6647..bca2e55 100644
--- a/f265/enc.h
+++ b/f265/enc.h
@@ -2250,6 +2250,9 @@ typedef struct f265_gen_data
// Number of B frames.
int8_t nb_b_frames;
+ // Maximum number of B reference frames on one side of the B pyramid.
+ int8_t nb_b_refs;
+
// Profile indicator.
int8_t profile_idc;
diff --git a/f265/f265.h b/f265/f265.h
index 5fade9f..d1d0a2e 100644
--- a/f265/f265.h
+++ b/f265/f265.h
@@ -136,6 +136,9 @@ typedef struct f265_enc_params
// Number of B frames.
int8_t nb_b_frames;
+ // Maximum number of B reference frames on one side of the B pyramid.
+ int8_t nb_b_refs;
+
// Profile indicator. Set to 0 for automatic management.
int8_t profile_idc;
diff --git a/f265/la.c b/f265/la.c
index 8390390..448a4ff 100644
--- a/f265/la.c
+++ b/f265/la.c
@@ -88,6 +88,45 @@ f265_frame* fenc_la_make_frame(f265_enc *e)
return f;
}
+// Recursively store the B frames in coded order. The frames are stored in a
+// B-pyramid while the supply of B reference frames lasts.
+void fenc_reorder_b_frames(f265_frame **out, f265_frame **in, int nb_in, int
nb_b_refs)
+{
+ // No more B references or not enough frames to add another B-pyramid
level.
+ // Store frames in order, using any remaining B references.
+ if (!nb_b_refs || nb_in <= 2)
+ {
+ for (int i = 0; i < nb_in; i++)
+ {
+ f265_frame *f = in[i];
+ f->la_frame_type = F265_FRAME_B;
+ int b_ref_flag = nb_b_refs > i;
+ F265_SET_FLAG(f->gen_flags, F265_FF_REF, b_ref_flag);
+ out[i] = f;
+ }
+ }
+
+ // Add another B-pyramid level.
+ else
+ {
+ // Number of frames to the left and to the right of the middle B
+ // reference. If the number of frames is even, the left side gets more
+ // frames.
+ int nb_left = nb_in>>1;
+ int nb_right = nb_in - nb_left - 1;
+
+ // Middle B.
+ f265_frame *m = in[nb_left];
+ m->la_frame_type = F265_FRAME_B;
+ F265_SET_FLAG(m->gen_flags, F265_FF_REF, 1);
+ out[0] = m;
+
+ // Left and right.
+ fenc_reorder_b_frames(out + 1, in, nb_left, nb_b_refs - 1);
+ fenc_reorder_b_frames(out + nb_left + 1, in + nb_left + 1, nb_right,
nb_b_refs - 1);
+ }
+}
+
// Process the received frame in regular lookahead. Return the output frame, if
// any. We process the buffered frames as soon as possible. For real-time
// processing this can help to distribute the CPU load evenly.
@@ -175,13 +214,8 @@ f265_frame* fenc_la_process_frame_regular(f265_enc *enc,
f265_frame *in)
p[0]->la_frame_type = i_flag ? F265_FRAME_I : F265_FRAME_P;
F265_SET_FLAG(p[0]->gen_flags, F265_FF_REF, !!enc->gd.nb_refs);
- // B frames.
- for (int i = 0; i < nb_seq_b; i++)
- {
- p[1+i] = la->display[1+i];
- p[1+i]->la_frame_type = F265_FRAME_B;
- F265_SET_FLAG(p[1+i]->gen_flags, F265_FF_REF, 0);
- }
+ // Reorder the B frames.
+ fenc_reorder_b_frames(p + 1, la->display + 1, nb_seq_b,
enc->gd.nb_b_refs);
// Commit the current GOP if there are no committed frames and the
// lookahead is full or flushing.
diff --git a/f265/parse.c b/f265/parse.c
index 9642a7a..ab4cd0a 100644
--- a/f265/parse.c
+++ b/f265/parse.c
@@ -171,6 +171,11 @@ static void handle_param_bframes(f265_parse_ctx *ctx,
f265_enc_params *p, f265_p
p->nb_b_frames = a->i;
}
+static void handle_param_bref(f265_parse_ctx *ctx, f265_enc_params *p,
f265_parse_arg *a, int32_t nb_args)
+{
+ p->nb_b_refs = a->i;
+}
+
static void handle_param_wpp(f265_parse_ctx *ctx, f265_enc_params *p,
f265_parse_arg *a, int32_t nb_args)
{
p->wpp_flag = a->i;
@@ -509,6 +514,7 @@ static const f265_parse_entry f265_enc_params_table[] =
{ "qg", handle_param_qg, 1, 0 },
{ "ref", handle_param_ref, 1, 0 },
{ "bframes", handle_param_bframes, 1, 0 },
+ { "bref", handle_param_bref, 1, 0 },
{ "wpp", handle_param_wpp, 1, 0 },
{ "deblock", handle_param_deblock, 1, 0 },
{ "sao", handle_param_sao, 1, 0 },
diff --git a/f265/rc.c b/f265/rc.c
index d61b7cc..781794d 100644
--- a/f265/rc.c
+++ b/f265/rc.c
@@ -326,6 +326,14 @@ int8_t fenc_rc_frame_start(f265_enc_thread *t, f265_frame
*prev)
if (rc->method == F265_RCM_CQP)
{
+ // Temporary QP adjustment tests.
+ if (t->enc->gd.algo&(1<<16) && frame->frame_type == F265_FRAME_I)
+ return F265_MAX(t->enc->gd.init_qp - 3, 0);
+ if (t->enc->gd.algo&(1<<16) && frame->frame_type == F265_FRAME_B &&
frame->gen_flags&F265_FF_REF)
+ return F265_MAX(t->enc->gd.init_qp + 1, 0);
+ if (t->enc->gd.algo&(1<<16) && frame->frame_type == F265_FRAME_B &&
!(frame->gen_flags&F265_FF_REF))
+ return F265_MAX(t->enc->gd.init_qp + 2, 0);
+
return t->enc->gd.init_qp;
}