Update quality=10, quality=15 presets.
Add thresholds for mode analysis.
Add transform tree early termination.
Transform block encoding function tweaks.
Fix finline macro for clang.
Note: I had to invert the control flow of the TT exploration function;
you can skip that part in the review.
Thanks,
Laurent
diff --git a/f265/analyze.c b/f265/analyze.c
index 266b020..210e602 100644
--- a/f265/analyze.c
+++ b/f265/analyze.c
@@ -33,7 +33,29 @@
// Bit 8 enables the second RDO pass.
//
// Bit 10 enables fake luma interpolation.
+// Bit 11 enables the use of thresholds for the mode analysis.
// Bit 12 forces the reconstruction of blocks in RDM.
+// Bit 14 early terminates the transform tree exploration.
+
+// Description of some failed experiments.
+//
+// Deferred RDO transform tree analysis. For each CB mode, analyze only the
+// largest transform block in RDO. Refine the full transform tree of the best CB
+// mode found. It turns out that this lowers both quality and performance. The
+// transform tree seems to matter a lot for choosing the best CB mode. Aggressive
+// filtering using the RDM costs yields only one or a few candidates per CB, so
+// the overhead of processing the largest transform block twice cancels the
+// performance benefits.
+//
+// Skipping the analysis of 32x32 intra if the children do not choose intra
+// 16x16. This causes a large quality penalty. 32x32 intra is chosen regularly
+// even when inter, intra 8x8 and intra 4x4 are present in the smaller CBs. The
+// correlation between the intra 32x32 RDM cost and the 16x16 intra RDM costs is
+// very poor.
+
+
+// FIXME. Used to estimate the quality loss if we skip the chroma RDO process.
+//#define NO_CHROMA_RDO
// FIXME:
@@ -48,12 +70,17 @@
///////////////////////////////////////////////////////////////////////////////
// Temporary debugging code.
-// Print the analysis costs. Obsolete.
+// Print the analysis costs.
//#define VAN_TRACE_ANALYSIS
// Verify the CB inter cost in RDM mode. No effect in RDO mode.
//#define VAN_VERIFY_CB_INTER_COST
+#ifdef VAN_TRACE_ANALYSIS
+// Set to 0 to disable tracing.
+int fenc_trace_analysis_flag = 0;
+#endif
+
void fenc_an_cb_loc(f265_enc_thread *t, f265_cb *cb, int comp)
{
int csf = !!comp;
@@ -750,45 +777,14 @@ static int64_t fenc_analyze_intra_luma_tt(f265_enc_thread *t, f265_cb *cb, int m
// Get the split flag costs.
uint16_t split_flag_costs[2] = { 0, 0 };
- #if 0
- if (split_present_flag)
- #else
if (t->an.rdm_flag && split_present_flag)
- #endif
{
int off = (5-lg_bs)<<1;
for (int i = 0; i < 2; i++) split_flag_costs[i] = t->an.se_costs[F265_SE_SPLIT_TRANSFORM + off + i];
}
- // FIXME: consider extracting this code in a function.
- if (split_flag)
- {
- // Add a split transform node.
- *t->tt.tn++ = 15;
-
- int64_t split_flag_cost = 0;
- if (t->an.rdm_flag) split_flag_cost = split_flag_costs[1];
- else if (split_present_flag)
- {
- F265_RDO_COST(split_flag_cost,
- fenc_encode_context_bin(&t->cbs, F265_CO_SPLIT_TRANSFORM+5-lg_bs, 1));
- }
-
- fenc_stash_push(t);
- int64_t subtree_cost = 0;
- for (int i = 0, sbs = 1<<(lg_bs-1); i < 4; i++)
- subtree_cost += fenc_analyze_intra_luma_tt(t, cb, mode, depth_range, depth+1, lg_bs-1,
- cb_ox + (i&1)*sbs, cb_oy + (i>>1)*sbs);
-
- fenc_stash_pop(t);
-
- split_cost = split_flag_cost + subtree_cost;
- }
-
if (unsplit_flag)
{
- if (stash_flag) fenc_stash_save_reset(t);
-
// Add an unsplit transform node.
*t->tt.tn++ = 7;
@@ -820,16 +816,46 @@ static int64_t fenc_analyze_intra_luma_tt(f265_enc_thread *t, f265_cb *cb, int m
}
unsplit_cost = unsplit_flag_cost + cbf_cost + fudge + tb_cost;
+
+ // Early terminate the exploration.
+ if (t->enc->gd.algo&(1<<14) && !nz_flag) split_flag = stash_flag = 0;
}
+ if (split_flag)
+ {
+ if (stash_flag) fenc_stash_save_reset(t);
+
+ // Add a split transform node.
+ *t->tt.tn++ = 15;
+
+ int64_t split_flag_cost = 0;
+ if (t->an.rdm_flag) split_flag_cost = split_flag_costs[1];
+ else if (split_present_flag)
+ {
+ F265_RDO_COST(split_flag_cost,
+ fenc_encode_context_bin(&t->cbs, F265_CO_SPLIT_TRANSFORM+5-lg_bs, 1));
+ }
+
+ fenc_stash_push(t);
+ int64_t subtree_cost = 0;
+ for (int i = 0, sbs = 1<<(lg_bs-1); i < 4; i++)
+ subtree_cost += fenc_analyze_intra_luma_tt(t, cb, mode, depth_range, depth+1, lg_bs-1,
+ cb_ox + (i&1)*sbs, cb_oy + (i>>1)*sbs);
+
+ fenc_stash_pop(t);
+
+ split_cost = split_flag_cost + subtree_cost;
+ }
+
+
if (split_cost < unsplit_cost)
{
- if (stash_flag) fenc_stash_restore(t);
best_cost = split_cost;
}
else
{
+ if (stash_flag) fenc_stash_restore(t);
best_cost = unsplit_cost;
}
@@ -920,6 +946,10 @@ static int64_t fenc_analyze_inter_tb(f265_enc_thread *t, f265_cb *cb, int *nz_fl
int64_t cost = coeff_cost + dist;
+ #ifdef NO_CHROMA_RDO
+ if (comp) cost = 0;
+ #endif
+
return cost;
}
@@ -967,6 +997,10 @@ static int64_t fenc_analyze_inter_tb_full(f265_enc_thread *t, f265_cb *cb, int *
fenc_stash_pop(t);
+ #ifdef NO_CHROMA_RDO
+ if (comp) ret_cost = 0;
+ #endif
+
return ret_cost;
}
@@ -984,8 +1018,58 @@ static int64_t fenc_analyze_inter_sub_tt(f265_enc_thread *t, f265_cb *cb, int *y
if (stash_flag) fenc_stash_init_cb_off(t, cb, cb_ox, cb_oy, 1<<lg_bs, 1<<lg_bs);
+ if (unsplit_flag)
+ {
+ // Add an unsplit transform node.
+ uint8_t *tn = t->tt.tn++;
+
+ int64_t unsplit_flag_cost = 0;
+ if (split_present_flag)
+ {
+ F265_RDO_COST(unsplit_flag_cost,
+ fenc_encode_context_bin(&t->cbs, F265_CO_SPLIT_TRANSFORM+5-lg_bs, 0));
+ }
+
+ // Encode the luma transform block.
+ int nz_flag;
+ int64_t tb_cost = fenc_analyze_inter_tb_full(t, cb, &nz_flag, 0, depth, lg_bs, cb_ox, cb_oy);
+ unsplit_yuv_flags |= nz_flag;
+
+ // Encode the chroma transform blocks.
+ // FIXME: we should consider the tentative chroma flags for the
+ // split/unsplit comparison (without passing up the tentative costs).
+ if (lg_bs > 2 || block_idx == 3)
+ {
+ int chroma_lg_bs = F265_MAX(lg_bs-1, 2);
+ int chroma_off[2] = { (cb_ox>>1)&~3, (cb_oy>>1)&~3 };
+
+ for (int comp = 1; comp < 3; comp++)
+ {
+ tb_cost += fenc_analyze_inter_tb_full(t, cb, &nz_flag, comp, depth, chroma_lg_bs,
+ chroma_off[0], chroma_off[1]);
+ unsplit_yuv_flags |= nz_flag<<comp;
+ }
+ }
+
+ // Encode the luma CBF unless it is inferred by root_cbf.
+ int64_t luma_cbf_cost = 0;
+ if (depth || unsplit_yuv_flags&6)
+ {
+ F265_RDO_COST(luma_cbf_cost,
+ fenc_encode_context_bin(&t->cbs, F265_CO_CBF_LUMA+!depth, unsplit_yuv_flags&1));
+ }
+
+ unsplit_cost = unsplit_flag_cost + luma_cbf_cost + tb_cost;
+ *tn = unsplit_yuv_flags;
+
+ // Early terminate the exploration.
+ if (t->enc->gd.algo&(1<<14) && !unsplit_yuv_flags) split_flag = stash_flag = 0;
+ }
+
if (split_flag)
{
+ if (stash_flag) fenc_stash_save_reset(t);
+
// Add a split transform node.
uint8_t *tn = t->tt.tn++;
@@ -1041,6 +1125,9 @@ static int64_t fenc_analyze_inter_sub_tt(f265_enc_thread *t, f265_cb *cb, int *y
fenc_encode_context_bin(&t->cbs, F265_CO_CBF_CHROMA+depth+1, cbf);
}
chroma_cbf_cost = fenc_rdo_bit_cost(t, t->cbs.rdo.bits);
+ #ifdef NO_CHROMA_RDO
+ chroma_cbf_cost = 0;
+ #endif
}
}
@@ -1055,62 +1142,15 @@ static int64_t fenc_analyze_inter_sub_tt(f265_enc_thread *t, f265_cb *cb, int *y
*tn = 8|split_yuv_flags;
}
- if (unsplit_flag)
- {
- if (stash_flag) fenc_stash_save_reset(t);
-
- // Add an unsplit transform node.
- uint8_t *tn = t->tt.tn++;
-
- int64_t unsplit_flag_cost = 0;
- if (split_present_flag)
- {
- F265_RDO_COST(unsplit_flag_cost,
- fenc_encode_context_bin(&t->cbs, F265_CO_SPLIT_TRANSFORM+5-lg_bs, 0));
- }
-
- // Encode the luma transform block.
- int nz_flag;
- int64_t tb_cost = fenc_analyze_inter_tb_full(t, cb, &nz_flag, 0, depth, lg_bs, cb_ox, cb_oy);
- unsplit_yuv_flags |= nz_flag;
-
- // Encode the chroma transform blocks.
- // FIXME: we should consider the tentative chroma flags for the
- // split/unsplit comparison (without passing up the tentative costs).
- if (lg_bs > 2 || block_idx == 3)
- {
- int chroma_lg_bs = F265_MAX(lg_bs-1, 2);
- int chroma_off[2] = { (cb_ox>>1)&~3, (cb_oy>>1)&~3 };
-
- for (int comp = 1; comp < 3; comp++)
- {
- tb_cost += fenc_analyze_inter_tb_full(t, cb, &nz_flag, comp, depth, chroma_lg_bs,
- chroma_off[0], chroma_off[1]);
- unsplit_yuv_flags |= nz_flag<<comp;
- }
- }
-
- // Encode the luma CBF unless it is inferred by root_cbf.
- int64_t luma_cbf_cost = 0;
- if (depth || unsplit_yuv_flags&6)
- {
- F265_RDO_COST(luma_cbf_cost,
- fenc_encode_context_bin(&t->cbs, F265_CO_CBF_LUMA+!depth, unsplit_yuv_flags&1));
- }
-
- unsplit_cost = unsplit_flag_cost + luma_cbf_cost + tb_cost;
- *tn = unsplit_yuv_flags;
- }
-
if (split_cost < unsplit_cost)
{
- if (stash_flag) fenc_stash_restore(t);
best_cost = split_cost;
*yuv_flags = split_yuv_flags;
}
else
{
+ if (stash_flag) fenc_stash_restore(t);
best_cost = unsplit_cost;
*yuv_flags = unsplit_yuv_flags;
}
@@ -1163,6 +1203,9 @@ static int64_t fenc_analyze_inter_tt_residual(f265_enc_thread *t, f265_cb *cb, i
for (int i = 0; i < 2; i++)
fenc_encode_context_bin(&t->cbs, F265_CO_CBF_CHROMA+0, (nz_flags>>(1+i))&1);
int64_t chroma_cbf_cost = fenc_rdo_bit_cost(t, t->cbs.rdo.bits);
+ #ifdef NO_CHROMA_RDO
+ chroma_cbf_cost = 0;
+ #endif
int64_t residual_cost = root_cbf_cost + chroma_cbf_cost + subtree_cost;
@@ -1762,6 +1805,7 @@ static int64_t fenc_analyze_intra_part_luma(f265_enc_thread *t, f265_cb *cb, int
int64_t best_cost;
// Parse the internal algorithm fields.
+ if (!an->bypass_intra_algo_flag)
{
int algo = t->enc->gd.algo, tmp;
@@ -1950,144 +1994,164 @@ static int64_t fenc_analyze_intra_part_luma(f265_enc_thread *t, f265_cb *cb, int
return best_cost;
}
-// Return the cost of encoding the CB with intra prediction.
-// FIXME, split that function.
-static void fenc_analyze_intra_cb(f265_enc_thread *t, f265_cb *cb)
+// Analyze the intra HV mode.
+static void fenc_analyze_intra_hv(f265_enc_thread *t, f265_cb *cb)
{
f265_analysis *an = &t->an;
f265_cb_analysis *cba = t->cba;
int lg_bs = cb->lg_bs;
- int split_flag = lg_bs == t->enc->gd.cb_range[0];
int depth_range[2];
int save_stash_flags = F265_STASH_ALL;
int luma_stash_flags = F265_STASH_CABAC|F265_STASH_TN|F265_STASH_Y;
int chroma_stash_flags = F265_STASH_CABAC|F265_STASH_U|F265_STASH_V;
- // No cache right now.
- cba->intra_mode_cache_flags[0] = cba->intra_mode_cache_flags[1] = 0;
-
// Remember the initial transform tree position.
uint8_t *init_tn = t->tt.tn;
// Set the CB mode to intra.
F265_SET_FLAG(cb->flags, F265_CB_INTRA, 1);
- // Test HV. Consider putting HV last, there are less modes to save/restore
- // if worse than UN.
- if (split_flag)
- {
- // Add a split transform node.
- if (!an->rdm_flag) *t->tt.tn++ = 15;
+ // Set the partitioning mode to HV and update the prediction map.
+ // This needs to be optimized. FIXME.
+ cb->intra_luma_mode[1] = 0;
+ fenc_update_pmap_unsplit_cb(t, cb);
- // Set the partitioning mode to HV and update the prediction map.
- // This needs to be optimized. FIXME.
- cb->intra_luma_mode[1] = 0;
- fenc_update_pmap_unsplit_cb(t, cb);
+ // Add a split transform node.
+ if (!an->rdm_flag) *t->tt.tn++ = 15;
- // Set the analyzed depth range.
- fenc_get_intra_part_depth_range(t, lg_bs-1, depth_range);
+ // Set the analyzed depth range.
+ fenc_get_intra_part_depth_range(t, lg_bs-1, depth_range);
- // Get the base split cost.
- int64_t cost = an->rdm_flag ? cba->rdm_se_costs[0] : cba->rdo_se_costs[1];
+ // Get the base split cost.
+ int64_t cost = an->rdm_flag ? cba->rdm_se_costs[0] : cba->rdo_se_costs[1];
- fenc_stash_push(t);
+ fenc_stash_push(t);
- // Analyze luma.
+ // Analyze luma.
- // Fake the luma reconstruction with the source pixels.
- if (an->rdm_flag && !an->rdm_rec_flag) fenc_analyze_fake_cb_rec(t, cb, 0);
+ // Fake the luma reconstruction with the source pixels.
+ if (an->rdm_flag && !an->rdm_rec_flag) fenc_analyze_fake_cb_rec(t, cb, 0);
- t->stash.flags = luma_stash_flags;
- for (int i = 0, sbs = 1<<(lg_bs-1); i < 4; i++)
- {
- cost += fenc_analyze_intra_part_luma(t, cb, depth_range, i, lg_bs-1, (i&1)*sbs, (i>>1)*sbs);
+ t->stash.flags = luma_stash_flags;
+ for (int i = 0, sbs = 1<<(lg_bs-1); i < 4; i++)
+ {
+ cost += fenc_analyze_intra_part_luma(t, cb, depth_range, i, lg_bs-1, (i&1)*sbs, (i>>1)*sbs);
- // Optionally reconstruct the partition in RDM mode.
- if (an->rdm_rec_flag)
- {
- // Make a static transform tree with the largest intra block.
- fenc_make_static_tt(&t->tt.tn, depth_range[0], 0);
+ // Optionally reconstruct the partition in RDM mode.
+ if (an->rdm_rec_flag)
+ {
+ // Make a static transform tree with the largest intra block.
+ fenc_make_static_tt(&t->tt.tn, depth_range[0], 0);
- // Reconstruct. FIXME, writing TBs here.
- t->tt.tn = init_tn;
- fenc_set_tmp_tb(t);
- fenc_rec_intra_part_tt(t, 0, lg_bs-1, cb->intra_luma_mode[i],
- cb->cb_off[0] + (i&1)*sbs, cb->cb_off[1] + (i>>1)*sbs, 0);
- t->tt.tn = init_tn;
- }
+ // Reconstruct. FIXME, writing TBs here.
+ t->tt.tn = init_tn;
+ fenc_set_tmp_tb(t);
+ fenc_rec_intra_part_tt(t, 0, lg_bs-1, cb->intra_luma_mode[i],
+ cb->cb_off[0] + (i&1)*sbs, cb->cb_off[1] + (i>>1)*sbs, 0);
+ t->tt.tn = init_tn;
}
+ }
- // Analyze chroma.
- t->stash.flags = chroma_stash_flags;
- if (!an->rdm_flag) cost += fenc_analyze_intra_cb_chroma(t, cb, init_tn);
+ // Analyze chroma.
+ t->stash.flags = chroma_stash_flags;
+ #ifndef NO_CHROMA_RDO
+ if (!an->rdm_flag) cost += fenc_analyze_intra_cb_chroma(t, cb, init_tn);
+ #else
+ if (!an->rdm_flag) fenc_analyze_intra_cb_chroma(t, cb, init_tn);
+ #endif
- fenc_stash_pop(t);
+ fenc_stash_pop(t);
- // Export the intra data.
- for (int i = 0; i < 4; i++) cba->intra_hv_modes[i] = cb->intra_luma_mode[i];
- cba->intra_chroma_modes[1] = cb->intra_chroma_mode;
+ // Export the intra data.
+ for (int i = 0; i < 4; i++) cba->intra_hv_modes[i] = cb->intra_luma_mode[i];
+ cba->intra_chroma_modes[1] = cb->intra_chroma_mode;
- // Update the best intra mode and the best CB mode.
+ // Update the best intra mode and the best CB mode.
+ if (an->rdm_flag)
+ {
+ cba->rdm_mode_costs[2] = cost;
F265_COPY2_IF_LT(cba->rdm_intra_cost, cost, cba->rdm_intra_mode, 2, int, int);
F265_COPY2_IF_LT(cba->rdm_best_cost, cost, cba->rdm_best_mode, 2, int, int);
+ }
- if (!an->rdm_flag)
+ if (!an->rdm_flag)
+ {
+ // Update the best intra mode and the best CB mode.
+ if (cost < cba->rdo_best_cost)
{
- // Update the best intra mode and the best CB mode.
- if (cost < cba->rdo_best_cost)
- {
- t->stash.flags = save_stash_flags;
- cba->rdo_best_cost = cost;
- cba->rdo_best_mode = 2;
- fenc_stash_save(t);
- }
-
- fenc_stash_reset(t);
+ t->stash.flags = save_stash_flags;
+ cba->rdo_best_cost = cost;
+ cba->rdo_best_mode = 2;
+ fenc_stash_save(t);
}
+
+ fenc_stash_reset(t);
}
+}
- // Test UN.
- {
- t->stash.flags = save_stash_flags;
- cb->intra_luma_mode[1] = -1;
- fenc_get_intra_part_depth_range(t, lg_bs, depth_range);
+// Analyze the intra UN mode.
+static void fenc_analyze_intra_un(f265_enc_thread *t, f265_cb *cb)
+{
+ f265_analysis *an = &t->an;
+ f265_cb_analysis *cba = t->cba;
+ int lg_bs = cb->lg_bs;
+ int depth_range[2];
+ int save_stash_flags = F265_STASH_ALL;
+ int luma_stash_flags = F265_STASH_CABAC|F265_STASH_TN|F265_STASH_Y;
+ int chroma_stash_flags = F265_STASH_CABAC|F265_STASH_U|F265_STASH_V;
- // Get the base cost.
- int64_t cost = an->rdm_flag ? cba->rdm_se_costs[0] : cba->rdo_se_costs[0];
+ // Remember the initial transform tree position.
+ uint8_t *init_tn = t->tt.tn;
- fenc_stash_push(t);
+ // Set the CB mode to intra UN.
+ F265_SET_FLAG(cb->flags, F265_CB_INTRA, 1);
+ t->stash.flags = save_stash_flags;
+ cb->intra_luma_mode[1] = -1;
+ fenc_get_intra_part_depth_range(t, lg_bs, depth_range);
- // Analyze luma.
- t->stash.flags = luma_stash_flags;
- cost += fenc_analyze_intra_part_luma(t, cb, depth_range, 0, lg_bs, 0, 0);
+ // Get the base cost.
+ int64_t cost = an->rdm_flag ? cba->rdm_se_costs[0] : cba->rdo_se_costs[0];
- // Analyze chroma.
- t->stash.flags = chroma_stash_flags;
- if (!an->rdm_flag) cost += fenc_analyze_intra_cb_chroma(t, cb, init_tn);
+ fenc_stash_push(t);
- fenc_stash_pop(t);
+ // Analyze luma.
+ t->stash.flags = luma_stash_flags;
+ cost += fenc_analyze_intra_part_luma(t, cb, depth_range, 0, lg_bs, 0, 0);
- // Export the intra data.
- cba->intra_un_mode = cb->intra_luma_mode[0];
- cba->intra_chroma_modes[0] = cb->intra_chroma_mode;
+ // Analyze chroma.
+ t->stash.flags = chroma_stash_flags;
+ #ifndef NO_CHROMA_RDO
+ if (!an->rdm_flag) cost += fenc_analyze_intra_cb_chroma(t, cb, init_tn);
+ #else
+ if (!an->rdm_flag) fenc_analyze_intra_cb_chroma(t, cb, init_tn);
+ #endif
- // Update the best intra mode and the best CB mode.
+ fenc_stash_pop(t);
+
+ // Export the intra data.
+ cba->intra_un_mode = cb->intra_luma_mode[0];
+ cba->intra_chroma_modes[0] = cb->intra_chroma_mode;
+
+ // Update the best intra mode and the best CB mode.
+ if (an->rdm_flag)
+ {
+ cba->rdm_mode_costs[1] = cost;
F265_COPY2_IF_LT(cba->rdm_intra_cost, cost, cba->rdm_intra_mode, 1, int, int);
F265_COPY2_IF_LT(cba->rdm_best_cost, cost, cba->rdm_best_mode, 1, int, int);
+ }
- if (!an->rdm_flag)
+ if (!an->rdm_flag)
+ {
+ // Update the best intra mode and the best CB mode.
+ if (cost < cba->rdo_best_cost)
{
- // Update the best intra mode and the best CB mode.
- if (cost < cba->rdo_best_cost)
- {
- cba->rdo_best_cost = cost;
- cba->rdo_best_mode = 1;
- t->stash.flags = save_stash_flags;
- fenc_stash_save(t);
- }
-
- fenc_stash_reset(t);
+ cba->rdo_best_cost = cost;
+ cba->rdo_best_mode = 1;
+ t->stash.flags = save_stash_flags;
+ fenc_stash_save(t);
}
+
+ fenc_stash_reset(t);
}
}
@@ -2172,13 +2236,13 @@ static void fenc_analyze_un_merge_rdm(f265_enc_thread *t, f265_cb *cb)
// Get the total cost and update the best cost.
int cost = base_cost + t->an.se_costs[F265_SE_MERGE_IDX+merge_idx] + fenc_me_merge_cand_dist(t, *cand);
+ cba->rdm_mode_costs[12 + merge_idx] = cost;
F265_COPY2_IF_LT(best_cost, cost, best_idx, merge_idx, int, int);
}
- // Update the best merge candidate index, the skip flag, the best inter mode
- // and the best CB mode.
+ // Update the best merge candidate index, the best inter mode and the best
+ // CB mode.
cba->rdm_un_merge_idx = best_idx;
- cba->un_skip_flag = 0;
int cb_mode = 12 + best_idx;
F265_COPY2_IF_LT(cba->rdm_inter_cost, best_cost, cba->rdm_inter_mode, cb_mode, int, int);
F265_COPY2_IF_LT(cba->rdm_best_cost, best_cost, cba->rdm_best_mode, cb_mode, int, int);
@@ -2701,6 +2765,7 @@ static void fenc_analyze_inter_part_mode_rdm(f265_enc_thread *t, f265_cb *cb, in
// Update the best inter mode and the best CB mode.
int cb_mode = 4 + part_mode;
+ cba->rdm_mode_costs[cb_mode] = cost;
F265_COPY2_IF_LT(cba->rdm_inter_cost, cost, cba->rdm_inter_mode, cb_mode, int, int);
F265_COPY2_IF_LT(cba->rdm_best_cost, cost, cba->rdm_best_mode, cb_mode, int, int);
}
@@ -2860,6 +2925,10 @@ static void fenc_analyze_init_cb(f265_enc_thread *t, f265_cb *cb)
// merge candidates were not obtained yet.
cba->nb_unique_un_merge_idx = 0;
+ // Initialize the best merge candidate data.
+ cba->rdm_un_merge_idx = 0;
+ cba->un_skip_flag = 0;
+
// Get the RDM cost of a bypass bin. FIXME. This needs to be adjusted by the
// CB QP.
cba->rdm_bin_cost = an->se_costs[F265_SE_BYPASS];
@@ -3057,12 +3126,365 @@ static void fenc_analyze_save_cb(f265_enc_thread *t, f265_cb *cb, uint8_t *init_
}
}
+// Test intra UN in RDM. The RDM modes found are cached in the CB analysis.
+static inline void fenc_analyze_intra_un_rdm(f265_enc_thread *t, f265_cb *cb)
+{
+ f265_analysis *an = &t->an;
+ f265_cb_analysis *cba = t->cba;
+ f265_intra_block *ib = &t->intra_block;
+
+ an->rdm_flag = 1;
+ fenc_analyze_intra_un(t, cb);
+ for (int i = 0; i < 3; i++) cba->intra_un_cands[i] = ib->cands[i];
+ an->rdm_flag = 0;
+}
+
+// Test intra UN in RDO. This assumes the RDM analysis has already been done.
+static inline void fenc_analyze_intra_un_rdo(f265_enc_thread *t, f265_cb *cb, int limit_cand_flag)
+{
+ f265_analysis *an = &t->an;
+ f265_cb_analysis *cba = t->cba;
+ f265_intra_block *ib = &t->intra_block;
+
+ // Restore the cached RDM modes.
+ for (int i = 0; i < 3; i++) ib->cands[i] = cba->intra_un_cands[i];
+
+ // Use the intra algorithms specified below. FIXME.
+ an->bypass_intra_algo_flag = 1;
+
+ // Disable the RDM pass.
+ ib->rdm_base_algo = 0;
+
+ // Use one RDO pass.
+ ib->cache_neighbour_flags[1] = 0;
+ ib->nb_cands[1] = limit_cand_flag ? 1 : 3;
+ ib->rdo_algo[0] = 3;
+ ib->rdo_save_flag[0] = 1;
+ ib->nb_cands[2] = 1;
+ ib->rdo_algo[1] = 0;
+
+ // Analyze.
+ fenc_analyze_intra_un(t, cb);
+
+ // Restore the default behavior.
+ an->bypass_intra_algo_flag = 0;
+}
+
+// Test all inter UN modes in RDM.
+static inline void fenc_analyze_inter_un_rdm(f265_enc_thread *t, f265_cb *cb)
+{
+ fenc_analyze_un_merge_rdm(t, cb);
+ fenc_analyze_inter_part_mode_rdm(t, cb, 0);
+}
+
+// Test the inter H2/V2 modes in RDM.
+static inline void fenc_analyze_inter_h2_v2_rdm(f265_enc_thread *t, f265_cb *cb)
+{
+ fenc_analyze_inter_part_mode_rdm(t, cb, 1);
+ fenc_analyze_inter_part_mode_rdm(t, cb, 2);
+}
+
+// Test the inter UN merge modes in RDO based on the threshold.
+static inline void fenc_analyze_inter_merge_rdo_threshold(f265_enc_thread *t, f265_cb *cb, int threshold)
+{
+ f265_cb_analysis *cba = t->cba;
+ for (int8_t idx = 0; idx < 5; idx++)
+ if (cba->rdm_mode_costs[12+idx] <= threshold)
+ fenc_analyze_un_merge_rdo(t, cb, &idx, 1);
+}
+
+// Test the inter UN non-merge, H2 and V2 modes in RDO based on the threshold.
+static inline void fenc_analyze_inter_un_h2_v2_rdo_threshold(f265_enc_thread *t, f265_cb *cb, int threshold)
+{
+ f265_cb_analysis *cba = t->cba;
+ for (int part_mode = 0; part_mode < 3; part_mode++)
+ if (cba->rdm_mode_costs[4+part_mode] <= threshold)
+ fenc_analyze_inter_part_mode_rdo(t, cb, part_mode);
+}
+
+// Return true if the current CB may early skip. This is the case if the best
+// RDO UN merge mode skips and the best RDM mode is also a UN merge mode. The
+// RDM test is important to avoid a large quality penalty.
+static int fenc_analyze_can_early_skip(f265_cb_analysis *cba)
+{
+ return (cba->un_skip_flag && cba->rdm_best_mode >= 12);
+}
+
+// Analyze the CB with RDO and thresholds. Experimental code.
+static void fenc_analyze_cb_rdo_threshold(f265_enc_thread *t, f265_cb *cb)
+{
+ f265_analysis *an = &t->an;
+ f265_cb_analysis *cba = t->cba;
+ int cb_unsplit_flag = !(cb->flags&F265_CB_FORBIDDEN);
+ int cb_inter_flag = cb_unsplit_flag && t->frame_type != F265_FRAME_I;
+ uint8_t *init_tn = t->tt.tn;
+
+ // Initialize the CB.
+ fenc_analyze_init_cb(t, cb);
+
+ // 8x8 preliminary exploration.
+ if (cb->lg_bs == 3 && an->rdm_flag)
+ {
+ // Testing intra HV and inter H2/V2 improves quality minorly for a large
+ // performance penalty, so we don't do it here.
+
+ // Test intra UN.
+ fenc_analyze_intra_un(t, cb);
+
+ // Test inter UN.
+ if (cb_inter_flag)
+ fenc_analyze_inter_un_rdm(t, cb);
+
+ // Set the interest flags based on the costs obtained. Intra (1), inter
+ // (4).
+ cb->interests = 0;
+ cb->interests |= (cba->rdm_intra_cost < cba->rdm_inter_cost*1.33)<<1;
+ cb->interests |= (cba->rdm_inter_cost < cba->rdm_intra_cost*1.33)<<4;
+ }
+
+ // 8x8 final exploration.
+ else if (cb->lg_bs == 3)
+ {
+ // 8x8 RDM. We do not reuse the modes found in the preliminary
+ // exploration since they are potentially stale.
+
+ // Test inter unless the preliminary exploration ruled it out. For the
+ // H2/V2 modes, the early termination based on the intra UN cost vs
+ // inter UN cost does not seem useful, so we test all modes.
+ if (cb->interests&(1<<4))
+ {
+ fenc_analyze_inter_un_rdm(t, cb);
+ fenc_analyze_inter_h2_v2_rdm(t, cb);
+ }
+
+ // Test intra unless the preliminary exploration ruled it out.
+ if (cb->interests&(1<<1))
+ {
+ // Test intra UN.
+ fenc_analyze_intra_un_rdm(t, cb);
+
+ // Test intra HV unless inter is significantly better than intra UN.
+ if (cba->rdm_intra_cost < cba->rdm_best_cost*1.07)
+ {
+ an->rdm_flag = 1;
+ fenc_analyze_intra_hv(t, cb);
+ an->rdm_flag = 0;
+ }
+ }
+
+ // 8x8 RDO. We use a bigger threshold when the 8x8 CBs seem good.
+ int threshold = an->long_8x8_analysis_flag ? cba->rdm_best_cost*1.33 : cba->rdm_best_cost*1.05;
+
+ // Test inter UN merge.
+ fenc_analyze_inter_merge_rdo_threshold(t, cb, threshold);
+
+ // Test the other modes unless we early skip.
+ if (!fenc_analyze_can_early_skip(cba))
+ {
+ // Test UN/H2/V2 in RDO.
+ fenc_analyze_inter_un_h2_v2_rdo_threshold(t, cb, threshold);
+
+ // Test intra UN with one RDO pass.
+ if (cba->rdm_mode_costs[1] <= threshold)
+ fenc_analyze_intra_un_rdo(t, cb, 0);
+
+ // Test intra HV with one RDO pass.
+ if (cba->rdm_mode_costs[2] <= threshold)
+ {
+ f265_intra_block *ib = &t->intra_block;
+
+ // Use fixed parameters. FIXME.
+ an->bypass_intra_algo_flag = 1;
+
+ ib->cache_neighbour_flags[0] = 1;
+ ib->rdm_base_algo = 5;
+ ib->rdm_angular_algo = 1;
+ ib->rdm_angular_early_term_flag = 1;
+ ib->rdm_cand_select_algo = 1;
+
+ ib->cache_neighbour_flags[1] = 1;
+ ib->nb_cands[1] = 3;
+ ib->rdo_algo[0] = 3;
+ ib->rdo_save_flag[0] = 1;
+ ib->nb_cands[2] = 1;
+ ib->rdo_algo[1] = 0;
+
+ fenc_analyze_intra_hv(t, cb);
+
+ an->bypass_intra_algo_flag = 0;
+ }
+ }
+ }
+
+ // 16x16.
+ else if (cb->lg_bs == 4)
+ {
+ // Probe the 8x8 CBs in RDM.
+ an->rdm_flag = 1;
+ fenc_analyze_split_cb(t, cb);
+ an->rdm_flag = 0;
+
+ // Test 16x16 RDM.
+ if (cb_unsplit_flag)
+ {
+ // Test inter.
+ if (cb_inter_flag)
+ {
+ // Test UN.
+ fenc_analyze_inter_un_rdm(t, cb);
+
+ // Test H2/V2 if inter 8x8 is better than inter 16x16.
+ if (cba->rdm_child_inter_cost < cba->rdm_inter_cost)
+ fenc_analyze_inter_h2_v2_rdm(t, cb);
+ }
+
+ // Test intra unless inter 8x8 is better than intra 8x8.
+ if (cba->rdm_child_intra_cost < cba->rdm_child_inter_cost)
+ fenc_analyze_intra_un_rdm(t, cb);
+ }
+
+ // Track whether the 8x8 CBs look good.
+ an->long_8x8_analysis_flag = cba->rdm_mode_costs[0] < cba->rdm_best_cost*1.03;
+
+ // 16x16 RDO threshold.
+ int threshold = cba->rdm_best_cost*1.07;
+
+ // Test inter UN merge.
+ fenc_analyze_inter_merge_rdo_threshold(t, cb, threshold);
+
+ // Test the other modes unless we early skip.
+ if (!fenc_analyze_can_early_skip(cba))
+ {
+ // Test UN/H2/V2 in RDO.
+ fenc_analyze_inter_un_h2_v2_rdo_threshold(t, cb, threshold);
+
+ // Test intra. Limit the number of candidates if intra 16x16 is not
+ // the best RDM mode.
+ if (cba->rdm_mode_costs[1] <= threshold)
+ fenc_analyze_intra_un_rdo(t, cb, cba->rdm_intra_mode != cba->rdm_best_mode);
+
+ // Analyze the 8x8 CBs in RDO.
+ if (cba->rdm_mode_costs[0] <= threshold)
+ fenc_analyze_split_cb(t, cb);
+ }
+ }
+
+ // 32x32.
+ else
+ {
+ // RDO threshold multipliers for the best cost. 100 means "unlimited", 0
+ // means "do not test". FIXME, this is a kludge.
+ float inter_mult = 0;
+ float intra_mult = 0;
+
+ // Analyze the 16x16 CBs in RDO.
+ fenc_analyze_split_cb(t, cb);
+
+ // Test 32x32 RDM.
+ if (cb_unsplit_flag)
+ {
+ // Track the modes of the children blocks. FIXME, some stats are
+ // obsolete.
+ int nb_intra_4 = 0;
+ int nb_intra_8 = 0;
+ int nb_intra_16 = 0;
+ int nb_inter_8 = 0;
+ int nb_inter_16 = 0;
+ for (int i = 0; i < 4; i++)
+ {
+ f265_cb *cb_16 = t->cb + cb->child_idx + i;
+
+ if (cb_16->flags&F265_CB_SPLIT)
+ {
+ for (int j = 0; j < 4; j++)
+ {
+ f265_cb *cb_8 = t->cb + cb_16->child_idx + j;
+
+ if (cb_8->flags&F265_CB_INTRA)
+ {
+ if (cb_8->intra_luma_mode[1] == -1) nb_intra_8++;
+ else nb_intra_4++;
+ }
+
+ else nb_inter_8++;
+ }
+ }
+
+ else if (cb_16->flags&F265_CB_INTRA) nb_intra_16++;
+ else nb_inter_16++;
+ }
+
+ // Inter coverage in 8x8 blocks. 0 is all intra, 16 is all inter.
+ int inter_score = 4*nb_inter_16 + nb_inter_8;
+
+ // All intra. Test intra. Do not test inter.
+ if (inter_score == 0)
+ intra_mult = 100;
+
+ // All inter. Do not test intra. Test inter with a very low
+ // threshold if all children choose inter 16x16.
+ else if (inter_score == 16)
+ {
+ if (nb_inter_16 == 4) inter_mult = 1.01;
+ }
+
+ // Mixed intra/inter modes. Test intra if at least 3/4 of the blocks
+ // are intra. Same for inter.
+ else
+ {
+ if (inter_score <= 4) intra_mult = 100;
+ if (inter_score >= 12) inter_mult = 1.01;
+ }
+
+ // Test inter.
+ if (inter_mult)
+ {
+ // Test UN.
+ fenc_analyze_inter_un_rdm(t, cb);
+
+ // Test H2/V2 if inter 16x16 is better than inter 32x32.
+ if (cba->rdm_child_inter_cost < cba->rdm_inter_cost)
+ fenc_analyze_inter_h2_v2_rdm(t, cb);
+ }
+
+ // Test intra UN.
+ if (intra_mult)
+ fenc_analyze_intra_un_rdm(t, cb);
+ }
+
+ // Actual RDO thresholds.
+ int inter_threshold = (inter_mult == 100) ? F265_MAX_SAD - 1 : (int)(cba->rdm_best_cost*inter_mult);
+ int intra_threshold = (intra_mult == 100) ? F265_MAX_SAD - 1 : (int)(cba->rdm_best_cost*intra_mult);
+
+ // Test inter UN merge.
+ fenc_analyze_inter_merge_rdo_threshold(t, cb, inter_threshold);
+
+ // Test the other modes unless we early skip.
+ if (!fenc_analyze_can_early_skip(cba))
+ {
+ // Test UN/H2/V2 in RDO.
+ fenc_analyze_inter_un_h2_v2_rdo_threshold(t, cb, inter_threshold);
+
+ // Test intra.
+ if (cba->rdm_mode_costs[1] <= intra_threshold)
+ fenc_analyze_intra_un_rdo(t, cb, 0);
+ }
+ }
+
+ // Save the CB.
+ fenc_analyze_save_cb(t, cb, init_tn);
+}
+
// Analyze a coding block.
static void fenc_analyze_cb(f265_enc_thread *t, f265_cb *cb)
{
+ // Hijack the control flow. FIXME, kludge.
+ if (t->enc->gd.algo&(1<<11)) { fenc_analyze_cb_rdo_threshold(t, cb); return; }
+
int cb_split_flag = !(cb->flags&F265_CB_LEAF);
int cb_unsplit_flag = !(cb->flags&F265_CB_FORBIDDEN);
- int cb_intra_flag = cb_unsplit_flag;
+ int cb_intra_un_flag = cb_unsplit_flag;
+ int cb_intra_hv_flag = !cb_split_flag;
int cb_inter_flag = cb_unsplit_flag && t->frame_type != F265_FRAME_I;
uint8_t *init_tn = t->tt.tn;
@@ -3072,7 +3494,8 @@ static void fenc_analyze_cb(f265_enc_thread *t, f265_cb *cb)
// Test the modes.
if (cb_split_flag) fenc_analyze_split_cb(t, cb);
if (cb_inter_flag) { if (t->an.rdm_flag) fenc_analyze_inter_cb_rdm(t, cb); else fenc_analyze_inter_cb_rdo(t, cb); }
- if (cb_intra_flag) fenc_analyze_intra_cb(t, cb);
+ if (cb_intra_hv_flag) fenc_analyze_intra_hv(t, cb);
+ if (cb_intra_un_flag) fenc_analyze_intra_un(t, cb);
// Save the CB.
fenc_analyze_save_cb(t, cb, init_tn);
@@ -3089,10 +3512,14 @@ void fenc_analyze_ctb(f265_enc_thread *t)
fenc_trace_syntax_flag = 0;
#endif
+ #ifdef VAN_TRACE_ANALYSIS
// FIXME. Used for debug.
- #if 0
- printf("\nFrame %d CTB %d (%d,%d) pix (%d,%d).\n",
- (int)t->src_frame->abs_poc, t->ctb_xy, t->ctb_x, t->ctb_y, t->ctb_off[0], t->ctb_off[1]);
+ fenc_trace_analysis_flag = t->src_frame->abs_poc == 1 && t->ctb_xy >= 0;
+ if (fenc_trace_analysis_flag)
+ {
+ printf("\nFrame %d CTB %d (%d,%d) pix (%d,%d).\n",
+ (int)t->src_frame->abs_poc, t->ctb_xy, t->ctb_x, t->ctb_y, t->ctb_off[0], t->ctb_off[1]);
+ }
#endif
// Back up the raw CABAC object.
diff --git a/f265/bdi.h b/f265/bdi.h
index 9856598..9d2e4ce 100644
--- a/f265/bdi.h
+++ b/f265/bdi.h
@@ -76,11 +76,13 @@
#else
#define noinline __attribute__ ((noinline))
// GCC randomly fails to inline functions after trivial modifications, with a
-// message like "sorry, unimplemented, function not inlinable". Uncomment when
-// gcc gets repaired or we switch to a better compiler.
-// #define finline __attribute__ ((always_inline))
+// message like "sorry, unimplemented, function not inlinable".
+#ifdef __clang__
+#define finline __attribute__ ((always_inline)) inline
+#else
#define finline inline
#endif
+#endif
// Data alignment specifications.
// NOTE This prefix screws up Doxygen's parser, disable it when producing the
@@ -482,6 +484,7 @@ extern const uint8_t f265_scan_map_data[2*256];
extern const uint8_t f265_last_coeff_table[32];
extern const uint8_t f265_coeff_nz_ctx_table[5*16];
extern const uint8_t f265_gt1_ctx_counter_table[8];
+extern const int8_t f265_nz_to_gt1_table[17];
extern const uint8_t f265_mpm_bin_table[3];
extern const uint8_t f265_chroma_mode_table[4];
extern const int8_t f265_intra_luma_modes[35];
diff --git a/f265/bdi_ro.c b/f265/bdi_ro.c
index 57ed3d3..05972f0 100644
--- a/f265/bdi_ro.c
+++ b/f265/bdi_ro.c
@@ -455,6 +455,10 @@ const uint8_t f265_coeff_nz_ctx_table[5*16] =
// Coefficient greater-than-1 context counter update table.
const uint8_t f265_gt1_ctx_counter_table[8] = { 0, 2, 3, 3, 0, 0, 0, 0 };
+// Convert the number of non-zero coefficients to the number of greater-than-1
+// flags coded per subblock (capped at 8).
+const int8_t f265_nz_to_gt1_table[17] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+
// Intra MPM binarization table. The first two bits are the binarization string,
// the next two bits are the number of bins.
const uint8_t f265_mpm_bin_table[3] = { 0x4, 0xa, 0xb };
diff --git a/f265/enc.h b/f265/enc.h
index b00584d..12da932 100644
--- a/f265/enc.h
+++ b/f265/enc.h
@@ -924,6 +924,12 @@ typedef struct f265_analysis
// True if the blocks are reconstructed in RDM.
int8_t rdm_rec_flag;
+ // True to bypass the intra algo configuration.
+ int8_t bypass_intra_algo_flag;
+
+ // True if the 8x8 CBs look interesting enough to warrant the long 8x8 analysis.
+ int8_t long_8x8_analysis_flag;
+
// We may override the encoder parameters with local parameters during
// analysis to speed up the analysis.
uint8_t tb_range[2];
diff --git a/f265/entropy.c b/f265/entropy.c
index ed06549..bd1d5ed 100644
--- a/f265/entropy.c
+++ b/f265/entropy.c
@@ -401,9 +401,9 @@ static finline int fenc_write_rqt_root_cbf(f265_enc_thread *t, f265_cb *cb)
return flag;
}
-// Write the coefficients in a non-empty transform block.
-// FIXME: check if forced inlining of chroma_flag is beneficial.
-static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chroma_flag)
+// Write the coefficients in a non-empty transform block. We use a frontend
+// function to control how inlining occurs. It's slightly faster with clang.
+static finline void fenc_write_tb_base(f265_cabac_bs *cbs, f265_tt_enc *tt, int chroma_flag)
{
f265_tb_enc *tb = tt->tb;
f265_sb_enc *sb = tt->sb;
@@ -507,13 +507,12 @@ static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chrom
// Compute the values of the subblock neighbour flags for the context
// derivation of the non-zero flags.
int neighbour_flags[2] = { (sb_neighbour_flags[0]>>sb_pos)&1, (sb_neighbour_flags[1]>>sb_pos)&1 };
- int sb_neighbour_idx = neighbour_flags[0]|neighbour_flags[1];
- int coeff_neighbour_idx = (neighbour_flags[0]|(neighbour_flags[1]<<1))<<4;
// Encode the subblock non-zero flag unless it is inferred to be 1.
int sb_enc_flag = (sb_nz_flags>>sb_idx)&1;
if (middle_sb_flag)
{
+ int sb_neighbour_idx = neighbour_flags[0]|neighbour_flags[1];
fenc_trace_syntax_element("coded_sub_block_flag");
fenc_encode_context_bin(cbs, F265_CO_CODED_SUB_BLOCK + 2*chroma_flag + sb_neighbour_idx, sb_enc_flag);
}
@@ -521,19 +520,11 @@ static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chrom
// Skip empty subblocks.
if (!sb_enc_flag) continue;
- // Expand the subblock data and pass to the next.
- int nz_flags = sb->nz_flags;
- int signs = sb->signs;
- int remain_flags = sb->remain_flags;
- int gt1_flags = sb->gt1_flags;
- int gt2_flag = sb->packed_data>>5;
- int nb_nz = sb->packed_data&31;
- int nb_gt1 = F265_MIN(nb_nz, 8);
- sb++;
-
// Encode the coefficient non-zero flags. The non-zero flag of the first
// coefficient is present unless the subblock non-zero flag is present
// and the other coefficients are zero.
+ int coeff_neighbour_idx = (neighbour_flags[0]|(neighbour_flags[1]<<1))<<4;
+ int nz_flags = sb->nz_flags;
int skip_first_coeff_nz_flag = middle_sb_flag & (nz_flags == (1<<15));
for (int i = last_coeff_nz_idx; i < 16-skip_first_coeff_nz_flag; i++)
{
@@ -553,7 +544,11 @@ static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chrom
// Encode the greater-than-1 flags. FIXME: also count the number of
// coefficients inferred using the greater-than-1 flags. Use conditional
// compilation to use popcnt below instead when available.
+ int nb_nz = sb->packed_data&31;
+ int nb_gt1 = f265_nz_to_gt1_table[nb_nz];
int nb_gt1_inferred = 0;
+ int gt1_flags = sb->gt1_flags;
+ int gt2_flag = sb->packed_data>>5;
for (int i = 0; i < nb_gt1; i++)
{
int gt1_flag = (gt1_flags>>i)&1;
@@ -573,11 +568,13 @@ static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chrom
}
// Encode the signs.
+ int signs = sb->signs;
int sign_hidden_flag = (sign_hiding_flags>>sb_pos)&1;
if (nb_nz - sign_hidden_flag) fenc_trace_syntax_element("coeff_sign_flag");
fenc_encode_bypass_bins(cbs, signs >> sign_hidden_flag, nb_nz - sign_hidden_flag);
// Encode the coefficient remaining levels.
+ int remain_flags = sb->remain_flags;
if (remain_flags)
{
// The current base level decreases as we consume the last
@@ -652,6 +649,9 @@ static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chrom
// Pass to the next subblock levels.
levels += 16;
}
+
+ // Pass to the next subblock.
+ sb++;
}
tt->tb++;
@@ -659,6 +659,12 @@ static finline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chrom
tt->levels = levels;
}
+static noinline void fenc_write_tb(f265_cabac_bs *cbs, f265_tt_enc *tt, int chroma_flag)
+{
+ if (chroma_flag) fenc_write_tb_base(cbs, tt, 1);
+ else fenc_write_tb_base(cbs, tt, 0);
+}
+
// Encode the current TT. Used in RDOQ to update the contexts.
static void fenc_encode_rdoq_sub_part(f265_enc_thread *t, int yuv_flag, int comp, int lg_bs, int depth)
{
@@ -751,6 +757,7 @@ static void fenc_write_tt(f265_enc_thread *t, f265_cb *cb, int cbf_mask, int max
for (int c = 1; c < 3; c++) if (chroma_cbf&(1<<c)) fenc_write_tb(&t->cbs, &t->tt, 1);
}
+
///////////////////////////////////////////////////////////////////////////////
// Non-RDO functions.
diff --git a/f265/parse.c b/f265/parse.c
index b4e3231..96778d1 100644
--- a/f265/parse.c
+++ b/f265/parse.c
@@ -267,10 +267,24 @@ static void handle_param_quality(f265_parse_ctx *ctx, f265_enc_params *p, f265_p
p->tb_range[1] = 4;
p->tb_depth[0] = 1;
p->tb_depth[1] = 1;
- p->rdo_level = 0;
+ p->rdo_level = 1;
+ p->amp_flag = 0;
+ p->tmv_flag = 1;
+ p->algo = (1<<0)|(1<<2)|(1<<4)|(1<<5)|(1<<7)|(1<<10)|(1<<11)|(1<<14);
+ }
+
+ else if (quality <= 15)
+ {
+ p->cb_range[0] = 3;
+ p->cb_range[1] = 5;
+ p->tb_range[0] = 2;
+ p->tb_range[1] = 5;
+ p->tb_depth[0] = 1;
+ p->tb_depth[1] = 1;
+ p->rdo_level = 1;
p->amp_flag = 0;
p->tmv_flag = 1;
- p->algo = (1<<0)|(1<<2)|(1<<4)|(1<<10)|(1<<12);
+ p->algo = (1<<0)|(1<<2)|(1<<4)|(1<<5)|(1<<7)|(1<<10)|(1<<11)|(1<<14);
}
else if (quality <= 25)