diff --git a/f265/analyze.c b/f265/analyze.c
index 84fa1f5..79e592b 100644
--- a/f265/analyze.c
+++ b/f265/analyze.c
@@ -32,6 +32,7 @@
//
// Bit 8 enables the second RDO pass.
//
+// Bit 9 enables fake luma bi-prediction.
// Bit 10 enables fake luma interpolation.
// Bit 11 enables the use of thresholds for the mode analysis.
// Bit 12 forces the reconstruction of blocks in RDM.
@@ -2682,7 +2683,20 @@ static int fenc_analyze_inter_part(f265_enc_thread *t, f265_cb *cb, int part_idx
}
// Compute the cost and update the partition cost.
- int bi_cost = cba->rdm_bin_cost + fenc_me_mv_total_cost_bi(ib->bi_mv, me);
+ int bi_cost = 0;
+
+ // Fake the interpolation. This changes the motion estimation
+ // context.
+ if (t->enc->gd.algo&(1<<9))
+ {
+ bi_cost += cba->rdm_bin_cost;
+ for (int i = 0; i < 2; i++) bi_cost += fenc_me_mv_cost_test(me, ib->bi_mv[i], i);
+ bi_cost += fenc_get_fake_luma_block_dist_b(t, ib->bi_ref_idx, ib->bi_mv);
+ }
+
+ // Do the real interpolation.
+ else bi_cost = cba->rdm_bin_cost + fenc_me_mv_total_cost_bi(ib->bi_mv, me);
+
F265_COPY2_IF_LT(part_cost, bi_cost, pred_type, 1, int, int);
}
}
diff --git a/f265/enc.h b/f265/enc.h
index bca2e55..afbca27 100644
--- a/f265/enc.h
+++ b/f265/enc.h
@@ -2869,6 +2869,7 @@ void fenc_me_interpol_plane(int16_t *dst, int dst_stride, f265_mv mv, f265_me_ct
f265_pix *ref_plane, int comp, int width, int height);
void fenc_me_interpol(f265_pix *dst, int dst_stride, f265_mv mv, f265_me_ctx *me, int comp);
void fenc_me_interpol_bi(f265_pix *dst, int dst_stride, f265_mv mv[2], f265_me_ctx *me, int comp);
+int fenc_get_fake_luma_block_dist_b(f265_enc_thread *t, int8_t bi_ref_idx[2], f265_mv bi_mv[2]);
int fenc_me_get_dist(f265_me_ctx *me, f265_pix *src0, int32_t stride0, f265_pix *src1,
int32_t stride1, int32_t width, int32_t height, int32_t bitdepth);
int fenc_me_luma_cost(f265_mv mv, f265_me_ctx *me);
diff --git a/f265/me.c b/f265/me.c
index 2d8024f..1eddb6e 100644
--- a/f265/me.c
+++ b/f265/me.c
@@ -149,8 +149,9 @@ static f265_pix* fenc_fake_luma_ref_p(f265_enc_thread *t, int *out_stride, f265_
return out;
}
-// Return the fake luma distortion cost for the specified motion vector.
-static int fenc_get_fake_luma_block_dist(f265_enc_thread *t, f265_mv mv)
+// Return the fake luma uni-predicted distortion cost for the specified motion
+// vector. The function assumes the references are already set up.
+static int fenc_get_fake_luma_block_dist_p(f265_enc_thread *t, f265_mv mv)
{
f265_me_ctx *me = &t->me;
int packed_dims = me->packed_dims[0];
@@ -169,6 +170,42 @@ static int fenc_get_fake_luma_block_dist(f265_enc_thread *t, f265_mv mv)
return dist;
}
+// Return the fake luma bi-predicted distortion for the given pair of reference
+// indices and motion vectors. The function sets each reference and clips each
+// motion vector itself. The thread store is used as scratch and restored.
+int fenc_get_fake_luma_block_dist_b(f265_enc_thread *t, int8_t bi_ref_idx[2], f265_mv bi_mv[2])
+{
+    f265_me_ctx *me = &t->me;
+    int packed_dims = me->packed_dims[0];
+    int width = (packed_dims>>8)&0xff, height = packed_dims&0xff, awi = packed_dims>>24;
+    int aligned_block_size = F265_ALIGN_VAL(width*height, 64);
+    int alloc_size = 2*aligned_block_size*sizeof(f265_pix);
+    f265_pix *tmp_buf = (f265_pix*)t->store;
+    fenc_fsad_func df = me->dist_func_id ? fenc_satd_awi : fenc_fsad[awi];
+    // Reserve one scratch block per list. Released before returning.
+    t->store += alloc_size;
+
+    // Get the reference in each list (fenc_me_set_ref is called with 0 for both).
+    int ref_strides[2];
+    f265_pix *refs[2];
+    for (int list = 0; list < 2; list++)
+    {
+        fenc_me_set_ref(t, t->ref_ctx[list] + bi_ref_idx[list], 0);
+        f265_mv mv = f265_clip_mv(bi_mv[list], t->mc_bounds64);
+        refs[list] = fenc_fake_luma_ref_p(t, ref_strides + list, tmp_buf + aligned_block_size*list, mv.x, mv.y);
+    }
+
+    // Average the references into the L0 scratch block (tmp_buf).
+    fenc_avg_pix[awi](tmp_buf, refs[0], ref_strides[0], refs[1], ref_strides[1], packed_dims);
+    // The average lives in tmp_buf, so measure that buffer: refs[0] is whatever
+    // the helper returned and is not guaranteed here to alias tmp_buf.
+    int dist = df(me->src_planes[0], t->plane_stride, tmp_buf, width, packed_dims);
+
+    t->store -= alloc_size;
+
+    return dist;
+}
+
// Return the distortion using the current distortion metric.
int fenc_me_get_dist(f265_me_ctx *me, f265_pix *src0, int32_t stride0, f265_pix *src1,
int32_t stride1, int32_t width, int32_t height, int32_t bitdepth)
@@ -213,44 +250,63 @@ int fenc_me_merge_cand_dist(f265_enc_thread *t, f265_inter_neighbour_mv cand)
int bi_flag = cand.ref_idx[0] != -1 && cand.ref_idx[1] != -1;
int uni_list = cand.ref_idx[0] == -1;
int uni_ref_idx = cand.ref_idx[uni_list];
- int dist = 0;
-
- // Set the references and clip the MVs in place.
- if (bi_flag)
- for (int list = 0; list < 2; list++)
- {
- fenc_me_set_ref(t, t->ref_ctx[list] + cand.ref_idx[list], list);
- f265_clip_mv(cand.mv[list], t->mc_bounds64);
- }
- else
+ // Fake the uniprediction.
+ if (likely(t->enc->gd.algo&(1<<10) && !bi_flag))
{
fenc_me_set_ref(t, t->ref_ctx[uni_list] + uni_ref_idx, 0);
- f265_clip_mv(cand.mv[uni_list], t->mc_bounds64);
+ f265_mv mv = f265_clip_mv(cand.mv[uni_list], t->mc_bounds64);
+ return fenc_get_fake_luma_block_dist_p(t, mv);
}
- // Fast track.
- if ((t->enc->gd.algo&0x400) && !bi_flag)
- return fenc_get_fake_luma_block_dist(t, cand.mv[uni_list]);
+ // Fake the biprediction.
+ else if (t->enc->gd.algo&(1<<9) && bi_flag)
+ {
+ int8_t bi_ref_idx[2] = { cand.ref_idx[0], cand.ref_idx[1] };
+ return fenc_get_fake_luma_block_dist_b(t, bi_ref_idx, cand.mv);
+ }
- // Pass each component.
- int nb_comp = 1 + (me->chroma_flag<<1);
- for (int comp = 0; comp < nb_comp; comp++)
+ // Use the real prediction.
+ else
{
- // Do the interpolation.
- f265_pix buf[64*64];
- if (bi_flag) fenc_me_interpol_bi(buf, 64, cand.mv, me, comp);
- else fenc_me_interpol(buf, 64, cand.mv[uni_list], me, comp);
+ f265_mv mvs[2];
+ int dist = 0;
- // Compute the distortion.
- int is_chroma = !!comp;
- int scale_x = me->csf[0] * is_chroma;
- int scale_y = me->csf[1] * is_chroma;
- dist += (me->dist[me->dist_func_id])(me->src_planes[comp], me->ref_stride, buf, 64,
- me->dim[0]>>scale_x, me->dim[1]>>scale_y, me->bit_depth[0]);
- }
+ // Set the references and clip the MVs.
+ if (bi_flag)
+ {
+ for (int list = 0; list < 2; list++)
+ {
+ fenc_me_set_ref(t, t->ref_ctx[list] + cand.ref_idx[list], list);
+ mvs[list] = f265_clip_mv(cand.mv[list], t->mc_bounds64);
+ }
+ }
- return dist;
+ else
+ {
+ fenc_me_set_ref(t, t->ref_ctx[uni_list] + uni_ref_idx, 0);
+ mvs[0] = f265_clip_mv(cand.mv[uni_list], t->mc_bounds64);
+ }
+
+ // Pass each component.
+ int nb_comp = 1 + (me->chroma_flag<<1);
+ for (int comp = 0; comp < nb_comp; comp++)
+ {
+ // Do the interpolation.
+ f265_pix buf[64*64];
+ if (bi_flag) fenc_me_interpol_bi(buf, 64, mvs, me, comp);
+ else fenc_me_interpol(buf, 64, mvs[0], me, comp);
+
+ // Compute the distortion.
+ int is_chroma = !!comp;
+ int scale_x = me->csf[0] * is_chroma;
+ int scale_y = me->csf[1] * is_chroma;
+ dist += (me->dist[me->dist_func_id])(me->src_planes[comp], me->ref_stride, buf, 64,
+ me->dim[0]>>scale_x, me->dim[1]>>scale_y, me->bit_depth[0]);
+ }
+
+ return dist;
+ }
}
// Compute the MV length like HM.
@@ -442,7 +498,7 @@ int fenc_me_mv_cost_test(f265_me_ctx *me, f265_mv mv, int ref_id)
int fenc_me_mv_total_cost(f265_mv mv, f265_me_ctx *me)
{
int mv_cost = fenc_me_mv_cost_test(me, mv, 0);
- int dist = (me->t->enc->gd.algo&0x400) ? fenc_get_fake_luma_block_dist(me->t, mv) : fenc_me_luma_cost(mv, me);
+ int dist = (me->t->enc->gd.algo&0x400) ? fenc_get_fake_luma_block_dist_p(me->t, mv) : fenc_me_luma_cost(mv, me);
if (me->chroma_flag)
for (int comp = 1; comp < 3; comp++)
dist += fenc_me_chroma_cost(mv, me, comp);
@@ -500,7 +556,7 @@ int fenc_me_test_pmv(f265_mv pmv[2], f265_me_ctx *me, int dist, int *cost)
if (t->enc->gd.algo&0x400)
{
for (int i = 0; i < 2; i++)
- costs[i] = fenc_get_fake_luma_block_dist(t, f265_clip_mv(pmv[i], me->me_bounds64));
+ costs[i] = fenc_get_fake_luma_block_dist_p(t, f265_clip_mv(pmv[i], me->me_bounds64));
}
else