Module: libav
Branch: master
Commit: b77fffa127663028169c5ed543956af4b9496c29

Author:    Anton Khirnov <[email protected]>
Committer: Anton Khirnov <[email protected]>
Date:      Wed Apr 13 13:52:36 2016 +0200

h264: make slice threading work with deblocking_filter=1

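When slice threading is used and at least one slice uses deblocking
mode 1 (i.e. filtering across slice boundaries), decode the MBs in
parallel without the loop filter, then execute the filter serially
once all the slices have been decoded.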

The ref2frm array was previously moved to H264SliceContext. That was
incorrect, since it applies to all the slices and should properly be in
H264Context (it did not actually break decoding, since this distinction
only becomes relevant with slice threading and deblocking_filter=1,
which was not implemented before this commit). The ref2frm array is thus
moved back to H264Context.

---

 libavcodec/h264.c       |    5 ----
 libavcodec/h264.h       |   13 +++++-----
 libavcodec/h264_slice.c |   62 ++++++++++++++++++++++++++++++++---------------
 3 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index c024d7e..27cbcd2 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -837,7 +837,6 @@ static int decode_nal_units(H264Context *h, const uint8_t 
*buf, int buf_size)
             nal->ref_idc == 0 && nal->type != NAL_SEI)
             continue;
 
-again:
         // FIXME these should stop being context-global variables
         h->nal_ref_idc   = nal->ref_idc;
         h->nal_unit_type = nal->type;
@@ -947,10 +946,6 @@ again:
         if (err < 0) {
             av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n");
             sl->ref_count[0] = sl->ref_count[1] = sl->list_count = 0;
-        } else if (err == 1) {
-            /* Slice could not be decoded in parallel mode, restart. */
-            sl               = &h->slice_ctx[0];
-            goto again;
         }
     }
     if (context_count) {
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index daad1be..5c2c810 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -392,7 +392,6 @@ typedef struct H264SliceContext {
     H264Ref ref_list[2][48];        /**< 0..15: frame refs, 16..47: mbaff 
field refs.
                                          *   Reordered version of 
default_ref_list
                                          *   according to picture reordering 
in slice header */
-    int ref2frm[MAX_SLICES][2][64];     ///< reference to frame number lists, 
used in the loop filter, the first 2 are for -2,-1
 
     const uint8_t *intra_pcm_ptr;
     int16_t *dc_val_base;
@@ -470,6 +469,11 @@ typedef struct H264Context {
     int context_initialized;
     int flags;
     int workaround_bugs;
+    /* Set when slice threading is used and at least one slice uses deblocking
+     * mode 1 (i.e. across slice boundaries). Then we disable the loop filter
+     * during normal MB decoding and execute it serially at the end.
+     */
+    int postpone_filter;
 
     int8_t(*intra4x4_pred_mode);
     H264PredContext hpc;
@@ -591,12 +595,6 @@ typedef struct H264Context {
 
     int slice_context_count;
 
-    /**
-     *  1 if the single thread fallback warning has already been
-     *  displayed, 0 otherwise.
-     */
-    int single_decode_warning;
-
     /** @} */
 
     /**
@@ -642,6 +640,7 @@ typedef struct H264Context {
     AVBufferPool *mb_type_pool;
     AVBufferPool *motion_val_pool;
     AVBufferPool *ref_index_pool;
+    int ref2frm[MAX_SLICES][2][64];     ///< reference to frame number lists, 
used in the loop filter, the first 2 are for -2,-1
 } H264Context;
 
 extern const uint16_t ff_h264_mb_sizes[4];
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 9e08c0b..240feb9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -498,6 +498,8 @@ static int h264_frame_start(H264Context *h)
 
     h->next_output_pic = NULL;
 
+    h->postpone_filter = 0;
+
     assert(h->cur_pic_ptr->long_ref == 0);
 
     return 0;
@@ -920,7 +922,7 @@ static int h264_slice_header_init(H264Context *h)
  *
  * @param h h264context
  *
- * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be 
multithreaded
+ * @return 0 if okay, <0 if an error occurred
  */
 int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
 {
@@ -1481,17 +1483,7 @@ int ff_h264_decode_slice_header(H264Context *h, 
H264SliceContext *sl)
              * Do not bother to deblock across slices. */
             sl->deblocking_filter = 2;
         } else {
-            h->max_contexts = 1;
-            if (!h->single_decode_warning) {
-                av_log(h->avctx, AV_LOG_INFO,
-                       "Cannot parallelize deblocking type 1, decoding such 
frames in sequential order\n");
-                h->single_decode_warning = 1;
-            }
-            if (sl != h->slice_ctx) {
-                av_log(h->avctx, AV_LOG_ERROR,
-                       "Deblocking switched inside frame.\n");
-                return 1;
-            }
+            h->postpone_filter = 1;
         }
     }
     sl->qp_thresh = 15 -
@@ -1509,7 +1501,7 @@ int ff_h264_decode_slice_header(H264Context *h, 
H264SliceContext *sl)
 
     for (j = 0; j < 2; j++) {
         int id_list[16];
-        int *ref2frm = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][j];
+        int *ref2frm = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][j];
         for (i = 0; i < 16; i++) {
             id_list[i] = 60;
             if (j < sl->list_count && i < sl->ref_count[j] &&
@@ -1597,7 +1589,7 @@ static av_always_inline void 
fill_filter_caches_inter(const H264Context *h,
         if (USES_LIST(top_type, list)) {
             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
             const int b8_xy = 4 * top_xy + 2;
-            int (*ref2frm)[64] = sl->ref2frm[h->slice_table[top_xy] & 
(MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+            int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & 
(MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
             AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
             ref_cache[0 - 1 * 8] =
             ref_cache[1 - 1 * 8] = 
ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
@@ -1612,7 +1604,7 @@ static av_always_inline void 
fill_filter_caches_inter(const H264Context *h,
             if (USES_LIST(left_type[LTOP], list)) {
                 const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                 const int b8_xy = 4 * left_xy[LTOP] + 1;
-                int (*ref2frm)[64] = sl->ref2frm[h->slice_table[left_xy[LTOP]] 
& (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+                int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] 
& (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
                 AV_COPY32(mv_dst - 1 +  0, h->cur_pic.motion_val[list][b_xy + 
b_stride * 0]);
                 AV_COPY32(mv_dst - 1 +  8, h->cur_pic.motion_val[list][b_xy + 
b_stride * 1]);
                 AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + 
b_stride * 2]);
@@ -1645,7 +1637,7 @@ static av_always_inline void 
fill_filter_caches_inter(const H264Context *h,
 
     {
         int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
-        int (*ref2frm)[64] = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] 
+ (MB_MBAFF(sl) ? 20 : 2);
+        int (*ref2frm)[64] = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + 
(MB_MBAFF(sl) ? 20 : 2);
         uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], 
ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
         uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], 
ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
         AV_WN32A(&ref_cache[0 * 8], ref01);
@@ -1820,6 +1812,9 @@ static void loop_filter(const H264Context *h, 
H264SliceContext *sl, int start_x,
     const int pixel_shift    = h->pixel_shift;
     const int block_h        = 16 >> h->chroma_y_shift;
 
+    if (h->postpone_filter)
+        return;
+
     if (sl->deblocking_filter) {
         for (mb_x = start_x; mb_x < end_x; mb_x++)
             for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) {
@@ -1944,6 +1939,7 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
     H264SliceContext *sl = arg;
     const H264Context *h = sl->h264;
     int lf_x_start = sl->mb_x;
+    int orig_deblock = sl->deblocking_filter;
     int ret;
 
     sl->linesize   = h->cur_pic_ptr->f->linesize[0];
@@ -1955,6 +1951,9 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
 
     sl->mb_skip_run = -1;
 
+    if (h->postpone_filter)
+        sl->deblocking_filter = 0;
+
     sl->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
                      avctx->codec_id != AV_CODEC_ID_H264 ||
                      (CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY));
@@ -2004,7 +2003,7 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
                              sl->mb_y, ER_MB_END);
                 if (sl->mb_x >= lf_x_start)
                     loop_filter(h, sl, lf_x_start, sl->mb_x + 1);
-                return 0;
+                goto finish;
             }
             if (ret < 0 || sl->cabac.bytestream > sl->cabac.bytestream_end + 
2) {
                 av_log(h->avctx, AV_LOG_ERROR,
@@ -2035,7 +2034,7 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
                              sl->mb_y, ER_MB_END);
                 if (sl->mb_x > lf_x_start)
                     loop_filter(h, sl, lf_x_start, sl->mb_x);
-                return 0;
+                goto finish;
             }
         }
     } else {
@@ -2089,7 +2088,7 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
                         er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y,
                                      sl->mb_x - 1, sl->mb_y, ER_MB_END);
 
-                        return 0;
+                        goto finish;
                     } else {
                         er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y,
                                      sl->mb_x - 1, sl->mb_y, ER_MB_END);
@@ -2109,7 +2108,7 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
                     if (sl->mb_x > lf_x_start)
                         loop_filter(h, sl, lf_x_start, sl->mb_x);
 
-                    return 0;
+                    goto finish;
                 } else {
                     er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, 
sl->mb_x,
                                  sl->mb_y, ER_MB_ERROR);
@@ -2119,6 +2118,10 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
             }
         }
     }
+
+finish:
+    sl->deblocking_filter = orig_deblock;
+    return 0;
 }
 
 /**
@@ -2139,6 +2142,7 @@ int ff_h264_execute_decode_slices(H264Context *h, 
unsigned context_count)
         int ret;
 
         h->slice_ctx[0].next_slice_idx = h->mb_width * h->mb_height;
+        h->postpone_filter = 0;
 
         ret = decode_slice(avctx, &h->slice_ctx[0]);
         h->mb_y = h->slice_ctx[0].mb_y;
@@ -2172,6 +2176,24 @@ int ff_h264_execute_decode_slices(H264Context *h, 
unsigned context_count)
         h->mb_y              = sl->mb_y;
         for (i = 1; i < context_count; i++)
             h->slice_ctx[0].er.error_count += h->slice_ctx[i].er.error_count;
+
+        if (h->postpone_filter) {
+            h->postpone_filter = 0;
+
+            for (i = 0; i < context_count; i++) {
+                int y_end, x_end;
+
+                sl = &h->slice_ctx[i];
+                y_end = FFMIN(sl->mb_y + 1, h->mb_height);
+                x_end = (sl->mb_y >= h->mb_height) ? h->mb_width : sl->mb_x;
+
+                for (j = sl->resync_mb_y; j < y_end; j += 1 + 
FIELD_OR_MBAFF_PICTURE(h)) {
+                    sl->mb_y = j;
+                    loop_filter(h, sl, j > sl->resync_mb_y ? 0 : 
sl->resync_mb_x,
+                                j == y_end - 1 ? x_end : h->mb_width);
+                }
+            }
+        }
     }
 
     return 0;

_______________________________________________
libav-commits mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-commits

Reply via email to