On 2016-12-03 17:34:34 +0100, Anton Khirnov wrote:
> Certain hardware decoding APIs are often not thread-safe, so having the user
> access decoded hardware surfaces while the decoder is running in another
> thread can cause failures (this is mainly known to happen with DXVA2).
>
> For such hwaccels, only allow the decoding thread to run while the user
> is inside a lavc decode call (avcodec_send_packet/receive_frame).
> ---
> libavcodec/avcodec.h | 5 +++++
> libavcodec/hwaccel.h | 24 ++++++++++++++++++++++
> libavcodec/pthread_frame.c | 51
> ++++++++++++++++++++++++++++++++++++++++------
> libavcodec/vaapi_h264.c | 2 ++
> libavcodec/vaapi_mpeg2.c | 2 ++
> libavcodec/vaapi_mpeg4.c | 3 +++
> libavcodec/vaapi_vc1.c | 3 +++
> libavcodec/vaapi_vp8.c | 2 ++
> 8 files changed, 86 insertions(+), 6 deletions(-)
> create mode 100644 libavcodec/hwaccel.h
>
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index e75d300..96149ac 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -3361,6 +3361,11 @@ typedef struct AVHWAccel {
> * AVCodecInternal.hwaccel_priv_data.
> */
> int priv_data_size;
> +
> + /**
> + * Internal hwaccel capabilities.
> + */
> + int caps_internal;
> } AVHWAccel;
>
> /**
> diff --git a/libavcodec/hwaccel.h b/libavcodec/hwaccel.h
> new file mode 100644
> index 0000000..60dbe81
> --- /dev/null
> +++ b/libavcodec/hwaccel.h
> @@ -0,0 +1,24 @@
> +/*
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#ifndef AVCODEC_HWACCEL_H
> +#define AVCODEC_HWACCEL_H
> +
> +#define HWACCEL_CAP_ASYNC_SAFE (1 << 0)
> +
> +#endif /* AVCODEC_HWACCEL_H */
> diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
> index 9fdfb93..83f528e 100644
> --- a/libavcodec/pthread_frame.c
> +++ b/libavcodec/pthread_frame.c
> @@ -34,6 +34,7 @@
> #endif
>
> #include "avcodec.h"
> +#include "hwaccel.h"
> #include "internal.h"
> #include "pthread_internal.h"
> #include "thread.h"
> @@ -101,6 +102,7 @@ typedef struct PerThreadContext {
> int die; ///< Set when the thread should exit.
>
> int hwaccel_serializing;
> + int async_serializing;
> } PerThreadContext;
>
> /**
> @@ -116,6 +118,7 @@ typedef struct FrameThreadContext {
> * is used.
> */
> pthread_mutex_t hwaccel_mutex;
> + pthread_mutex_t async_mutex;
>
> int next_decoding; ///< The next context to submit a packet
> to.
> int next_finished; ///< The next context to return output
> from.
> @@ -178,6 +181,11 @@ static attribute_align_arg void
> *frame_worker_thread(void *arg)
> if (atomic_load(&p->state) == STATE_SETTING_UP)
> ff_thread_finish_setup(avctx);
>
> + if (p->async_serializing) {
> + p->async_serializing = 0;
> + pthread_mutex_unlock(&p->parent->async_mutex);
> + }
> +
> if (p->hwaccel_serializing) {
> p->hwaccel_serializing = 0;
> pthread_mutex_unlock(&p->parent->hwaccel_mutex);
is it concise decision to release the async_mutex before the hwaccel
mutex? That should it make more likely that only 1 frame per decode call
is decoded. If it is a concise and or tested decision it warrants a
comment here.
> @@ -406,7 +414,11 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
> FrameThreadContext *fctx = avctx->internal->thread_ctx;
> int finished = fctx->next_finished;
> PerThreadContext *p;
> - int err;
> + int err, ret;
> +
> + /* release the hwaccel lock, permitting blocked hwaccel threadsto
s/hwaccel\( lock\)/async\1/
> + * go forward while we are in this function */
> + pthread_mutex_unlock(&fctx->async_mutex);
>
> /*
> * Submit a packet to the next decoding thread.
> @@ -414,9 +426,11 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
>
> p = &fctx->threads[fctx->next_decoding];
> err = update_context_from_user(p->avctx, avctx);
> - if (err) return err;
> + if (err)
> + goto finish;
> err = submit_packet(p, avpkt);
> - if (err) return err;
> + if (err)
> + goto finish;
>
> /*
> * If we're still receiving the initial packets, don't return a frame.
> @@ -426,8 +440,10 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
> if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying =
> 0;
>
> *got_picture_ptr=0;
> - if (avpkt->size)
> - return avpkt->size;
> + if (avpkt->size) {
> + ret = avpkt->size;
> + goto finish;
> + }
> }
>
> /*
> @@ -469,7 +485,12 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
> fctx->next_finished = finished;
>
> /* return the size of the consumed packet if no error occurred */
> - return (p->result >= 0) ? avpkt->size : p->result;
> + ret = (p->result >= 0) ? avpkt->size : p->result;
> +finish:
> + pthread_mutex_lock(&fctx->async_mutex);
> + if (err < 0)
> + return err;
> + return ret;
> }
>
> void ff_thread_report_progress(ThreadFrame *f, int n, int field)
> @@ -524,6 +545,12 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
> p->hwaccel_serializing = 1;
> }
>
> + if (avctx->hwaccel &&
> + !(avctx->hwaccel->caps_internal & HWACCEL_CAP_ASYNC_SAFE)) {
> + p->async_serializing = 1;
> + pthread_mutex_lock(&p->parent->async_mutex);
> + }
please add a comment that nothing hwaccel related happens until after
finish_setup()
> +
> pthread_mutex_lock(&p->progress_mutex);
>
> atomic_store(&p->state, STATE_SETUP_FINISHED);
> @@ -537,6 +564,8 @@ static void park_frame_worker_threads(FrameThreadContext
> *fctx, int thread_count
> {
> int i;
>
> + pthread_mutex_unlock(&fctx->async_mutex);
> +
> for (i = 0; i < thread_count; i++) {
> PerThreadContext *p = &fctx->threads[i];
>
> @@ -547,6 +576,8 @@ static void park_frame_worker_threads(FrameThreadContext
> *fctx, int thread_count
> pthread_mutex_unlock(&p->progress_mutex);
> }
> }
> +
> + pthread_mutex_lock(&fctx->async_mutex);
> }
>
> void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
> @@ -605,6 +636,10 @@ void ff_frame_thread_free(AVCodecContext *avctx, int
> thread_count)
> av_freep(&fctx->threads);
> pthread_mutex_destroy(&fctx->buffer_mutex);
> pthread_mutex_destroy(&fctx->hwaccel_mutex);
> +
> + pthread_mutex_unlock(&fctx->async_mutex);
> + pthread_mutex_destroy(&fctx->async_mutex);
> +
> av_freep(&avctx->internal->thread_ctx);
> }
>
> @@ -647,6 +682,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
>
> pthread_mutex_init(&fctx->buffer_mutex, NULL);
> pthread_mutex_init(&fctx->hwaccel_mutex, NULL);
> +
> + pthread_mutex_init(&fctx->async_mutex, NULL);
> + pthread_mutex_lock(&fctx->async_mutex);
> +
> fctx->delaying = 1;
>
> for (i = 0; i < thread_count; i++) {
> diff --git a/libavcodec/vaapi_h264.c b/libavcodec/vaapi_h264.c
> index 7d8dc34..f765523 100644
> --- a/libavcodec/vaapi_h264.c
> +++ b/libavcodec/vaapi_h264.c
> @@ -22,6 +22,7 @@
>
> #include "h264dec.h"
> #include "h264_ps.h"
> +#include "hwaccel.h"
> #include "vaapi_decode.h"
>
> /**
> @@ -399,4 +400,5 @@ AVHWAccel ff_h264_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
> diff --git a/libavcodec/vaapi_mpeg2.c b/libavcodec/vaapi_mpeg2.c
> index 6c10578..a14d115 100644
> --- a/libavcodec/vaapi_mpeg2.c
> +++ b/libavcodec/vaapi_mpeg2.c
> @@ -20,6 +20,7 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> */
>
> +#include "hwaccel.h"
> #include "mpegutils.h"
> #include "mpegvideo.h"
> #include "internal.h"
> @@ -184,4 +185,5 @@ AVHWAccel ff_mpeg2_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
> diff --git a/libavcodec/vaapi_mpeg4.c b/libavcodec/vaapi_mpeg4.c
> index 4413cbf..b4819b8 100644
> --- a/libavcodec/vaapi_mpeg4.c
> +++ b/libavcodec/vaapi_mpeg4.c
> @@ -21,6 +21,7 @@
> */
>
> #include "h263.h"
> +#include "hwaccel.h"
> #include "internal.h"
> #include "mpeg4video.h"
> #include "mpegvideo.h"
> @@ -200,6 +201,7 @@ AVHWAccel ff_mpeg4_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
> #endif
>
> @@ -216,5 +218,6 @@ AVHWAccel ff_h263_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
> #endif
> diff --git a/libavcodec/vaapi_vc1.c b/libavcodec/vaapi_vc1.c
> index fe1a20f..42634f2 100644
> --- a/libavcodec/vaapi_vc1.c
> +++ b/libavcodec/vaapi_vc1.c
> @@ -20,6 +20,7 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> */
>
> +#include "hwaccel.h"
> #include "internal.h"
> #include "vaapi_decode.h"
> #include "vc1.h"
> @@ -399,6 +400,7 @@ AVHWAccel ff_wmv3_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
> #endif
>
> @@ -414,4 +416,5 @@ AVHWAccel ff_vc1_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
> diff --git a/libavcodec/vaapi_vp8.c b/libavcodec/vaapi_vp8.c
> index 70e9cec..1ba5c7e 100644
> --- a/libavcodec/vaapi_vp8.c
> +++ b/libavcodec/vaapi_vp8.c
> @@ -19,6 +19,7 @@
> #include <va/va.h>
> #include <va/va_dec_vp8.h>
>
> +#include "hwaccel.h"
> #include "vaapi_decode.h"
> #include "vp8.h"
>
> @@ -231,4 +232,5 @@ AVHWAccel ff_vp8_vaapi_hwaccel = {
> .init = &ff_vaapi_decode_init,
> .uninit = &ff_vaapi_decode_uninit,
> .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> };
If the async cap made a significant performance difference compared to a
single lock for async and hwaccel serializing, the cap should be added t
vdpau too.
Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel