Re: [libav-devel] [PATCH v4] qsvvpp: Fix to perform full init only when needed

2018-07-17 Thread Maxym Dmytrychenko
On Wed, Jul 18, 2018 at 1:34 AM Mark Thompson  wrote:

> On 18/07/18 00:25, Maxym Dmytrychenko wrote:
> > On Wed, Jul 18, 2018 at 12:48 AM Mark Thompson  wrote:
> >
> >> On 17/07/18 17:07, Maxym Dmytrychenko wrote:
> >>> Not used VPP sessions, like for hwupload/hwdownload handling,
> >>> can increase CPU utilization and this patch fixes it.
> >>> thank you,Joe, for the contribution.
> >>>
> >>> Signed-off-by: Maxym Dmytrychenko 
> >>> ---
> >>>  libavutil/hwcontext_qsv.c | 38 +-
> >>>  1 file changed, 29 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
> >>> index b3eb4a3ea..390c3aac4 100644
> >>> --- a/libavutil/hwcontext_qsv.c
> >>> +++ b/libavutil/hwcontext_qsv.c
> >>> @@ -18,6 +18,7 @@
> >>>
> >>>  #include 
> >>>  #include 
> >>> +#include 
> >>>
> >>>  #include 
> >>>
> >>> @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
> >>>
> >>>  typedef struct QSVFramesContext {
> >>>  mfxSession session_download;
> >>> +atomic_int session_download_init;
> >>>  mfxSession session_upload;
> >>> +atomic_int session_upload_init;
> >>>
> >>>  AVBufferRef *child_frames_ref;
> >>>  mfxFrameSurface1 *surfaces_internal;
> >>> @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
> >> *ctx)
> >>>  MFXVideoVPP_Close(s->session_download);
> >>>  MFXClose(s->session_download);
> >>>  }
> >>> -s->session_download = NULL;
> >>> +s->session_download  = NULL;
> >>> +s->session_download_init = 0;
> >>>
> >>>  if (s->session_upload) {
> >>>  MFXVideoVPP_Close(s->session_upload);
> >>>  MFXClose(s->session_upload);
> >>>  }
> >>> -s->session_upload = NULL;
> >>> +s->session_upload  = NULL;
> >>> +s->session_upload_init = 0;
> >>>
> >>>  av_freep(>mem_ids);
> >>>  av_freep(>surface_ptrs);
> >>> @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext
> *ctx)
> >>>  s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
> >>>  }
> >>>
> >>> -ret = qsv_init_internal_session(ctx, >session_download, 0);
> >>> -if (ret < 0)
> >>> -return ret;
> >>> -
> >>> -ret = qsv_init_internal_session(ctx, >session_upload, 1);
> >>> -if (ret < 0)
> >>> -return ret;
> >>> +s->session_download  = NULL;
> >>> +s->session_upload= NULL;
> >>> +s->session_download_init = 0;
> >>> +s->session_upload_init   = 0;
> >>>
> >>>  return 0;
> >>>  }
> >>> @@ -741,6 +743,15 @@ static int
> qsv_transfer_data_from(AVHWFramesContext
> >> *ctx, AVFrame *dst,
> >>>  mfxSyncPoint sync = NULL;
> >>>  mfxStatus err;
> >>>
> >>> +while (!s->session_download_init && !s->session_download) {
> >>> + if (atomic_fetch_add(>session_download_init, 1) == 0) {
> >>> +qsv_init_internal_session(ctx, >session_download, 0);
> >>> +}
> >>> + else {
> >>> +av_usleep(1);
> >>
> >> This races - consider what happens if the other thread is preempted for
> >> more than 1µs, or if the initialisation itself takes more than that
> long.
>
> (Apologies, I misread that the first time - with the spin loop it should
> only be a benign race on session_download, but it's still undefined
> behaviour by C11 and things like tsan will complain about it: read in the
> non-initialising thread against write in the initialising thread, after the
> flag has been set to 1.)
>
>
np,
let's fix it, sure.


> >>
> >> You need to actually do some synchronisation here (e.g. with a 'once'
> >> variable) - with only atomic flags there is no way to guarantee that the
> >> other thread has finished unless you spin, which isn't acceptable.
> >>
> >> pthread_once was considered but we need to pass  s->session_download
> > adding mutex - can be overkill for each call of   qsv_transfer_data_*
> >
> > what do you mean by 'once' variable?
> > dont really see it currently implemented somewhere...
>
> See AVOnce.  It's used in a few places in this role, for example in
> hwcontext_d3d11va.c for the library loading.
>
>
if I see it correct: (ret = ff_thread_once(_loaded,
load_functions)
 it comes down to :
static inline int ff_thread_once(char *control, void (*routine)(void))
note - no params for routine(), where I need to pass current ( ctx,
>session_download )
that is the puzzle to solve now.

Also thinking if
#if HAVE_PTHREADS
would be needed.

pthread_mutex / pthread_cond seems to be the only option left,
anything else?


> >>> +}
> >>> +}
> >>> +
> >>>  if (!s->session_download) {
> >>>  if (s->child_frames_ref)
> >>>  return qsv_transfer_data_child(ctx, dst, src);
> >>> @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
> >> *ctx, AVFrame *dst,
> >>>  mfxSyncPoint sync = NULL;
> >>>  mfxStatus err;
> >>>
> >>> +while (!s->session_upload_init && !s->session_upload) {
> >>> + if 

Re: [libav-devel] [PATCH v4] qsvvpp: Fix to perform full init only when needed

2018-07-17 Thread Mark Thompson
On 18/07/18 00:25, Maxym Dmytrychenko wrote:
> On Wed, Jul 18, 2018 at 12:48 AM Mark Thompson  wrote:
> 
>> On 17/07/18 17:07, Maxym Dmytrychenko wrote:
>>> Not used VPP sessions, like for hwupload/hwdownload handling,
>>> can increase CPU utilization and this patch fixes it.
>>> thank you,Joe, for the contribution.
>>>
>>> Signed-off-by: Maxym Dmytrychenko 
>>> ---
>>>  libavutil/hwcontext_qsv.c | 38 +-
>>>  1 file changed, 29 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
>>> index b3eb4a3ea..390c3aac4 100644
>>> --- a/libavutil/hwcontext_qsv.c
>>> +++ b/libavutil/hwcontext_qsv.c
>>> @@ -18,6 +18,7 @@
>>>
>>>  #include 
>>>  #include 
>>> +#include 
>>>
>>>  #include 
>>>
>>> @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
>>>
>>>  typedef struct QSVFramesContext {
>>>  mfxSession session_download;
>>> +atomic_int session_download_init;
>>>  mfxSession session_upload;
>>> +atomic_int session_upload_init;
>>>
>>>  AVBufferRef *child_frames_ref;
>>>  mfxFrameSurface1 *surfaces_internal;
>>> @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
>> *ctx)
>>>  MFXVideoVPP_Close(s->session_download);
>>>  MFXClose(s->session_download);
>>>  }
>>> -s->session_download = NULL;
>>> +s->session_download  = NULL;
>>> +s->session_download_init = 0;
>>>
>>>  if (s->session_upload) {
>>>  MFXVideoVPP_Close(s->session_upload);
>>>  MFXClose(s->session_upload);
>>>  }
>>> -s->session_upload = NULL;
>>> +s->session_upload  = NULL;
>>> +s->session_upload_init = 0;
>>>
>>>  av_freep(>mem_ids);
>>>  av_freep(>surface_ptrs);
>>> @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
>>>  s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
>>>  }
>>>
>>> -ret = qsv_init_internal_session(ctx, >session_download, 0);
>>> -if (ret < 0)
>>> -return ret;
>>> -
>>> -ret = qsv_init_internal_session(ctx, >session_upload, 1);
>>> -if (ret < 0)
>>> -return ret;
>>> +s->session_download  = NULL;
>>> +s->session_upload= NULL;
>>> +s->session_download_init = 0;
>>> +s->session_upload_init   = 0;
>>>
>>>  return 0;
>>>  }
>>> @@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext
>> *ctx, AVFrame *dst,
>>>  mfxSyncPoint sync = NULL;
>>>  mfxStatus err;
>>>
>>> +while (!s->session_download_init && !s->session_download) {
>>> + if (atomic_fetch_add(>session_download_init, 1) == 0) {
>>> +qsv_init_internal_session(ctx, >session_download, 0);
>>> +}
>>> + else {
>>> +av_usleep(1);
>>
>> This races - consider what happens if the other thread is preempted for
>> more than 1µs, or if the initialisation itself takes more than that long.

(Apologies, I misread that the first time - with the spin loop it should only 
be a benign race on session_download, but it's still undefined behaviour by C11 
and things like tsan will complain about it: read in the non-initialising 
thread against write in the initialising thread, after the flag has been set to 
1.)

>>
>> You need to actually do some synchronisation here (e.g. with a 'once'
>> variable) - with only atomic flags there is no way to guarantee that the
>> other thread has finished unless you spin, which isn't acceptable.
>>
>> pthread_once was considered but we need to pass  s->session_download
> adding mutex - can be overkill for each call of   qsv_transfer_data_*
> 
> what do you mean by 'once' variable?
> dont really see it currently implemented somewhere...

See AVOnce.  It's used in a few places in this role, for example in 
hwcontext_d3d11va.c for the library loading.

>>> +}
>>> +}
>>> +
>>>  if (!s->session_download) {
>>>  if (s->child_frames_ref)
>>>  return qsv_transfer_data_child(ctx, dst, src);
>>> @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
>> *ctx, AVFrame *dst,
>>>  mfxSyncPoint sync = NULL;
>>>  mfxStatus err;
>>>
>>> +while (!s->session_upload_init && !s->session_upload) {
>>> + if (atomic_fetch_add(>session_upload_init, 1) == 0) {
>>> +qsv_init_internal_session(ctx, >session_upload, 1);
>>> +}
>>> + else {
>>> +av_usleep(1);
>>> +}
>>> +}
>>> +
>>>  if (!s->session_upload) {
>>>  if (s->child_frames_ref)
>>>  return qsv_transfer_data_child(ctx, dst, src);
>>>
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH v4] qsvvpp: Fix to perform full init only when needed

2018-07-17 Thread Maxym Dmytrychenko
On Wed, Jul 18, 2018 at 12:48 AM Mark Thompson  wrote:

> On 17/07/18 17:07, Maxym Dmytrychenko wrote:
> > Not used VPP sessions, like for hwupload/hwdownload handling,
> > can increase CPU utilization and this patch fixes it.
> > thank you,Joe, for the contribution.
> >
> > Signed-off-by: Maxym Dmytrychenko 
> > ---
> >  libavutil/hwcontext_qsv.c | 38 +-
> >  1 file changed, 29 insertions(+), 9 deletions(-)
> >
> > diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
> > index b3eb4a3ea..390c3aac4 100644
> > --- a/libavutil/hwcontext_qsv.c
> > +++ b/libavutil/hwcontext_qsv.c
> > @@ -18,6 +18,7 @@
> >
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include 
> >
> > @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
> >
> >  typedef struct QSVFramesContext {
> >  mfxSession session_download;
> > +atomic_int session_download_init;
> >  mfxSession session_upload;
> > +atomic_int session_upload_init;
> >
> >  AVBufferRef *child_frames_ref;
> >  mfxFrameSurface1 *surfaces_internal;
> > @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
> *ctx)
> >  MFXVideoVPP_Close(s->session_download);
> >  MFXClose(s->session_download);
> >  }
> > -s->session_download = NULL;
> > +s->session_download  = NULL;
> > +s->session_download_init = 0;
> >
> >  if (s->session_upload) {
> >  MFXVideoVPP_Close(s->session_upload);
> >  MFXClose(s->session_upload);
> >  }
> > -s->session_upload = NULL;
> > +s->session_upload  = NULL;
> > +s->session_upload_init = 0;
> >
> >  av_freep(>mem_ids);
> >  av_freep(>surface_ptrs);
> > @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
> >  s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
> >  }
> >
> > -ret = qsv_init_internal_session(ctx, >session_download, 0);
> > -if (ret < 0)
> > -return ret;
> > -
> > -ret = qsv_init_internal_session(ctx, >session_upload, 1);
> > -if (ret < 0)
> > -return ret;
> > +s->session_download  = NULL;
> > +s->session_upload= NULL;
> > +s->session_download_init = 0;
> > +s->session_upload_init   = 0;
> >
> >  return 0;
> >  }
> > @@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext
> *ctx, AVFrame *dst,
> >  mfxSyncPoint sync = NULL;
> >  mfxStatus err;
> >
> > +while (!s->session_download_init && !s->session_download) {
> > + if (atomic_fetch_add(>session_download_init, 1) == 0) {
> > +qsv_init_internal_session(ctx, >session_download, 0);
> > +}
> > + else {
> > +av_usleep(1);
>
> This races - consider what happens if the other thread is preempted for
> more than 1µs, or if the initialisation itself takes more than that long.
>
> You need to actually do some synchronisation here (e.g. with a 'once'
> variable) - with only atomic flags there is no way to guarantee that the
> other thread has finished unless you spin, which isn't acceptable.
>
> pthread_once was considered but we need to pass  s->session_download
adding mutex - can be overkill for each call of   qsv_transfer_data_*

what do you mean by 'once' variable?
dont really see it currently implemented somewhere...


> > +}
> > +}
> > +
> >  if (!s->session_download) {
> >  if (s->child_frames_ref)
> >  return qsv_transfer_data_child(ctx, dst, src);
> > @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
> *ctx, AVFrame *dst,
> >  mfxSyncPoint sync = NULL;
> >  mfxStatus err;
> >
> > +while (!s->session_upload_init && !s->session_upload) {
> > + if (atomic_fetch_add(>session_upload_init, 1) == 0) {
> > +qsv_init_internal_session(ctx, >session_upload, 1);
> > +}
> > + else {
> > +av_usleep(1);
> > +}
> > +}
> > +
> >  if (!s->session_upload) {
> >  if (s->child_frames_ref)
> >  return qsv_transfer_data_child(ctx, dst, src);
> >
>
> Thanks,
>
> - Mark
> ___
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel


regards
Max
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH v4] qsvvpp: Fix to perform full init only when needed

2018-07-17 Thread Mark Thompson
On 17/07/18 17:07, Maxym Dmytrychenko wrote:
> Not used VPP sessions, like for hwupload/hwdownload handling,
> can increase CPU utilization and this patch fixes it.
> thank you,Joe, for the contribution.
> 
> Signed-off-by: Maxym Dmytrychenko 
> ---
>  libavutil/hwcontext_qsv.c | 38 +-
>  1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
> index b3eb4a3ea..390c3aac4 100644
> --- a/libavutil/hwcontext_qsv.c
> +++ b/libavutil/hwcontext_qsv.c
> @@ -18,6 +18,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
>  
>  typedef struct QSVFramesContext {
>  mfxSession session_download;
> +atomic_int session_download_init;
>  mfxSession session_upload;
> +atomic_int session_upload_init;
>  
>  AVBufferRef *child_frames_ref;
>  mfxFrameSurface1 *surfaces_internal;
> @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext *ctx)
>  MFXVideoVPP_Close(s->session_download);
>  MFXClose(s->session_download);
>  }
> -s->session_download = NULL;
> +s->session_download  = NULL;
> +s->session_download_init = 0;
>  
>  if (s->session_upload) {
>  MFXVideoVPP_Close(s->session_upload);
>  MFXClose(s->session_upload);
>  }
> -s->session_upload = NULL;
> +s->session_upload  = NULL;
> +s->session_upload_init = 0;
>  
>  av_freep(>mem_ids);
>  av_freep(>surface_ptrs);
> @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
>  s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
>  }
>  
> -ret = qsv_init_internal_session(ctx, >session_download, 0);
> -if (ret < 0)
> -return ret;
> -
> -ret = qsv_init_internal_session(ctx, >session_upload, 1);
> -if (ret < 0)
> -return ret;
> +s->session_download  = NULL;
> +s->session_upload= NULL;
> +s->session_download_init = 0;
> +s->session_upload_init   = 0;
>  
>  return 0;
>  }
> @@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext 
> *ctx, AVFrame *dst,
>  mfxSyncPoint sync = NULL;
>  mfxStatus err;
>  
> +while (!s->session_download_init && !s->session_download) {
> + if (atomic_fetch_add(>session_download_init, 1) == 0) {
> +qsv_init_internal_session(ctx, >session_download, 0);
> +}
> + else {
> +av_usleep(1);

This races - consider what happens if the other thread is preempted for more 
than 1µs, or if the initialisation itself takes more than that long.

You need to actually do some synchronisation here (e.g. with a 'once' variable) 
- with only atomic flags there is no way to guarantee that the other thread has 
finished unless you spin, which isn't acceptable.

> +}
> +}
> +
>  if (!s->session_download) {
>  if (s->child_frames_ref)
>  return qsv_transfer_data_child(ctx, dst, src);
> @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, 
> AVFrame *dst,
>  mfxSyncPoint sync = NULL;
>  mfxStatus err;
>  
> +while (!s->session_upload_init && !s->session_upload) {
> + if (atomic_fetch_add(>session_upload_init, 1) == 0) {
> +qsv_init_internal_session(ctx, >session_upload, 1);
> +}
> + else {
> +av_usleep(1);
> +}
> +}
> +
>  if (!s->session_upload) {
>  if (s->child_frames_ref)
>  return qsv_transfer_data_child(ctx, dst, src);
> 

Thanks,

- Mark
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH v4] qsvvpp: Fix to perform full init only when needed

2018-07-17 Thread Maxym Dmytrychenko
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.

Signed-off-by: Maxym Dmytrychenko 
---
 libavutil/hwcontext_qsv.c | 38 +-
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
 
 typedef struct QSVFramesContext {
 mfxSession session_download;
+atomic_int session_download_init;
 mfxSession session_upload;
+atomic_int session_upload_init;
 
 AVBufferRef *child_frames_ref;
 mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext *ctx)
 MFXVideoVPP_Close(s->session_download);
 MFXClose(s->session_download);
 }
-s->session_download = NULL;
+s->session_download  = NULL;
+s->session_download_init = 0;
 
 if (s->session_upload) {
 MFXVideoVPP_Close(s->session_upload);
 MFXClose(s->session_upload);
 }
-s->session_upload = NULL;
+s->session_upload  = NULL;
+s->session_upload_init = 0;
 
 av_freep(>mem_ids);
 av_freep(>surface_ptrs);
@@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
 s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
 }
 
-ret = qsv_init_internal_session(ctx, >session_download, 0);
-if (ret < 0)
-return ret;
-
-ret = qsv_init_internal_session(ctx, >session_upload, 1);
-if (ret < 0)
-return ret;
+s->session_download  = NULL;
+s->session_upload= NULL;
+s->session_download_init = 0;
+s->session_upload_init   = 0;
 
 return 0;
 }
@@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext *ctx, 
AVFrame *dst,
 mfxSyncPoint sync = NULL;
 mfxStatus err;
 
+while (!s->session_download_init && !s->session_download) {
+   if (atomic_fetch_add(>session_download_init, 1) == 0) {
+qsv_init_internal_session(ctx, >session_download, 0);
+}
+   else {
+av_usleep(1);
+}
+}
+
 if (!s->session_download) {
 if (s->child_frames_ref)
 return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, 
AVFrame *dst,
 mfxSyncPoint sync = NULL;
 mfxStatus err;
 
+while (!s->session_upload_init && !s->session_upload) {
+   if (atomic_fetch_add(>session_upload_init, 1) == 0) {
+qsv_init_internal_session(ctx, >session_upload, 1);
+}
+   else {
+av_usleep(1);
+}
+}
+
 if (!s->session_upload) {
 if (s->child_frames_ref)
 return qsv_transfer_data_child(ctx, dst, src);
-- 
2.15.2 (Apple Git-101.1)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel