Re: [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

2024-05-27 Thread Andreas Rheinhardt
Anton Khirnov:
> Quoting Andreas Rheinhardt (2024-04-17 11:29:18)
>> Anton Khirnov:
>>> It is more efficient and easier to manage.
>>> ---
>>
>> Allocating structures used by slice contexts jointly has the potential
>> downside of false sharing if the structures are not sufficiently
>> aligned/padded.
> 
> What do you suggest? Align first member to cacheline size?
> 

This is problematic, because av_malloc is not necessarily aligned to the
chacheline size; for the same reason it is not possible to simply
DECLARE_ALIGNED_64 for it (see e.g.
7945d30e91b96d2f4f5b612048169087d214d41e). Given that the structure we
are talking about is already pretty big, the easiest way is to add
explicit padding at the end. Use __GCC_DESTRUCTIVE_SIZE if that is
defined or 64 if not (or maybe always use 128B?
https://stackoverflow.com/questions/72126606/should-the-cache-padding-size-of-x86-64-be-128-bytes).

- Andreas

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

2024-05-24 Thread Anton Khirnov
Quoting Andreas Rheinhardt (2024-04-17 11:29:18)
> Anton Khirnov:
> > It is more efficient and easier to manage.
> > ---
> 
> Allocating structures used by slice contexts jointly has the potential
> downside of false sharing if the structures are not sufficiently
> aligned/padded.

What do you suggest? Align first member to cacheline size?

-- 
Anton Khirnov
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

2024-04-17 Thread Andreas Rheinhardt
Anton Khirnov:
> It is more efficient and easier to manage.
> ---

Allocating structures used by slice contexts jointly has the potential
downside of false sharing if the structures are not sufficiently
aligned/padded.

- Andreas

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

2024-04-10 Thread Anton Khirnov
It is more efficient and easier to manage.
---
 libavcodec/hevcdec.c | 57 +---
 libavcodec/hevcdec.h |  2 +-
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 55f72af972..47226ef0ab 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2598,7 +2598,7 @@ static int hls_slice_data(HEVCContext *s)
 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist,
 int job, int self_id)
 {
-HEVCLocalContext *lc = ((HEVCLocalContext**)hevc_lclist)[self_id];
+HEVCLocalContext *lc = &((HEVCLocalContext*)hevc_lclist)[self_id];
 const HEVCContext *const s = lc->parent;
 int ctb_size= 1 << s->ps.sps->log2_ctb_size;
 int more_data   = 1;
@@ -2682,7 +2682,7 @@ static int hls_slice_data_wpp(HEVCContext *s, const 
H2645NAL *nal)
 {
 const uint8_t *data = nal->data;
 int length  = nal->size;
-HEVCLocalContext *lc = s->HEVClc;
+HEVCLocalContext *lc;
 int *ret;
 int64_t offset;
 int64_t startheader, cmpt = 0;
@@ -2696,19 +2696,31 @@ static int hls_slice_data_wpp(HEVCContext *s, const 
H2645NAL *nal)
 return AVERROR_INVALIDDATA;
 }
 
-for (i = 1; i < s->threads_number; i++) {
-if (i < s->nb_local_ctx)
-continue;
-s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext));
-if (!s->local_ctx[i])
-return AVERROR(ENOMEM);
-s->nb_local_ctx++;
+if (s->threads_number > s->nb_local_ctx) {
+HEVCLocalContext *tmp = av_malloc_array(s->threads_number, 
sizeof(*s->local_ctx));
 
-s->local_ctx[i]->logctx = s->avctx;
-s->local_ctx[i]->parent = s;
-s->local_ctx[i]->common_cabac_state = >cabac;
+if (!tmp)
+return AVERROR(ENOMEM);
+
+memcpy(tmp, s->local_ctx, sizeof(*s->local_ctx) * s->nb_local_ctx);
+av_free(s->local_ctx);
+s->local_ctx = tmp;
+s->HEVClc= >local_ctx[0];
+
+for (unsigned i = s->nb_local_ctx; i < s->threads_number; i++) {
+tmp = >local_ctx[i];
+
+memset(tmp, 0, sizeof(*tmp));
+
+tmp->logctx = s->avctx;
+tmp->parent = s;
+tmp->common_cabac_state = >cabac;
+}
+
+s->nb_local_ctx = s->threads_number;
 }
 
+lc = >local_ctx[0];
 offset = (lc->gb.index >> 3);
 
 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; 
j < nal->skipped_bytes; j++) {
@@ -2744,8 +2756,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const 
H2645NAL *nal)
 s->data = data;
 
 for (i = 1; i < s->threads_number; i++) {
-s->local_ctx[i]->first_qp_group = 1;
-s->local_ctx[i]->qp_y = s->HEVClc->qp_y;
+s->local_ctx[i].first_qp_group = 1;
+s->local_ctx[i].qp_y = s->HEVClc->qp_y;
 }
 
 atomic_store(>wpp_err, 0);
@@ -3476,12 +3488,6 @@ static av_cold int hevc_decode_free(AVCodecContext 
*avctx)
 av_freep(>sh.offset);
 av_freep(>sh.size);
 
-if (s->local_ctx) {
-for (i = 1; i < s->nb_local_ctx; i++) {
-av_freep(>local_ctx[i]);
-}
-}
-av_freep(>HEVClc);
 av_freep(>local_ctx);
 
 ff_h2645_packet_uninit(>pkt);
@@ -3498,15 +3504,16 @@ static av_cold int hevc_init_context(AVCodecContext 
*avctx)
 
 s->avctx = avctx;
 
-s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
-s->local_ctx = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number);
-if (!s->HEVClc || !s->local_ctx)
+s->local_ctx = av_mallocz(sizeof(*s->local_ctx));
+if (!s->local_ctx)
 return AVERROR(ENOMEM);
+s->nb_local_ctx = 1;
+
+s->HEVClc = >local_ctx[0];
+
 s->HEVClc->parent = s;
 s->HEVClc->logctx = avctx;
 s->HEVClc->common_cabac_state = >cabac;
-s->local_ctx[0] = s->HEVClc;
-s->nb_local_ctx = 1;
 
 s->output_frame = av_frame_alloc();
 if (!s->output_frame)
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index a881eb9981..5a4ed270e8 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -441,7 +441,7 @@ typedef struct HEVCContext {
 const AVClass *c;  // needed by private avoptions
 AVCodecContext *avctx;
 
-HEVCLocalContext**local_ctx;
+HEVCLocalContext *local_ctx;
 unsigned   nb_local_ctx;
 
 HEVCLocalContext*HEVClc;
-- 
2.43.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".