On Sat, May 23, 2026 at 06:07:57AM +0530, Nithurshen wrote:
> While static batching successfully overlaps I/O and compute, different
> compression algorithms exhibit vastly different scheduling thresholds.
> Extremely fast algorithms like LZ4 require large batches (e.g., 32
> pclusters) to effectively hide the synchronization overhead of the
> thread pool.
> 
> Conversely, applying this large batch size to compute-heavy algorithms
> like LZMA or ZSTD causes memory bloat and thread starvation, as the
> main thread spends too much time reading and accumulating memory before
> waking up the background workers.
> 
> This patch modifies the workqueue submission logic in z_erofs_read_one_data
> to dynamically scale the batch size based on the algorithm format. LZ4
> is permitted to utilize the Z_EROFS_PCLUSTER_MAX_BATCH_SIZE, while
> other heavier algorithms trigger workqueue submission at a much lower
> threshold (8 pclusters) to ensure a steady pipeline of work and a
> bounded memory footprint.
> 
> Signed-off-by: Nithurshen <[email protected]>
> ---
>  include/erofs/internal.h |  2 +-
>  lib/data.c               | 15 +++++++++------
>  2 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> index 38020ee..c8f056f 100644
> --- a/include/erofs/internal.h
> +++ b/include/erofs/internal.h
> @@ -62,7 +62,7 @@ struct erofs_buf {
>  #define erofs_pos(sbi, nr)      ((erofs_off_t)(nr) << (sbi)->blkszbits)
>  #define BLK_ROUND_UP(sbi, addr)      \
>       (roundup(addr, erofs_blksiz(sbi)) >> (sbi)->blkszbits)
> -#define Z_EROFS_PCLUSTER_BATCH_SIZE 32
> +#define Z_EROFS_PCLUSTER_MAX_BATCH_SIZE 32
>  
>  struct erofs_buffer_head;
>  struct erofs_bufmgr;
> diff --git a/lib/data.c b/lib/data.c
> index fa36899..a06f4c2 100644
> --- a/lib/data.c
> +++ b/lib/data.c
> @@ -17,11 +17,11 @@ struct erofs_workqueue erofs_wq;
>  struct z_erofs_decompress_task {
>       struct erofs_work work;
>       struct z_erofs_read_ctx *ctx;
> -     struct z_erofs_decompress_req reqs[Z_EROFS_PCLUSTER_BATCH_SIZE];
> -     char *raw_bufs[Z_EROFS_PCLUSTER_BATCH_SIZE];
> -     char *out_bufs[Z_EROFS_PCLUSTER_BATCH_SIZE];
> -     erofs_off_t out_offsets[Z_EROFS_PCLUSTER_BATCH_SIZE];
> -     unsigned int out_lengths[Z_EROFS_PCLUSTER_BATCH_SIZE];
> +     struct z_erofs_decompress_req reqs[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> +     char *raw_bufs[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> +     char *out_bufs[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> +     erofs_off_t out_offsets[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> +     unsigned int out_lengths[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
>       unsigned int nr_reqs;
>  };
>  
> @@ -397,7 +397,10 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
>       task->out_offsets[idx] = out_offset;
>       task->out_lengths[idx] = length;
>  
> -     if (task->nr_reqs == Z_EROFS_PCLUSTER_BATCH_SIZE) {
> +     int batch_limit = (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZ4) ? 
> +                                             Z_EROFS_PCLUSTER_MAX_BATCH_SIZE : 8;

Why is it called dynamic decompression batching?

> +
> +     if (task->nr_reqs >= batch_limit) {
>               z_erofs_read_ctx_enqueue(ctx);
>       }
>       return 0;
> -- 
> 2.52.0
> 

Reply via email to