On Sat, May 23, 2026 at 06:07:57AM +0530, Nithurshen wrote:
> While static batching successfully overlaps I/O and compute, different
> compression algorithms exhibit vastly different scheduling thresholds.
> Extremely fast algorithms like LZ4 require large batches (e.g., 32
> pclusters) to effectively hide the synchronization overhead of the
> thread pool.
>
> Conversely, applying this large batch size to compute-heavy algorithms
> like LZMA or ZSTD causes memory bloat and thread starvation, as the
> main thread spends too much time reading and accumulating memory before
> waking up the background workers.
>
> This patch modifies the workqueue submission logic in z_erofs_read_one_data
> to dynamically scale the batch size based on the algorithm format. LZ4
> is permitted to utilize the Z_EROFS_PCLUSTER_MAX_BATCH_SIZE, while
> other heavier algorithms trigger workqueue submission at a much lower
> threshold (8 pclusters) to ensure a steady pipeline of work and a
> bounded memory footprint.
>
> Signed-off-by: Nithurshen <[email protected]>
> ---
> include/erofs/internal.h | 2 +-
> lib/data.c | 15 +++++++++------
> 2 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> index 38020ee..c8f056f 100644
> --- a/include/erofs/internal.h
> +++ b/include/erofs/internal.h
> @@ -62,7 +62,7 @@ struct erofs_buf {
> #define erofs_pos(sbi, nr) ((erofs_off_t)(nr) << (sbi)->blkszbits)
> #define BLK_ROUND_UP(sbi, addr) \
> (roundup(addr, erofs_blksiz(sbi)) >> (sbi)->blkszbits)
> -#define Z_EROFS_PCLUSTER_BATCH_SIZE 32
> +#define Z_EROFS_PCLUSTER_MAX_BATCH_SIZE 32
>
> struct erofs_buffer_head;
> struct erofs_bufmgr;
> diff --git a/lib/data.c b/lib/data.c
> index fa36899..a06f4c2 100644
> --- a/lib/data.c
> +++ b/lib/data.c
> @@ -17,11 +17,11 @@ struct erofs_workqueue erofs_wq;
> struct z_erofs_decompress_task {
> struct erofs_work work;
> struct z_erofs_read_ctx *ctx;
> - struct z_erofs_decompress_req reqs[Z_EROFS_PCLUSTER_BATCH_SIZE];
> - char *raw_bufs[Z_EROFS_PCLUSTER_BATCH_SIZE];
> - char *out_bufs[Z_EROFS_PCLUSTER_BATCH_SIZE];
> - erofs_off_t out_offsets[Z_EROFS_PCLUSTER_BATCH_SIZE];
> - unsigned int out_lengths[Z_EROFS_PCLUSTER_BATCH_SIZE];
> + struct z_erofs_decompress_req reqs[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> + char *raw_bufs[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> + char *out_bufs[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> + erofs_off_t out_offsets[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> + unsigned int out_lengths[Z_EROFS_PCLUSTER_MAX_BATCH_SIZE];
> unsigned int nr_reqs;
> };
>
> @@ -397,7 +397,10 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
> task->out_offsets[idx] = out_offset;
> task->out_lengths[idx] = length;
>
> - if (task->nr_reqs == Z_EROFS_PCLUSTER_BATCH_SIZE) {
> + int batch_limit = (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZ4) ?
> + Z_EROFS_PCLUSTER_MAX_BATCH_SIZE
> : 8;
Why is it called dynamic decompression batching?
> +
> + if (task->nr_reqs >= batch_limit) {
> z_erofs_read_ctx_enqueue(ctx);
> }
> return 0;
> --
> 2.52.0
>