[PATCH 32/60] staging: lustre: osc: limits the number of chunks in write RPC
From: Jinshan Xiong OSC has to make sure that it won't issue write RPCs with too many chunks otherwise it will cause ZFS to create transactions much bigger than DMU_MAX_ACCESS in size, which will end up with write failure. Signed-off-by: Jinshan Xiong Signed-off-by: Dmitry Eremin Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8135 Reviewed-on: http://review.whamcloud.com/22369 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8632 Reviewed-on: http://review.whamcloud.com/22654 Reviewed-by: Andreas Dilger Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin Signed-off-by: James Simmons --- drivers/staging/lustre/lustre/osc/osc_cache.c | 124 ++ 1 file changed, 87 insertions(+), 37 deletions(-) diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 72dd554..0490478 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -1882,16 +1882,32 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, oap, osc, rc); } +struct extent_rpc_data { + struct list_head *erd_rpc_list; + unsigned interd_page_count; + unsigned interd_max_pages; + unsigned interd_max_chunks; +}; + +static inline unsigned osc_extent_chunks(const struct osc_extent *ext) +{ + struct client_obd *cli = osc_cli(ext->oe_obj); + unsigned ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; + + return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1; +} + /** * Try to add extent to one RPC.
We need to think about the following things: * - # of pages must not be over max_pages_per_rpc * - extent must be compatible with previous ones */ static int try_to_add_extent_for_io(struct client_obd *cli, - struct osc_extent *ext, struct list_head *rpclist, - unsigned int *pc, unsigned int *max_pages) + struct osc_extent *ext, + struct extent_rpc_data *data) { struct osc_extent *tmp; + unsigned int chunk_count; struct osc_async_page *oap = list_first_entry(>oe_pages, struct osc_async_page, oap_pending_item); @@ -1899,19 +1915,22 @@ static int try_to_add_extent_for_io(struct client_obd *cli, EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE), ext); - *max_pages = max(ext->oe_mppr, *max_pages); - if (*pc + ext->oe_nr_pages > *max_pages) + chunk_count = osc_extent_chunks(ext); + if (chunk_count > data->erd_max_chunks) + return 0; + + data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages); + if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages) return 0; - list_for_each_entry(tmp, rpclist, oe_link) { + list_for_each_entry(tmp, data->erd_rpc_list, oe_link) { struct osc_async_page *oap2; oap2 = list_first_entry(>oe_pages, struct osc_async_page, oap_pending_item); EASSERT(tmp->oe_owner == current, tmp); if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) { - CDEBUG(D_CACHE, "Do not permit different type of IO" - " for a same RPC\n"); + CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n"); return 0; } @@ -1924,12 +1943,41 @@ static int try_to_add_extent_for_io(struct client_obd *cli, break; } - *pc += ext->oe_nr_pages; - list_move_tail(>oe_link, rpclist); + data->erd_max_chunks -= chunk_count; + data->erd_page_count += ext->oe_nr_pages; + list_move_tail(>oe_link, data->erd_rpc_list); ext->oe_owner = current; return 1; } +static inline unsigned osc_max_write_chunks(const struct client_obd *cli) +{ + /* +* LU-8135: +* +* The maximum size of a single transaction is about 64MB in ZFS. 
+* #define DMU_MAX_ACCESS (64 * 1024 * 1024) +* +* Since ZFS is a copy-on-write file system, a single dirty page in +* a chunk will result in the rewrite of the whole chunk, therefore +* an RPC shouldn't be allowed to contain too many chunks otherwise +* it will make transaction size much bigger than 64MB, especially +* with big block size for ZFS. +* +* This piece of code is to make sure that OSC won't
[PATCH 32/60] staging: lustre: osc: limits the number of chunks in write RPC
From: Jinshan Xiong OSC has to make sure that it won't issue write RPCs with too many chunks otherwise it will cause ZFS to create transactions much bigger than DMU_MAX_ACCESS in size, which will end up with write failure. Signed-off-by: Jinshan Xiong Signed-off-by: Dmitry Eremin Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8135 Reviewed-on: http://review.whamcloud.com/22369 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8632 Reviewed-on: http://review.whamcloud.com/22654 Reviewed-by: Andreas Dilger Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin Signed-off-by: James Simmons --- drivers/staging/lustre/lustre/osc/osc_cache.c | 124 ++ 1 file changed, 87 insertions(+), 37 deletions(-) diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 72dd554..0490478 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -1882,16 +1882,32 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, oap, osc, rc); } +struct extent_rpc_data { + struct list_head *erd_rpc_list; + unsigned interd_page_count; + unsigned interd_max_pages; + unsigned interd_max_chunks; +}; + +static inline unsigned osc_extent_chunks(const struct osc_extent *ext) +{ + struct client_obd *cli = osc_cli(ext->oe_obj); + unsigned ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; + + return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1; +} + /** * Try to add extent to one RPC.
We need to think about the following things: * - # of pages must not be over max_pages_per_rpc * - extent must be compatible with previous ones */ static int try_to_add_extent_for_io(struct client_obd *cli, - struct osc_extent *ext, struct list_head *rpclist, - unsigned int *pc, unsigned int *max_pages) + struct osc_extent *ext, + struct extent_rpc_data *data) { struct osc_extent *tmp; + unsigned int chunk_count; struct osc_async_page *oap = list_first_entry(>oe_pages, struct osc_async_page, oap_pending_item); @@ -1899,19 +1915,22 @@ static int try_to_add_extent_for_io(struct client_obd *cli, EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE), ext); - *max_pages = max(ext->oe_mppr, *max_pages); - if (*pc + ext->oe_nr_pages > *max_pages) + chunk_count = osc_extent_chunks(ext); + if (chunk_count > data->erd_max_chunks) + return 0; + + data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages); + if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages) return 0; - list_for_each_entry(tmp, rpclist, oe_link) { + list_for_each_entry(tmp, data->erd_rpc_list, oe_link) { struct osc_async_page *oap2; oap2 = list_first_entry(>oe_pages, struct osc_async_page, oap_pending_item); EASSERT(tmp->oe_owner == current, tmp); if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) { - CDEBUG(D_CACHE, "Do not permit different type of IO" - " for a same RPC\n"); + CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n"); return 0; } @@ -1924,12 +1943,41 @@ static int try_to_add_extent_for_io(struct client_obd *cli, break; } - *pc += ext->oe_nr_pages; - list_move_tail(>oe_link, rpclist); + data->erd_max_chunks -= chunk_count; + data->erd_page_count += ext->oe_nr_pages; + list_move_tail(>oe_link, data->erd_rpc_list); ext->oe_owner = current; return 1; } +static inline unsigned osc_max_write_chunks(const struct client_obd *cli) +{ + /* +* LU-8135: +* +* The maximum size of a single transaction is about 64MB in ZFS. 
+* #define DMU_MAX_ACCESS (64 * 1024 * 1024) +* +* Since ZFS is a copy-on-write file system, a single dirty page in +* a chunk will result in the rewrite of the whole chunk, therefore +* an RPC shouldn't be allowed to contain too many chunks otherwise +* it will make transaction size much bigger than 64MB, especially +* with big block size for ZFS. +* +* This piece of code is to make sure that OSC won't send write RPCs +* with too many chunks. The maximum chunk size that an RPC can cover +* is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB.