[PATCH 32/60] staging: lustre: osc: limits the number of chunks in write RPC

2017-01-28 Thread James Simmons
From: Jinshan Xiong 

OSC has to make sure that it won't issue write RPCs with too many
chunks otherwise it will cause ZFS to create transactions much
bigger than DMU_MAX_ACCESS in size, which will end up with write
failure.

Signed-off-by: Jinshan Xiong 
Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8135
Reviewed-on: http://review.whamcloud.com/22369
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8632
Reviewed-on: http://review.whamcloud.com/22654
Reviewed-by: Andreas Dilger 
Reviewed-by: Patrick Farrell 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
 drivers/staging/lustre/lustre/osc/osc_cache.c | 124 ++
 1 file changed, 87 insertions(+), 37 deletions(-)

diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c 
b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 72dd554..0490478 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -1882,16 +1882,32 @@ static void osc_ap_completion(const struct lu_env *env, 
struct client_obd *cli,
   oap, osc, rc);
 }
 
+struct extent_rpc_data {
+   struct list_head   *erd_rpc_list;
+   unsigned int erd_page_count;
+   unsigned int erd_max_pages;
+   unsigned int erd_max_chunks;
+};
+
+static inline unsigned osc_extent_chunks(const struct osc_extent *ext)
+{
+   struct client_obd *cli = osc_cli(ext->oe_obj);
+   unsigned ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
+
+   return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1;
+}
+
 /**
  * Try to add extent to one RPC. We need to think about the following things:
  * - # of pages must not be over max_pages_per_rpc
  * - extent must be compatible with previous ones
  */
 static int try_to_add_extent_for_io(struct client_obd *cli,
-   struct osc_extent *ext, struct list_head *rpclist,
-   unsigned int *pc, unsigned int *max_pages)
+   struct osc_extent *ext,
+   struct extent_rpc_data *data)
 {
struct osc_extent *tmp;
+   unsigned int chunk_count;
struct osc_async_page *oap = list_first_entry(&ext->oe_pages,
  struct osc_async_page,
  oap_pending_item);
@@ -1899,19 +1915,22 @@ static int try_to_add_extent_for_io(struct client_obd 
*cli,
EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE),
ext);
 
-   *max_pages = max(ext->oe_mppr, *max_pages);
-   if (*pc + ext->oe_nr_pages > *max_pages)
+   chunk_count = osc_extent_chunks(ext);
+   if (chunk_count > data->erd_max_chunks)
+   return 0;
+
+   data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages);
+   if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages)
return 0;
 
-   list_for_each_entry(tmp, rpclist, oe_link) {
+   list_for_each_entry(tmp, data->erd_rpc_list, oe_link) {
struct osc_async_page *oap2;
 
oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page,
oap_pending_item);
EASSERT(tmp->oe_owner == current, tmp);
if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) {
-   CDEBUG(D_CACHE, "Do not permit different type of IO"
-   " for a same RPC\n");
+   CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n");
return 0;
}
 
@@ -1924,12 +1943,41 @@ static int try_to_add_extent_for_io(struct client_obd 
*cli,
break;
}
 
-   *pc += ext->oe_nr_pages;
-   list_move_tail(&ext->oe_link, rpclist);
+   data->erd_max_chunks -= chunk_count;
+   data->erd_page_count += ext->oe_nr_pages;
+   list_move_tail(&ext->oe_link, data->erd_rpc_list);
ext->oe_owner = current;
return 1;
 }
 
+static inline unsigned osc_max_write_chunks(const struct client_obd *cli)
+{
+   /*
+* LU-8135:
+*
+* The maximum size of a single transaction is about 64MB in ZFS.
+* #define DMU_MAX_ACCESS (64 * 1024 * 1024)
+*
+* Since ZFS is a copy-on-write file system, a single dirty page in
+* a chunk will result in the rewrite of the whole chunk, therefore
+* an RPC shouldn't be allowed to contain too many chunks otherwise
+* it will make transaction size much bigger than 64MB, especially
+* with big block size for ZFS.
+*
+* This piece of code is to make sure that OSC won't 

[PATCH 32/60] staging: lustre: osc: limits the number of chunks in write RPC

2017-01-28 Thread James Simmons
From: Jinshan Xiong 

OSC has to make sure that it won't issue write RPCs with too many
chunks otherwise it will cause ZFS to create transactions much
bigger than DMU_MAX_ACCESS in size, which will end up with write
failure.

Signed-off-by: Jinshan Xiong 
Signed-off-by: Dmitry Eremin 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8135
Reviewed-on: http://review.whamcloud.com/22369
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8632
Reviewed-on: http://review.whamcloud.com/22654
Reviewed-by: Andreas Dilger 
Reviewed-by: Patrick Farrell 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
 drivers/staging/lustre/lustre/osc/osc_cache.c | 124 ++
 1 file changed, 87 insertions(+), 37 deletions(-)

diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c 
b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 72dd554..0490478 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -1882,16 +1882,32 @@ static void osc_ap_completion(const struct lu_env *env, 
struct client_obd *cli,
   oap, osc, rc);
 }
 
+struct extent_rpc_data {
+   struct list_head   *erd_rpc_list;
+   unsigned int erd_page_count;
+   unsigned int erd_max_pages;
+   unsigned int erd_max_chunks;
+};
+
+static inline unsigned osc_extent_chunks(const struct osc_extent *ext)
+{
+   struct client_obd *cli = osc_cli(ext->oe_obj);
+   unsigned ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
+
+   return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1;
+}
+
 /**
  * Try to add extent to one RPC. We need to think about the following things:
  * - # of pages must not be over max_pages_per_rpc
  * - extent must be compatible with previous ones
  */
 static int try_to_add_extent_for_io(struct client_obd *cli,
-   struct osc_extent *ext, struct list_head *rpclist,
-   unsigned int *pc, unsigned int *max_pages)
+   struct osc_extent *ext,
+   struct extent_rpc_data *data)
 {
struct osc_extent *tmp;
+   unsigned int chunk_count;
struct osc_async_page *oap = list_first_entry(&ext->oe_pages,
  struct osc_async_page,
  oap_pending_item);
@@ -1899,19 +1915,22 @@ static int try_to_add_extent_for_io(struct client_obd 
*cli,
EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE),
ext);
 
-   *max_pages = max(ext->oe_mppr, *max_pages);
-   if (*pc + ext->oe_nr_pages > *max_pages)
+   chunk_count = osc_extent_chunks(ext);
+   if (chunk_count > data->erd_max_chunks)
+   return 0;
+
+   data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages);
+   if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages)
return 0;
 
-   list_for_each_entry(tmp, rpclist, oe_link) {
+   list_for_each_entry(tmp, data->erd_rpc_list, oe_link) {
struct osc_async_page *oap2;
 
oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page,
oap_pending_item);
EASSERT(tmp->oe_owner == current, tmp);
if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) {
-   CDEBUG(D_CACHE, "Do not permit different type of IO"
-   " for a same RPC\n");
+   CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n");
return 0;
}
 
@@ -1924,12 +1943,41 @@ static int try_to_add_extent_for_io(struct client_obd 
*cli,
break;
}
 
-   *pc += ext->oe_nr_pages;
-   list_move_tail(&ext->oe_link, rpclist);
+   data->erd_max_chunks -= chunk_count;
+   data->erd_page_count += ext->oe_nr_pages;
+   list_move_tail(&ext->oe_link, data->erd_rpc_list);
ext->oe_owner = current;
return 1;
 }
 
+static inline unsigned osc_max_write_chunks(const struct client_obd *cli)
+{
+   /*
+* LU-8135:
+*
+* The maximum size of a single transaction is about 64MB in ZFS.
+* #define DMU_MAX_ACCESS (64 * 1024 * 1024)
+*
+* Since ZFS is a copy-on-write file system, a single dirty page in
+* a chunk will result in the rewrite of the whole chunk, therefore
+* an RPC shouldn't be allowed to contain too many chunks otherwise
+* it will make transaction size much bigger than 64MB, especially
+* with big block size for ZFS.
+*
+* This piece of code is to make sure that OSC won't send write RPCs
+* with too many chunks. The maximum chunk size that an RPC can cover
+* is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB.