[RFC 18/23] block/xen-blkfront: Make it running on 64KB page granularity

2015-05-14 Thread Julien Grall
From: Julien Grall 

The PV block protocol is using 4KB page granularity. The goal of this
patch is to allow a Linux guest using 64KB page granularity to use
block devices on a non-modified Xen.

The block API is using segments which should be at least the size of a
Linux page. Therefore, the driver will have to break the page into
chunks of 4K before giving the page to the backend.

Breaking a 64KB segment into 4KB chunks can result in some chunks with
no data. As the PV protocol always requires data in each chunk, we
have to count the number of Xen pages which will be in use and avoid
sending empty chunks.

Note that a pre-defined number of grants is reserved before preparing
the request. This pre-defined number is based on the number and the
maximum size of the segments. If each segment contains a very small
amount of data, the driver may reserve too many grants (16 grants are
reserved per segment with 64KB page granularity).

Furthermore, in the case of persistent grants we allocate one Linux page
per grant although only 4KB of the page will be effectively used.
This could be improved by sharing the page between multiple grants.

Signed-off-by: Julien Grall 
Cc: Konrad Rzeszutek Wilk 
Cc: Roger Pau Monné 
Cc: Boris Ostrovsky 
Cc: David Vrabel 

---

Improvement such as support 64KB grant is not taken into consideration in
this patch because we have the requirement to run a Linux using 64KB page
on a non-modified Xen.
---
 drivers/block/xen-blkfront.c | 259 ++-
 1 file changed, 156 insertions(+), 103 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 60cf1d6..c6537ed 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -77,6 +77,7 @@ struct blk_shadow {
struct grant **grants_used;
struct grant **indirect_grants;
struct scatterlist *sg;
+   unsigned int num_sg;
 };
 
 struct split_bio {
@@ -98,7 +99,7 @@ static unsigned int xen_blkif_max_segments = 32;
 module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
 MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests 
(default is 32)");
 
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE)
 
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -131,6 +132,7 @@ struct blkfront_info
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int feature_persistent:1;
+   /* Number of 4K segment handled */
unsigned int max_indirect_segments;
int is_ready;
 };
@@ -158,10 +160,19 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME   "xvd"   /* name in /dev */
 
-#define SEGS_PER_INDIRECT_FRAME \
-   (PAGE_SIZE/sizeof(struct blkif_request_segment))
-#define INDIRECT_GREFS(_segs) \
-   ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+/*
+ * Xen use 4K pages. The guest may use different page size (4K or 64K)
+ * Number of Xen pages per segment
+ */
+#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define SEGS_PER_INDIRECT_FRAME\
+   (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment) / 
XEN_PAGES_PER_SEGMENT)
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+   (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
+
+#define INDIRECT_GREFS(_pages) \
+   ((_pages + XEN_PAGES_PER_INDIRECT_FRAME - 
1)/XEN_PAGES_PER_INDIRECT_FRAME)
 
 static int blkfront_setup_indirect(struct blkfront_info *info);
 
@@ -204,7 +215,7 @@ static int fill_grant_buffer(struct blkfront_info *info, 
int num)
kfree(gnt_list_entry);
goto out_of_memory;
}
-   gnt_list_entry->pfn = page_to_pfn(granted_page);
+   gnt_list_entry->pfn = xen_page_to_pfn(granted_page);
}
 
gnt_list_entry->gref = GRANT_INVALID_REF;
@@ -219,7 +230,7 @@ out_of_memory:
 &info->grants, node) {
 list_del(&gnt_list_entry->node);
if (info->feature_persistent)
-   __free_page(pfn_to_page(gnt_list_entry->pfn));
+   __free_page(xen_pfn_to_page(gnt_list_entry->pfn));
kfree(gnt_list_entry);
i--;
}
@@ -389,7 +400,8 @@ static int blkif_queue_request(struct request *req)
struct blkif_request *ring_req;
unsigned long id;
unsigned int fsect, lsect;
-   int i, ref, n;
+   unsigned int shared_off, shared_len, bvec_off, sg_total;
+   int i, ref, n, grant;
struct blkif_request_segment *segments = NULL;
 
/*
@@ -401,18 +413,19 @@ static int blkif_queue_request(struct request *req)
grant_ref_t gref_head;
struct grant *gnt_list_entry = NULL;
struct scatterlist *sg;
-   int nseg, max_grefs;
+   int nseg, max_grefs, 

[RFC 18/23] block/xen-blkfront: Make it running on 64KB page granularity

2015-05-14 Thread Julien Grall
From: Julien Grall <julien.grall@linaro.org>

The PV block protocol is using 4KB page granularity. The goal of this
patch is to allow a Linux guest using 64KB page granularity to use
block devices on a non-modified Xen.

The block API is using segments which should be at least the size of a
Linux page. Therefore, the driver will have to break the page into
chunks of 4K before giving the page to the backend.

Breaking a 64KB segment into 4KB chunks can result in some chunks with
no data. As the PV protocol always requires data in each chunk, we
have to count the number of Xen pages which will be in use and avoid
sending empty chunks.

Note that a pre-defined number of grants is reserved before preparing
the request. This pre-defined number is based on the number and the
maximum size of the segments. If each segment contains a very small
amount of data, the driver may reserve too many grants (16 grants are
reserved per segment with 64KB page granularity).

Furthermore, in the case of persistent grants we allocate one Linux page
per grant although only 4KB of the page will be effectively used.
This could be improved by sharing the page between multiple grants.

Signed-off-by: Julien Grall <julien.grall@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Roger Pau Monné <roger.pau@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>

---

Improvement such as support 64KB grant is not taken into consideration in
this patch because we have the requirement to run a Linux using 64KB page
on a non-modified Xen.
---
 drivers/block/xen-blkfront.c | 259 ++-
 1 file changed, 156 insertions(+), 103 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 60cf1d6..c6537ed 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -77,6 +77,7 @@ struct blk_shadow {
struct grant **grants_used;
struct grant **indirect_grants;
struct scatterlist *sg;
+   unsigned int num_sg;
 };
 
 struct split_bio {
@@ -98,7 +99,7 @@ static unsigned int xen_blkif_max_segments = 32;
 module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
 MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests 
(default is 32)");
 
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE)
 
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -131,6 +132,7 @@ struct blkfront_info
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int feature_persistent:1;
+   /* Number of 4K segment handled */
unsigned int max_indirect_segments;
int is_ready;
 };
@@ -158,10 +160,19 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME   "xvd"   /* name in /dev */
 
-#define SEGS_PER_INDIRECT_FRAME \
-   (PAGE_SIZE/sizeof(struct blkif_request_segment))
-#define INDIRECT_GREFS(_segs) \
-   ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+/*
+ * Xen use 4K pages. The guest may use different page size (4K or 64K)
+ * Number of Xen pages per segment
+ */
+#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define SEGS_PER_INDIRECT_FRAME\
+   (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment) / 
XEN_PAGES_PER_SEGMENT)
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+   (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
+
+#define INDIRECT_GREFS(_pages) \
+   ((_pages + XEN_PAGES_PER_INDIRECT_FRAME - 
1)/XEN_PAGES_PER_INDIRECT_FRAME)
 
 static int blkfront_setup_indirect(struct blkfront_info *info);
 
@@ -204,7 +215,7 @@ static int fill_grant_buffer(struct blkfront_info *info, 
int num)
kfree(gnt_list_entry);
goto out_of_memory;
}
-   gnt_list_entry->pfn = page_to_pfn(granted_page);
+   gnt_list_entry->pfn = xen_page_to_pfn(granted_page);
}
 
 gnt_list_entry->gref = GRANT_INVALID_REF;
@@ -219,7 +230,7 @@ out_of_memory:
 &info->grants, node) {
 list_del(&gnt_list_entry->node);
 if (info->feature_persistent)
-   __free_page(pfn_to_page(gnt_list_entry->pfn));
+   __free_page(xen_pfn_to_page(gnt_list_entry->pfn));
kfree(gnt_list_entry);
i--;
}
@@ -389,7 +400,8 @@ static int blkif_queue_request(struct request *req)
struct blkif_request *ring_req;
unsigned long id;
unsigned int fsect, lsect;
-   int i, ref, n;
+   unsigned int shared_off, shared_len, bvec_off, sg_total;
+   int i, ref, n, grant;
struct blkif_request_segment *segments = NULL;
 
/*
@@ -401,18 +413,19 @@ static int blkif_queue_request(struct request *req)
grant_ref_t gref_head;