RE: RE: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-21 Thread Daejun Park
Hi Avri,

>> +static int ufshpb_execute_umap_req(struct ufshpb_lu *hpb,
>> +  struct ufshpb_req *umap_req,
>> +  struct ufshpb_region *rgn)
>> +{
>> +   struct request *req;
>> +   struct scsi_request *rq;
>> +
>> +   req = umap_req->req;
>> +   req->timeout = 0;
>> +   req->end_io_data = (void *)umap_req;
>> +   rq = scsi_req(req);
>> +   ufshpb_set_unmap_cmd(rq->cmd, rgn);
>> +   rq->cmd_len = HPB_WRITE_BUFFER_CMD_LENGTH;
>> +
>> +   blk_execute_rq_nowait(NULL, req, 1, ufshpb_umap_req_compl_fn);
>Typo? Forgot the struct request_queue *q?

The argument of q is removed after this patch.

https://lore.kernel.org/linux-scsi/1611550198-17142-1-git-send-email-guoqing.ji...@cloud.ionos.com/#r

Thanks,
Daejun

>> +
>> +   return 0;
>> +}
>> +
>>  static int ufshpb_execute_map_req(struct ufshpb_lu *hpb,
>>   struct ufshpb_req *map_req, bool last)
>>  {
>> @@ -533,12 +878,12 @@ static int ufshpb_execute_map_req(struct ufshpb_lu
>> *hpb,
>> 
>> q = hpb->sdev_ufs_lu->request_queue;
>> for (i = 0; i < hpb->pages_per_srgn; i++) {
>> -   ret = bio_add_pc_page(q, map_req->bio, 
>> map_req->mctx->m_page[i],
>> +   ret = bio_add_pc_page(q, map_req->bio, map_req->rb.mctx-
>> >m_page[i],
>>   PAGE_SIZE, 0);
>> if (ret != PAGE_SIZE) {
>> dev_err(>sdev_ufs_lu->sdev_dev,
>>"bio_add_pc_page fail %d - %d\n",
>> -  map_req->rgn_idx, map_req->srgn_idx);
>> +  map_req->rb.rgn_idx, 
>> map_req->rb.srgn_idx);
>> return ret;
>> }
>> }
>> @@ -554,8 +899,8 @@ static int ufshpb_execute_map_req(struct ufshpb_lu
>> *hpb,
>> if (unlikely(last))
>> mem_size = hpb->last_srgn_entries * HPB_ENTRY_SIZE;
>> 
>> -   ufshpb_set_read_buf_cmd(rq->cmd, map_req->rgn_idx,
>> -   map_req->srgn_idx, mem_size);
>> +   ufshpb_set_read_buf_cmd(rq->cmd, map_req->rb.rgn_idx,
>> +   map_req->rb.srgn_idx, mem_size);
>> rq->cmd_len = HPB_READ_BUFFER_CMD_LENGTH;
>> 
>> blk_execute_rq_nowait(NULL, req, 1, ufshpb_map_req_compl_fn);
>Ditto
> 
> 
>Thanks,
>Avri
> 
> 
>  


RE: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-21 Thread Avri Altman

> +static int ufshpb_execute_umap_req(struct ufshpb_lu *hpb,
> +  struct ufshpb_req *umap_req,
> +  struct ufshpb_region *rgn)
> +{
> +   struct request *req;
> +   struct scsi_request *rq;
> +
> +   req = umap_req->req;
> +   req->timeout = 0;
> +   req->end_io_data = (void *)umap_req;
> +   rq = scsi_req(req);
> +   ufshpb_set_unmap_cmd(rq->cmd, rgn);
> +   rq->cmd_len = HPB_WRITE_BUFFER_CMD_LENGTH;
> +
> +   blk_execute_rq_nowait(NULL, req, 1, ufshpb_umap_req_compl_fn);
Typo? Forgot the struct request_queue *q?

> +
> +   return 0;
> +}
> +
>  static int ufshpb_execute_map_req(struct ufshpb_lu *hpb,
>   struct ufshpb_req *map_req, bool last)
>  {
> @@ -533,12 +878,12 @@ static int ufshpb_execute_map_req(struct ufshpb_lu
> *hpb,
> 
> q = hpb->sdev_ufs_lu->request_queue;
> for (i = 0; i < hpb->pages_per_srgn; i++) {
> -   ret = bio_add_pc_page(q, map_req->bio, 
> map_req->mctx->m_page[i],
> +   ret = bio_add_pc_page(q, map_req->bio, map_req->rb.mctx-
> >m_page[i],
>   PAGE_SIZE, 0);
> if (ret != PAGE_SIZE) {
> dev_err(>sdev_ufs_lu->sdev_dev,
>"bio_add_pc_page fail %d - %d\n",
> -  map_req->rgn_idx, map_req->srgn_idx);
> +  map_req->rb.rgn_idx, map_req->rb.srgn_idx);
> return ret;
> }
> }
> @@ -554,8 +899,8 @@ static int ufshpb_execute_map_req(struct ufshpb_lu
> *hpb,
> if (unlikely(last))
> mem_size = hpb->last_srgn_entries * HPB_ENTRY_SIZE;
> 
> -   ufshpb_set_read_buf_cmd(rq->cmd, map_req->rgn_idx,
> -   map_req->srgn_idx, mem_size);
> +   ufshpb_set_read_buf_cmd(rq->cmd, map_req->rb.rgn_idx,
> +   map_req->rb.srgn_idx, mem_size);
> rq->cmd_len = HPB_READ_BUFFER_CMD_LENGTH;
> 
> blk_execute_rq_nowait(NULL, req, 1, ufshpb_map_req_compl_fn);
Ditto


Thanks,
Avri


RE: Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-18 Thread Daejun Park
>On 2021-03-18 10:02, Daejun Park wrote:
>>> On 2021-03-17 09:42, Daejun Park wrote:
> On 2021-03-15 15:23, Can Guo wrote:
>> On 2021-03-15 15:07, Daejun Park wrote:
> This patch supports the HPB 2.0.
> 
> The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
> In the case of Read (<= 32KB) is supported as single HPB read.
> In the case of Read (36KB ~ 512KB) is supported by as a
> combination
> of
> write buffer command and HPB read command to deliver more PPN.
> The write buffer commands may not be issued immediately due to
> busy
> tags.
> To use HPB read more aggressively, the driver can requeue the
> write
> buffer
> command. The requeue threshold is implemented as timeout and can
> be
> modified with requeue_timeout_ms entry in sysfs.
> 
> Signed-off-by: Daejun Park 
> ---
> +static struct attribute *hpb_dev_param_attrs[] = {
> +_attr_requeue_timeout_ms.attr,
> +NULL,
> +};
> +
> +struct attribute_group ufs_sysfs_hpb_param_group = {
> +.name = "hpb_param_sysfs",
> +.attrs = hpb_dev_param_attrs,
> +};
> +
> +static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
> +{
> +struct ufshpb_req *pre_req = NULL;
> +int qd = hpb->sdev_ufs_lu->queue_depth / 2;
> +int i, j;
> +
> +INIT_LIST_HEAD(>lh_pre_req_free);
> +
> +hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req),
> GFP_KERNEL);
> +hpb->throttle_pre_req = qd;
> +hpb->num_inflight_pre_req = 0;
> +
> +if (!hpb->pre_req)
> +goto release_mem;
> +
> +for (i = 0; i < qd; i++) {
> +pre_req = hpb->pre_req + i;
> +INIT_LIST_HEAD(_req->list_req);
> +pre_req->req = NULL;
> +pre_req->bio = NULL;
 
 Why don't prepare bio as same as wb.m_page? Won't that save more
 time
 for ufshpb_issue_pre_req()?
>>> 
>>> It is pre_req pool. So although we prepare bio at this time, it 
>>> just
>>> only for first pre_req.
>> 
>> I meant removing the bio_alloc() in ufshpb_issue_pre_req() and
>> bio_put()
>> in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a
>> page.
>> So, prepare 16 (if queue depth is 32) bios here, just use them 
>> along
>> with
>> wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall 
>> it
>> work?
>> 
> 
> If it works, you can even have the bio_add_pc_page() called here.
> Later
> in
> ufshpb_execute_pre_req(), you don't need to call
> ufshpb_pre_req_add_bio_page(),
> just call ufshpb_prep_entry() once instead - it save many repeated
> steps
> for a
> pre_req, and you don't even need to call bio_reset() in this case,
> since
> for a
> bio, nothing changes after it is binded with a specific page...
 
 Hi, Can Guo
 
 I tried the idea that you suggested, but it doesn't work properly.
 This optimization should be done next time for enhancement.
>>> 
>>> Can you elaborate please? Any error seen?
>>> 
>>> Per my understanding, in the case for pre_reqs, a bio is no different
>>> from a page. Here it can reserve 16 pages for later use, which can be
>>> done the same for bios.
>> 
>> I found some problem with re-using pre allocated bio.
>> 
>> The following kernel message is related with problem.
>> [2.750530] [ cut here ]
>> [2.751404] WARNING: CPU: 4 PID: 170 at
>> drivers/scsi/scsi_lib.c:1020 scsi_alloc_sgtables+0x253/0x2b0
>> [2.753054] Modules linked in:
>> [2.753651] CPU: 4 PID: 170 Comm: mount Not tainted 5.12.0-rc1+ #331
>> [2.754752] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>> BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
>> [2.756813] RIP: 0010:scsi_alloc_sgtables+0x253/0x2b0
>> [2.757699] Code: 85 c0 74 19 41 0f b6 44 24 18 8d 50 e0 83 fa 03
>> 76 30 41 bd 01 00 00 00 e9 1f fe ff ff be 01 00 00 00 45 31 ed e9 19
>> fe ff ff <0f> 0b b8 0a f
>> [2.761021] RSP: 0018:b06e0027f538 EFLAGS: 00010246
>> [2.761902] RAX:  RBX: 9c3a42d424d0 RCX: 
>> b06e0027f5e0
>> [2.763184] RDX: 9c3a42d426a8 RSI:  RDI: 
>> 9c3a42d424d0
>> [2.764446] RBP: b06e0027f570 R08:  R09: 
>> 
>> [2.765704] R10: 8eb0dda0 R11: fffb7675 R12: 
>> 9c3a42d423c0
>> [2.766976] R13:  R14: 9c3a41bed000 R15: 
>> 9c3a420f4000
>> [2.768225] FS:  7f42d1eab100() 

Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-17 Thread Can Guo

On 2021-03-18 10:02, Daejun Park wrote:

On 2021-03-17 09:42, Daejun Park wrote:

On 2021-03-15 15:23, Can Guo wrote:

On 2021-03-15 15:07, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a
combination
of
write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to
busy
tags.
To use HPB read more aggressively, the driver can requeue the
write
buffer
command. The requeue threshold is implemented as timeout and can
be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static struct attribute *hpb_dev_param_attrs[] = {
+_attr_requeue_timeout_ms.attr,
+NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+.name = "hpb_param_sysfs",
+.attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+struct ufshpb_req *pre_req = NULL;
+int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+int i, j;
+
+INIT_LIST_HEAD(>lh_pre_req_free);
+
+hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req),
GFP_KERNEL);
+hpb->throttle_pre_req = qd;
+hpb->num_inflight_pre_req = 0;
+
+if (!hpb->pre_req)
+goto release_mem;
+
+for (i = 0; i < qd; i++) {
+pre_req = hpb->pre_req + i;
+INIT_LIST_HEAD(_req->list_req);
+pre_req->req = NULL;
+pre_req->bio = NULL;


Why don't prepare bio as same as wb.m_page? Won't that save more
time
for ufshpb_issue_pre_req()?


It is pre_req pool. So although we prepare bio at this time, it 
just

only for first pre_req.


I meant removing the bio_alloc() in ufshpb_issue_pre_req() and
bio_put()
in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a
page.
So, prepare 16 (if queue depth is 32) bios here, just use them 
along

with
wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall 
it

work?



If it works, you can even have the bio_add_pc_page() called here.
Later
in
ufshpb_execute_pre_req(), you don't need to call
ufshpb_pre_req_add_bio_page(),
just call ufshpb_prep_entry() once instead - it save many repeated
steps
for a
pre_req, and you don't even need to call bio_reset() in this case,
since
for a
bio, nothing changes after it is binded with a specific page...


Hi, Can Guo

I tried the idea that you suggested, but it doesn't work properly.
This optimization should be done next time for enhancement.


Can you elaborate please? Any error seen?

Per my understanding, in the case for pre_reqs, a bio is no different
from a page. Here it can reserve 16 pages for later use, which can be
done the same for bios.


I found some problem with re-using pre allocated bio.

The following kernel message is related with problem.
[2.750530] [ cut here ]
[2.751404] WARNING: CPU: 4 PID: 170 at
drivers/scsi/scsi_lib.c:1020 scsi_alloc_sgtables+0x253/0x2b0
[2.753054] Modules linked in:
[2.753651] CPU: 4 PID: 170 Comm: mount Not tainted 5.12.0-rc1+ #331
[2.754752] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[2.756813] RIP: 0010:scsi_alloc_sgtables+0x253/0x2b0
[2.757699] Code: 85 c0 74 19 41 0f b6 44 24 18 8d 50 e0 83 fa 03
76 30 41 bd 01 00 00 00 e9 1f fe ff ff be 01 00 00 00 45 31 ed e9 19
fe ff ff <0f> 0b b8 0a f
[2.761021] RSP: 0018:b06e0027f538 EFLAGS: 00010246
[2.761902] RAX:  RBX: 9c3a42d424d0 RCX: 
b06e0027f5e0
[2.763184] RDX: 9c3a42d426a8 RSI:  RDI: 
9c3a42d424d0
[2.764446] RBP: b06e0027f570 R08:  R09: 

[2.765704] R10: 8eb0dda0 R11: fffb7675 R12: 
9c3a42d423c0
[2.766976] R13:  R14: 9c3a41bed000 R15: 
9c3a420f4000

[2.768225] FS:  7f42d1eab100() GS:9c3b77c0()
knlGS:
[2.769666] CS:  0010 DS:  ES:  CR0: 80050033
[2.770719] CR2: 7f42d1ac1000 CR3: 000104bee006 CR4: 
00370ee0
[2.771997] DR0:  DR1:  DR2: 

[2.773288] DR3:  DR6: fffe0ff0 DR7: 
0400

[2.774543] Call Trace:
[2.775092]  scsi_queue_rq+0x9b6/0xb20
[2.775754]  __blk_mq_try_issue_directly+0x150/0x1f0
[2.776636]  blk_mq_request_issue_directly+0x49/0x80
[2.777616]  blk_insert_cloned_request+0x85/0xd0
[2.778470]  ufshpb_prep.cold+0x793/0x7be
[2.779179]  ufshcd_queuecommand+0x114/0x690
[2.779986]  scsi_queue_rq+0x38a/0xb20
[2.780755]  blk_mq_dispatch_rq_list+0x13d/0x760
[2.781605]  ? dd_dispatch_request+0x67/0x1c0
[2.782337]  

RE: Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-17 Thread Daejun Park
>On 2021-03-18 10:02, Daejun Park wrote:
>>> On 2021-03-17 09:42, Daejun Park wrote:
> On 2021-03-15 15:23, Can Guo wrote:
>> On 2021-03-15 15:07, Daejun Park wrote:
> This patch supports the HPB 2.0.
> 
> The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
> In the case of Read (<= 32KB) is supported as single HPB read.
> In the case of Read (36KB ~ 512KB) is supported by as a
> combination
> of
> write buffer command and HPB read command to deliver more PPN.
> The write buffer commands may not be issued immediately due to
> busy
> tags.
> To use HPB read more aggressively, the driver can requeue the
> write
> buffer
> command. The requeue threshold is implemented as timeout and can
> be
> modified with requeue_timeout_ms entry in sysfs.
> 
> Signed-off-by: Daejun Park 
> ---
> +static struct attribute *hpb_dev_param_attrs[] = {
> +_attr_requeue_timeout_ms.attr,
> +NULL,
> +};
> +
> +struct attribute_group ufs_sysfs_hpb_param_group = {
> +.name = "hpb_param_sysfs",
> +.attrs = hpb_dev_param_attrs,
> +};
> +
> +static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
> +{
> +struct ufshpb_req *pre_req = NULL;
> +int qd = hpb->sdev_ufs_lu->queue_depth / 2;
> +int i, j;
> +
> +INIT_LIST_HEAD(>lh_pre_req_free);
> +
> +hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req),
> GFP_KERNEL);
> +hpb->throttle_pre_req = qd;
> +hpb->num_inflight_pre_req = 0;
> +
> +if (!hpb->pre_req)
> +goto release_mem;
> +
> +for (i = 0; i < qd; i++) {
> +pre_req = hpb->pre_req + i;
> +INIT_LIST_HEAD(_req->list_req);
> +pre_req->req = NULL;
> +pre_req->bio = NULL;
 
 Why don't prepare bio as same as wb.m_page? Won't that save more
 time
 for ufshpb_issue_pre_req()?
>>> 
>>> It is pre_req pool. So although we prepare bio at this time, it 
>>> just
>>> only for first pre_req.
>> 
>> I meant removing the bio_alloc() in ufshpb_issue_pre_req() and
>> bio_put()
>> in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a
>> page.
>> So, prepare 16 (if queue depth is 32) bios here, just use them 
>> along
>> with
>> wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall 
>> it
>> work?
>> 
> 
> If it works, you can even have the bio_add_pc_page() called here.
> Later
> in
> ufshpb_execute_pre_req(), you don't need to call
> ufshpb_pre_req_add_bio_page(),
> just call ufshpb_prep_entry() once instead - it save many repeated
> steps
> for a
> pre_req, and you don't even need to call bio_reset() in this case,
> since
> for a
> bio, nothing changes after it is binded with a specific page...
 
 Hi, Can Guo
 
 I tried the idea that you suggested, but it doesn't work properly.
 This optimization should be done next time for enhancement.
>>> 
>>> Can you elaborate please? Any error seen?
>>> 
>>> Per my understanding, in the case for pre_reqs, a bio is no different
>>> from a page. Here it can reserve 16 pages for later use, which can be
>>> done the same for bios.
>> 
>> I found some problem with re-using pre allocated bio.
>> 
>> The following kernel message is related with problem.
>> [2.750530] [ cut here ]
>> [2.751404] WARNING: CPU: 4 PID: 170 at
>> drivers/scsi/scsi_lib.c:1020 scsi_alloc_sgtables+0x253/0x2b0
>> [2.753054] Modules linked in:
>> [2.753651] CPU: 4 PID: 170 Comm: mount Not tainted 5.12.0-rc1+ #331
>> [2.754752] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>> BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
>> [2.756813] RIP: 0010:scsi_alloc_sgtables+0x253/0x2b0
>> [2.757699] Code: 85 c0 74 19 41 0f b6 44 24 18 8d 50 e0 83 fa 03
>> 76 30 41 bd 01 00 00 00 e9 1f fe ff ff be 01 00 00 00 45 31 ed e9 19
>> fe ff ff <0f> 0b b8 0a f
>> [2.761021] RSP: 0018:b06e0027f538 EFLAGS: 00010246
>> [2.761902] RAX:  RBX: 9c3a42d424d0 RCX: 
>> b06e0027f5e0
>> [2.763184] RDX: 9c3a42d426a8 RSI:  RDI: 
>> 9c3a42d424d0
>> [2.764446] RBP: b06e0027f570 R08:  R09: 
>> 
>> [2.765704] R10: 8eb0dda0 R11: fffb7675 R12: 
>> 9c3a42d423c0
>> [2.766976] R13:  R14: 9c3a41bed000 R15: 
>> 9c3a420f4000
>> [2.768225] FS:  7f42d1eab100() 

Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-17 Thread Can Guo

On 2021-03-18 10:02, Daejun Park wrote:

On 2021-03-17 09:42, Daejun Park wrote:

On 2021-03-15 15:23, Can Guo wrote:

On 2021-03-15 15:07, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a
combination
of
write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to
busy
tags.
To use HPB read more aggressively, the driver can requeue the
write
buffer
command. The requeue threshold is implemented as timeout and can
be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static struct attribute *hpb_dev_param_attrs[] = {
+_attr_requeue_timeout_ms.attr,
+NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+.name = "hpb_param_sysfs",
+.attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+struct ufshpb_req *pre_req = NULL;
+int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+int i, j;
+
+INIT_LIST_HEAD(>lh_pre_req_free);
+
+hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req),
GFP_KERNEL);
+hpb->throttle_pre_req = qd;
+hpb->num_inflight_pre_req = 0;
+
+if (!hpb->pre_req)
+goto release_mem;
+
+for (i = 0; i < qd; i++) {
+pre_req = hpb->pre_req + i;
+INIT_LIST_HEAD(_req->list_req);
+pre_req->req = NULL;
+pre_req->bio = NULL;


Why don't prepare bio as same as wb.m_page? Won't that save more
time
for ufshpb_issue_pre_req()?


It is pre_req pool. So although we prepare bio at this time, it 
just

only for first pre_req.


I meant removing the bio_alloc() in ufshpb_issue_pre_req() and
bio_put()
in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a
page.
So, prepare 16 (if queue depth is 32) bios here, just use them 
along

with
wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall 
it

work?



If it works, you can even have the bio_add_pc_page() called here.
Later
in
ufshpb_execute_pre_req(), you don't need to call
ufshpb_pre_req_add_bio_page(),
just call ufshpb_prep_entry() once instead - it save many repeated
steps
for a
pre_req, and you don't even need to call bio_reset() in this case,
since
for a
bio, nothing changes after it is binded with a specific page...


Hi, Can Guo

I tried the idea that you suggested, but it doesn't work properly.
This optimization should be done next time for enhancement.


Can you elaborate please? Any error seen?

Per my understanding, in the case for pre_reqs, a bio is no different
from a page. Here it can reserve 16 pages for later use, which can be
done the same for bios.


I found some problem with re-using pre allocated bio.

The following kernel message is related with problem.
[2.750530] [ cut here ]
[2.751404] WARNING: CPU: 4 PID: 170 at
drivers/scsi/scsi_lib.c:1020 scsi_alloc_sgtables+0x253/0x2b0
[2.753054] Modules linked in:
[2.753651] CPU: 4 PID: 170 Comm: mount Not tainted 5.12.0-rc1+ #331
[2.754752] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[2.756813] RIP: 0010:scsi_alloc_sgtables+0x253/0x2b0
[2.757699] Code: 85 c0 74 19 41 0f b6 44 24 18 8d 50 e0 83 fa 03
76 30 41 bd 01 00 00 00 e9 1f fe ff ff be 01 00 00 00 45 31 ed e9 19
fe ff ff <0f> 0b b8 0a f
[2.761021] RSP: 0018:b06e0027f538 EFLAGS: 00010246
[2.761902] RAX:  RBX: 9c3a42d424d0 RCX: 
b06e0027f5e0
[2.763184] RDX: 9c3a42d426a8 RSI:  RDI: 
9c3a42d424d0
[2.764446] RBP: b06e0027f570 R08:  R09: 

[2.765704] R10: 8eb0dda0 R11: fffb7675 R12: 
9c3a42d423c0
[2.766976] R13:  R14: 9c3a41bed000 R15: 
9c3a420f4000

[2.768225] FS:  7f42d1eab100() GS:9c3b77c0()
knlGS:
[2.769666] CS:  0010 DS:  ES:  CR0: 80050033
[2.770719] CR2: 7f42d1ac1000 CR3: 000104bee006 CR4: 
00370ee0
[2.771997] DR0:  DR1:  DR2: 

[2.773288] DR3:  DR6: fffe0ff0 DR7: 
0400

[2.774543] Call Trace:
[2.775092]  scsi_queue_rq+0x9b6/0xb20
[2.775754]  __blk_mq_try_issue_directly+0x150/0x1f0
[2.776636]  blk_mq_request_issue_directly+0x49/0x80
[2.777616]  blk_insert_cloned_request+0x85/0xd0
[2.778470]  ufshpb_prep.cold+0x793/0x7be
[2.779179]  ufshcd_queuecommand+0x114/0x690
[2.779986]  scsi_queue_rq+0x38a/0xb20
[2.780755]  blk_mq_dispatch_rq_list+0x13d/0x760
[2.781605]  ? dd_dispatch_request+0x67/0x1c0
[2.782337]  

RE: Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-17 Thread Daejun Park
>On 2021-03-17 09:42, Daejun Park wrote:
>>> On 2021-03-15 15:23, Can Guo wrote:
 On 2021-03-15 15:07, Daejun Park wrote:
>>> This patch supports the HPB 2.0.
>>> 
>>> The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
>>> In the case of Read (<= 32KB) is supported as single HPB read.
>>> In the case of Read (36KB ~ 512KB) is supported by as a 
>>> combination
>>> of
>>> write buffer command and HPB read command to deliver more PPN.
>>> The write buffer commands may not be issued immediately due to 
>>> busy
>>> tags.
>>> To use HPB read more aggressively, the driver can requeue the 
>>> write
>>> buffer
>>> command. The requeue threshold is implemented as timeout and can 
>>> be
>>> modified with requeue_timeout_ms entry in sysfs.
>>> 
>>> Signed-off-by: Daejun Park 
>>> ---
>>> +static struct attribute *hpb_dev_param_attrs[] = {
>>> +_attr_requeue_timeout_ms.attr,
>>> +NULL,
>>> +};
>>> +
>>> +struct attribute_group ufs_sysfs_hpb_param_group = {
>>> +.name = "hpb_param_sysfs",
>>> +.attrs = hpb_dev_param_attrs,
>>> +};
>>> +
>>> +static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
>>> +{
>>> +struct ufshpb_req *pre_req = NULL;
>>> +int qd = hpb->sdev_ufs_lu->queue_depth / 2;
>>> +int i, j;
>>> +
>>> +INIT_LIST_HEAD(>lh_pre_req_free);
>>> +
>>> +hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req),
>>> GFP_KERNEL);
>>> +hpb->throttle_pre_req = qd;
>>> +hpb->num_inflight_pre_req = 0;
>>> +
>>> +if (!hpb->pre_req)
>>> +goto release_mem;
>>> +
>>> +for (i = 0; i < qd; i++) {
>>> +pre_req = hpb->pre_req + i;
>>> +INIT_LIST_HEAD(_req->list_req);
>>> +pre_req->req = NULL;
>>> +pre_req->bio = NULL;
>> 
>> Why don't prepare bio as same as wb.m_page? Won't that save more 
>> time
>> for ufshpb_issue_pre_req()?
> 
> It is pre_req pool. So although we prepare bio at this time, it just
> only for first pre_req.
 
 I meant removing the bio_alloc() in ufshpb_issue_pre_req() and
 bio_put()
 in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a
 page.
 So, prepare 16 (if queue depth is 32) bios here, just use them along
 with
 wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall it
 work?
 
>>> 
>>> If it works, you can even have the bio_add_pc_page() called here. 
>>> Later
>>> in
>>> ufshpb_execute_pre_req(), you don't need to call
>>> ufshpb_pre_req_add_bio_page(),
>>> just call ufshpb_prep_entry() once instead - it save many repeated 
>>> steps
>>> for a
>>> pre_req, and you don't even need to call bio_reset() in this case, 
>>> since
>>> for a
>>> bio, nothing changes after it is binded with a specific page...
>> 
>> Hi, Can Guo
>> 
>> I tried the idea that you suggested, but it doesn't work properly.
>> This optimization should be done next time for enhancement.
> 
>Can you elaborate please? Any error seen?
> 
>Per my understanding, in the case for pre_reqs, a bio is no different
>from a page. Here it can reserve 16 pages for later use, which can be
>done the same for bios.

I found some problem with re-using pre allocated bio.

The following kernel message is related with problem.
[2.750530] [ cut here ]
[2.751404] WARNING: CPU: 4 PID: 170 at drivers/scsi/scsi_lib.c:1020 
scsi_alloc_sgtables+0x253/0x2b0
[2.753054] Modules linked in:
[2.753651] CPU: 4 PID: 170 Comm: mount Not tainted 5.12.0-rc1+ #331
[2.754752] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[2.756813] RIP: 0010:scsi_alloc_sgtables+0x253/0x2b0
[2.757699] Code: 85 c0 74 19 41 0f b6 44 24 18 8d 50 e0 83 fa 03 76 30 41 
bd 01 00 00 00 e9 1f fe ff ff be 01 00 00 00 45 31 ed e9 19 fe ff ff <0f> 0b b8 
0a f
[2.761021] RSP: 0018:b06e0027f538 EFLAGS: 00010246
[2.761902] RAX:  RBX: 9c3a42d424d0 RCX: b06e0027f5e0
[2.763184] RDX: 9c3a42d426a8 RSI:  RDI: 9c3a42d424d0
[2.764446] RBP: b06e0027f570 R08:  R09: 
[2.765704] R10: 8eb0dda0 R11: fffb7675 R12: 9c3a42d423c0
[2.766976] R13:  R14: 9c3a41bed000 R15: 9c3a420f4000
[2.768225] FS:  7f42d1eab100() GS:9c3b77c0() 
knlGS:
[2.769666] CS:  0010 DS:  ES:  CR0: 80050033
[2.770719] CR2: 7f42d1ac1000 CR3: 000104bee006 CR4: 00370ee0
[2.771997] DR0:  DR1:  DR2: 
[2.773288] DR3:  DR6: fffe0ff0 

Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-16 Thread Can Guo

On 2021-03-17 09:42, Daejun Park wrote:

On 2021-03-15 15:23, Can Guo wrote:

On 2021-03-15 15:07, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a 
combination

of
write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to 
busy

tags.
To use HPB read more aggressively, the driver can requeue the 
write

buffer
command. The requeue threshold is implemented as timeout and can 
be

modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static struct attribute *hpb_dev_param_attrs[] = {
+_attr_requeue_timeout_ms.attr,
+NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+.name = "hpb_param_sysfs",
+.attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+struct ufshpb_req *pre_req = NULL;
+int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+int i, j;
+
+INIT_LIST_HEAD(>lh_pre_req_free);
+
+hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req),
GFP_KERNEL);
+hpb->throttle_pre_req = qd;
+hpb->num_inflight_pre_req = 0;
+
+if (!hpb->pre_req)
+goto release_mem;
+
+for (i = 0; i < qd; i++) {
+pre_req = hpb->pre_req + i;
+INIT_LIST_HEAD(_req->list_req);
+pre_req->req = NULL;
+pre_req->bio = NULL;


Why don't prepare bio as same as wb.m_page? Won't that save more 
time

for ufshpb_issue_pre_req()?


It is pre_req pool. So although we prepare bio at this time, it just
only for first pre_req.


I meant removing the bio_alloc() in ufshpb_issue_pre_req() and
bio_put()
in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a
page.
So, prepare 16 (if queue depth is 32) bios here, just use them along
with
wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall it
work?



If it works, you can even have the bio_add_pc_page() called here. 
Later

in
ufshpb_execute_pre_req(), you don't need to call
ufshpb_pre_req_add_bio_page(),
just call ufshpb_prep_entry() once instead - it save many repeated 
steps

for a
pre_req, and you don't even need to call bio_reset() in this case, 
since

for a
bio, nothing changes after it is binded with a specific page...


Hi, Can Guo

I tried the idea that you suggested, but it doesn't work properly.
This optimization should be done next time for enhancement.


Can you elaborate please? Any error seen?

Per my understanding, in the case for pre_reqs, a bio is no different
from a page. Here it can reserve 16 pages for later use, which can be
done the same for bios.

This is not an enhancement, but a doubt - why not? Unless it is not 
doable.


Thanks,
Can Guo.



Thanks
Daejun


Can Guo.


Thanks,
Can Guo.


After use it, it should be prepared bio at issue phase.

Thanks,
Daejun



Thanks,
Can Guo.


+
+pre_req->wb.m_page = alloc_page(GFP_KERNEL |
__GFP_ZERO);
+if (!pre_req->wb.m_page) {
+for (j = 0; j < i; j++)
+
__free_page(hpb->pre_req[j].wb.m_page);
+
+goto release_mem;
+}
+list_add_tail(_req->list_req,
>lh_pre_req_free);
+}
+
+return 0;
+release_mem:
+kfree(hpb->pre_req);
+return -ENOMEM;
+}
+










RE: Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-16 Thread Daejun Park
>On 2021-03-15 15:23, Can Guo wrote:
>> On 2021-03-15 15:07, Daejun Park wrote:
> This patch supports the HPB 2.0.
> 
> The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
> In the case of Read (<= 32KB) is supported as single HPB read.
> In the case of Read (36KB ~ 512KB) is supported by as a combination 
> of
> write buffer command and HPB read command to deliver more PPN.
> The write buffer commands may not be issued immediately due to busy
> tags.
> To use HPB read more aggressively, the driver can requeue the write
> buffer
> command. The requeue threshold is implemented as timeout and can be
> modified with requeue_timeout_ms entry in sysfs.
> 
> Signed-off-by: Daejun Park 
> ---
> +static struct attribute *hpb_dev_param_attrs[] = {
> +_attr_requeue_timeout_ms.attr,
> +NULL,
> +};
> +
> +struct attribute_group ufs_sysfs_hpb_param_group = {
> +.name = "hpb_param_sysfs",
> +.attrs = hpb_dev_param_attrs,
> +};
> +
> +static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
> +{
> +struct ufshpb_req *pre_req = NULL;
> +int qd = hpb->sdev_ufs_lu->queue_depth / 2;
> +int i, j;
> +
> +INIT_LIST_HEAD(>lh_pre_req_free);
> +
> +hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req), 
> GFP_KERNEL);
> +hpb->throttle_pre_req = qd;
> +hpb->num_inflight_pre_req = 0;
> +
> +if (!hpb->pre_req)
> +goto release_mem;
> +
> +for (i = 0; i < qd; i++) {
> +pre_req = hpb->pre_req + i;
> +INIT_LIST_HEAD(_req->list_req);
> +pre_req->req = NULL;
> +pre_req->bio = NULL;
 
 Why don't prepare bio as same as wb.m_page? Won't that save more time
 for ufshpb_issue_pre_req()?
>>> 
>>> It is pre_req pool. So although we prepare bio at this time, it just
>>> only for first pre_req.
>> 
>> I meant removing the bio_alloc() in ufshpb_issue_pre_req() and 
>> bio_put()
>> in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a 
>> page.
>> So, prepare 16 (if queue depth is 32) bios here, just use them along 
>> with
>> wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall it 
>> work?
>> 
> 
>If it works, you can even have the bio_add_pc_page() called here. Later 
>in
>ufshpb_execute_pre_req(), you don't need to call 
>ufshpb_pre_req_add_bio_page(),
>just call ufshpb_prep_entry() once instead - it save many repeated steps 
>for a
>pre_req, and you don't even need to call bio_reset() in this case, since 
>for a
>bio, nothing changes after it is binded with a specific page...

Hi, Can Guo

I tried the idea that you suggested, but it doesn't work properly.
This optimization should be done next time for enhancement.

Thanks
Daejun

>Can Guo.
> 
>> Thanks,
>> Can Guo.
>> 
>>> After use it, it should be prepared bio at issue phase.
>>> 
>>> Thanks,
>>> Daejun
>>> 
 
 Thanks,
 Can Guo.
 
> +
> +pre_req->wb.m_page = alloc_page(GFP_KERNEL | 
> __GFP_ZERO);
> +if (!pre_req->wb.m_page) {
> +for (j = 0; j < i; j++)
> +
> __free_page(hpb->pre_req[j].wb.m_page);
> +
> +goto release_mem;
> +}
> +list_add_tail(_req->list_req, 
> >lh_pre_req_free);
> +}
> +
> +return 0;
> +release_mem:
> +kfree(hpb->pre_req);
> +return -ENOMEM;
> +}
> +
 
 
 
> 
> 
>  


Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-15 Thread Can Guo

On 2021-03-15 15:23, Can Guo wrote:

On 2021-03-15 15:07, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a combination 
of

write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to busy
tags.
To use HPB read more aggressively, the driver can requeue the write
buffer
command. The requeue threshold is implemented as timeout and can be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static struct attribute *hpb_dev_param_attrs[] = {
+_attr_requeue_timeout_ms.attr,
+NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+.name = "hpb_param_sysfs",
+.attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+struct ufshpb_req *pre_req = NULL;
+int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+int i, j;
+
+INIT_LIST_HEAD(>lh_pre_req_free);
+
+hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req), 
GFP_KERNEL);

+hpb->throttle_pre_req = qd;
+hpb->num_inflight_pre_req = 0;
+
+if (!hpb->pre_req)
+goto release_mem;
+
+for (i = 0; i < qd; i++) {
+pre_req = hpb->pre_req + i;
+INIT_LIST_HEAD(_req->list_req);
+pre_req->req = NULL;
+pre_req->bio = NULL;


Why don't prepare bio as same as wb.m_page? Won't that save more time
for ufshpb_issue_pre_req()?


It is pre_req pool. So although we prepare bio at this time, it just
only for first pre_req.


I meant removing the bio_alloc() in ufshpb_issue_pre_req() and 
bio_put()
in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a 
page.
So, prepare 16 (if queue depth is 32) bios here, just use them along 
with
wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall it 
work?




If it works, you can even have the bio_add_pc_page() called here. Later 
in
ufshpb_execute_pre_req(), you don't need to call 
ufshpb_pre_req_add_bio_page(),
just call ufshpb_prep_entry() once instead - it save many repeated steps 
for a
pre_req, and you don't even need to call bio_reset() in this case, since 
for a

bio, nothing changes after it is binded with a specific page...

Can Guo.


Thanks,
Can Guo.


After use it, it should be prepared bio at issue phase.

Thanks,
Daejun



Thanks,
Can Guo.


+
+pre_req->wb.m_page = alloc_page(GFP_KERNEL | 
__GFP_ZERO);

+if (!pre_req->wb.m_page) {
+for (j = 0; j < i; j++)
+
__free_page(hpb->pre_req[j].wb.m_page);

+
+goto release_mem;
+}
+list_add_tail(_req->list_req, 
>lh_pre_req_free);

+}
+
+return 0;
+release_mem:
+kfree(hpb->pre_req);
+return -ENOMEM;
+}
+






Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-15 Thread Can Guo

On 2021-03-15 15:07, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a combination 
of

write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to busy
tags.
To use HPB read more aggressively, the driver can requeue the write
buffer
command. The requeue threshold is implemented as timeout and can be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static struct attribute *hpb_dev_param_attrs[] = {
+_attr_requeue_timeout_ms.attr,
+NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+.name = "hpb_param_sysfs",
+.attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+struct ufshpb_req *pre_req = NULL;
+int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+int i, j;
+
+INIT_LIST_HEAD(>lh_pre_req_free);
+
+hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req), 
GFP_KERNEL);

+hpb->throttle_pre_req = qd;
+hpb->num_inflight_pre_req = 0;
+
+if (!hpb->pre_req)
+goto release_mem;
+
+for (i = 0; i < qd; i++) {
+pre_req = hpb->pre_req + i;
+INIT_LIST_HEAD(_req->list_req);
+pre_req->req = NULL;
+pre_req->bio = NULL;


Why don't prepare bio as same as wb.m_page? Won't that save more time
for ufshpb_issue_pre_req()?


It is pre_req pool. So although we prepare bio at this time, it just
only for first pre_req.


I meant removing the bio_alloc() in ufshpb_issue_pre_req() and bio_put()
in ufshpb_pre_req_compl_fn(). bios, in pre_req's case, just hold a page.
So, prepare 16 (if queue depth is 32) bios here, just use them along 
with
wb.m_page and call bio_reset() in ufshpb_pre_req_compl_fn(). Shall it 
work?


Thanks,
Can Guo.


After use it, it should be prepared bio at issue phase.

Thanks,
Daejun



Thanks,
Can Guo.


+
+pre_req->wb.m_page = alloc_page(GFP_KERNEL | 
__GFP_ZERO);

+if (!pre_req->wb.m_page) {
+for (j = 0; j < i; j++)
+
__free_page(hpb->pre_req[j].wb.m_page);

+
+goto release_mem;
+}
+list_add_tail(_req->list_req, 
>lh_pre_req_free);

+}
+
+return 0;
+release_mem:
+kfree(hpb->pre_req);
+return -ENOMEM;
+}
+






RE: Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-15 Thread Daejun Park
>> This patch supports the HPB 2.0.
>> 
>> The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
>> In the case of Read (<= 32KB) is supported as single HPB read.
>> In the case of Read (36KB ~ 512KB) is supported by as a combination of
>> write buffer command and HPB read command to deliver more PPN.
>> The write buffer commands may not be issued immediately due to busy 
>> tags.
>> To use HPB read more aggressively, the driver can requeue the write 
>> buffer
>> command. The requeue threshold is implemented as timeout and can be
>> modified with requeue_timeout_ms entry in sysfs.
>> 
>> Signed-off-by: Daejun Park 
>> ---
>> +static struct attribute *hpb_dev_param_attrs[] = {
>> +_attr_requeue_timeout_ms.attr,
>> +NULL,
>> +};
>> +
>> +struct attribute_group ufs_sysfs_hpb_param_group = {
>> +.name = "hpb_param_sysfs",
>> +.attrs = hpb_dev_param_attrs,
>> +};
>> +
>> +static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
>> +{
>> +struct ufshpb_req *pre_req = NULL;
>> +int qd = hpb->sdev_ufs_lu->queue_depth / 2;
>> +int i, j;
>> +
>> +INIT_LIST_HEAD(>lh_pre_req_free);
>> +
>> +hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req), GFP_KERNEL);
>> +hpb->throttle_pre_req = qd;
>> +hpb->num_inflight_pre_req = 0;
>> +
>> +if (!hpb->pre_req)
>> +goto release_mem;
>> +
>> +for (i = 0; i < qd; i++) {
>> +pre_req = hpb->pre_req + i;
>> +INIT_LIST_HEAD(_req->list_req);
>> +pre_req->req = NULL;
>> +pre_req->bio = NULL;
> 
>Why don't prepare bio as same as wb.m_page? Won't that save more time
>for ufshpb_issue_pre_req()?

It is pre_req pool. So although we prepare bio at this time, it just only for 
first pre_req.
After use it, it should be prepared bio at issue phase.

Thanks,
Daejun

> 
>Thanks,
>Can Guo.
> 
>> +
>> +pre_req->wb.m_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> +if (!pre_req->wb.m_page) {
>> +for (j = 0; j < i; j++)
>> +__free_page(hpb->pre_req[j].wb.m_page);
>> +
>> +goto release_mem;
>> +}
>> +list_add_tail(_req->list_req, >lh_pre_req_free);
>> +}
>> +
>> +return 0;
>> +release_mem:
>> +kfree(hpb->pre_req);
>> +return -ENOMEM;
>> +}
>> +
> 
> 
>  


Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-15 Thread Can Guo

On 2021-03-15 09:31, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a combination of
write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to busy 
tags.
To use HPB read more aggressively, the driver can requeue the write 
buffer

command. The requeue threshold is implemented as timeout and can be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static int ufshpb_issue_pre_req(struct ufshpb_lu *hpb, struct 
scsi_cmnd *cmd,

+   int *read_id)
+{
+   struct ufshpb_req *pre_req;
+   struct request *req = NULL;
+   struct bio *bio = NULL;
+   unsigned long flags;
+   int _read_id;
+   int ret = 0;
+
+   req = blk_get_request(cmd->device->request_queue,


To keep symmetry with ufshpb_get_req(), can we use 
hpb->sdev_ufs_lu->request_queue?


Thanks,
Can Guo.


+ REQ_OP_SCSI_OUT | REQ_SYNC, BLK_MQ_REQ_NOWAIT);
+   if (IS_ERR(req))
+   return -EAGAIN;
+
+   bio = bio_alloc(GFP_ATOMIC, 1);
+   if (!bio) {
+   blk_put_request(req);
+   return -EAGAIN;
+   }
+
+   spin_lock_irqsave(>rgn_state_lock, flags);
+   pre_req = ufshpb_get_pre_req(hpb);
+   if (!pre_req) {
+   ret = -EAGAIN;
+   goto unlock_out;
+   }
+   _read_id = ufshpb_get_read_id(hpb);
+   spin_unlock_irqrestore(>rgn_state_lock, flags);
+
+   pre_req->req = req;
+   pre_req->bio = bio;
+
+   ret = ufshpb_execute_pre_req(hpb, cmd, pre_req, _read_id);
+   if (ret)
+   goto free_pre_req;
+
+   *read_id = _read_id;
+
+   return ret;
+free_pre_req:
+   spin_lock_irqsave(>rgn_state_lock, flags);
+   ufshpb_put_pre_req(hpb, pre_req);
+unlock_out:
+   spin_unlock_irqrestore(>rgn_state_lock, flags);
+   bio_put(bio);
+   blk_put_request(req);
+   return ret;
+}
+


Re: [PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-14 Thread Can Guo

On 2021-03-15 09:31, Daejun Park wrote:

This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a combination of
write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to busy 
tags.
To use HPB read more aggressively, the driver can requeue the write 
buffer

command. The requeue threshold is implemented as timeout and can be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
+static struct attribute *hpb_dev_param_attrs[] = {
+   _attr_requeue_timeout_ms.attr,
+   NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+   .name = "hpb_param_sysfs",
+   .attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+   struct ufshpb_req *pre_req = NULL;
+   int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+   int i, j;
+
+   INIT_LIST_HEAD(>lh_pre_req_free);
+
+   hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req), GFP_KERNEL);
+   hpb->throttle_pre_req = qd;
+   hpb->num_inflight_pre_req = 0;
+
+   if (!hpb->pre_req)
+   goto release_mem;
+
+   for (i = 0; i < qd; i++) {
+   pre_req = hpb->pre_req + i;
+   INIT_LIST_HEAD(_req->list_req);
+   pre_req->req = NULL;
+   pre_req->bio = NULL;


Why don't prepare bio as same as wb.m_page? Won't that save more time
for ufshpb_issue_pre_req()?

Thanks,
Can Guo.


+
+   pre_req->wb.m_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!pre_req->wb.m_page) {
+   for (j = 0; j < i; j++)
+   __free_page(hpb->pre_req[j].wb.m_page);
+
+   goto release_mem;
+   }
+   list_add_tail(_req->list_req, >lh_pre_req_free);
+   }
+
+   return 0;
+release_mem:
+   kfree(hpb->pre_req);
+   return -ENOMEM;
+}
+


[PATCH v29 4/4] scsi: ufs: Add HPB 2.0 support

2021-03-14 Thread Daejun Park
This patch supports the HPB 2.0.

The HPB 2.0 supports read of varying sizes from 4KB to 512KB.
In the case of Read (<= 32KB) is supported as single HPB read.
In the case of Read (36KB ~ 512KB) is supported by as a combination of
write buffer command and HPB read command to deliver more PPN.
The write buffer commands may not be issued immediately due to busy tags.
To use HPB read more aggressively, the driver can requeue the write buffer
command. The requeue threshold is implemented as timeout and can be
modified with requeue_timeout_ms entry in sysfs.

Signed-off-by: Daejun Park 
---
 Documentation/ABI/testing/sysfs-driver-ufs |  47 +-
 drivers/scsi/ufs/ufs-sysfs.c   |   4 +
 drivers/scsi/ufs/ufs.h |   3 +-
 drivers/scsi/ufs/ufshcd.c  |  25 +-
 drivers/scsi/ufs/ufshcd.h  |   7 +
 drivers/scsi/ufs/ufshpb.c  | 627 +++--
 drivers/scsi/ufs/ufshpb.h  |  66 ++-
 7 files changed, 698 insertions(+), 81 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-ufs 
b/Documentation/ABI/testing/sysfs-driver-ufs
index 528bf89fc98b..419adf450b89 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -1253,14 +1253,14 @@ Description:This entry shows the number of HPB 
pinned regions assigned to
 
The file is read only.
 
-What:  /sys/class/scsi_device/*/device/hpb_sysfs/hit_cnt
+What:  /sys/class/scsi_device/*/device/hpb_stat_sysfs/hit_cnt
 Date:  March 2021
 Contact:   Daejun Park 
 Description:   This entry shows the number of reads that changed to HPB read.
 
The file is read only.
 
-What:  /sys/class/scsi_device/*/device/hpb_sysfs/miss_cnt
+What:  /sys/class/scsi_device/*/device/hpb_stat_sysfs/miss_cnt
 Date:  March 2021
 Contact:   Daejun Park 
 Description:   This entry shows the number of reads that cannot be changed to
@@ -1268,7 +1268,7 @@ Description:  This entry shows the number of reads 
that cannot be changed to
 
The file is read only.
 
-What:  /sys/class/scsi_device/*/device/hpb_sysfs/rb_noti_cnt
+What:  /sys/class/scsi_device/*/device/hpb_stat_sysfs/rb_noti_cnt
 Date:  March 2021
 Contact:   Daejun Park 
 Description:   This entry shows the number of response UPIUs that has
@@ -1276,7 +1276,7 @@ Description:  This entry shows the number of response 
UPIUs that has
 
The file is read only.
 
-What:  /sys/class/scsi_device/*/device/hpb_sysfs/rb_active_cnt
+What:  /sys/class/scsi_device/*/device/hpb_stat_sysfs/rb_active_cnt
 Date:  March 2021
 Contact:   Daejun Park 
 Description:   This entry shows the number of active sub-regions recommended by
@@ -1284,7 +1284,7 @@ Description:  This entry shows the number of active 
sub-regions recommended by
 
The file is read only.
 
-What:  /sys/class/scsi_device/*/device/hpb_sysfs/rb_inactive_cnt
+What:  /sys/class/scsi_device/*/device/hpb_stat_sysfs/rb_inactive_cnt
 Date:  March 2021
 Contact:   Daejun Park 
 Description:   This entry shows the number of inactive regions recommended by
@@ -1292,10 +1292,45 @@ Description:This entry shows the number of inactive 
regions recommended by
 
The file is read only.
 
-What:  /sys/class/scsi_device/*/device/hpb_sysfs/map_req_cnt
+What:  /sys/class/scsi_device/*/device/hpb_stat_sysfs/map_req_cnt
 Date:  March 2021
 Contact:   Daejun Park 
 Description:   This entry shows the number of read buffer commands for
activating sub-regions recommended by response UPIUs.
 
The file is read only.
+
+What:  
/sys/class/scsi_device/*/device/hpb_param_sysfs/requeue_timeout_ms
+Date:  March 2021
+Contact:   Daejun Park 
+Description:   This entry shows the requeue timeout threshold for write buffer
+   command in ms. This value can be changed by writing proper 
integer to
+   this entry.
+
+What:  
/sys/bus/platform/drivers/ufshcd/*/attributes/max_data_size_hpb_single_cmd
+Date:  March 2021
+Contact:   Daejun Park 
+Description:   This entry shows the maximum HPB data size for using single HPB
+   command.
+
+   ===  
+   00h  4KB
+   01h  8KB
+   02h  12KB
+   ...
+   FFh  1024KB
+   ===  
+
+   The file is read only.
+
+What:  /sys/bus/platform/drivers/ufshcd/*/flags/wb_enable
+Date:  March 2021
+Contact:   Daejun Park 
+Description:   This entry shows the status of HPB.
+
+   == 
+   0  HPB is not enabled.
+   1  HPB is enabled
+   == 
+
+