[PATCH v2] fs/block_dev.c: remove unused include

2019-04-09 Thread Chengguang Xu
Just remove unused include <linux/badblocks.h> from
fs/block_dev.c.

Signed-off-by: Chengguang Xu 
---
v1->v2:
- Modify patch title and commit log.

 fs/block_dev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 78d3257435c0..6b584817d461 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -30,7 +30,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
--
2.20.1



[PATCH] fs/block_dev.c: remove unused header file badblocks.h

2019-04-08 Thread Chengguang Xu
This patch just removes unused header file badblocks.h
from fs/block_dev.c

Signed-off-by: Chengguang Xu 
---
 fs/block_dev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 78d3257435c0..6b584817d461 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -30,7 +30,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
--
2.20.1



[PATCH 2/2] block: code cleanup for bio_find_or_create_slab()

2019-03-08 Thread Chengguang Xu
It's just a simple code cleanup, no functional change.

Signed-off-by: Chengguang Xu 
---
 block/bio.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 8081906f5d01..fefd49a561ab 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -90,29 +90,27 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned 
int extra_size)
else if (bslab->slab_size == sz) {
slab = bslab->slab;
bslab->slab_ref++;
-   break;
+   goto out_unlock;
}
i++;
}
 
-   if (slab)
-   goto out_unlock;
-
-   if (bio_slab_nr == bio_slab_max && entry == -1) {
-   new_bio_slab_max = bio_slab_max << 1;
-   new_bio_slabs = krealloc(bio_slabs,
+   if (entry == -1) {
+   if (bio_slab_nr == bio_slab_max) {
+   new_bio_slab_max = bio_slab_max << 1;
+   new_bio_slabs = krealloc(bio_slabs,
 new_bio_slab_max * sizeof(struct 
bio_slab),
 GFP_KERNEL);
-   if (!new_bio_slabs)
-   goto out_unlock;
-   bio_slab_max = new_bio_slab_max;
-   bio_slabs = new_bio_slabs;
-   }
-   if (entry == -1)
+   if (!new_bio_slabs)
+   goto out_unlock;
+   bio_slab_max = new_bio_slab_max;
+   bio_slabs = new_bio_slabs;
+   }
+
entry = bio_slab_nr;
+   }
 
bslab = &bio_slabs[entry];
-
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
 SLAB_HWCACHE_ALIGN, NULL);
-- 
2.20.1



[PATCH 1/2] block: increase bio_slab_nr only for necessary case

2019-03-08 Thread Chengguang Xu
When we fail to create a new cache in bio_find_or_create_slab(),
we should not increase bio_slab_nr, because doing so causes
subsequent searches to refer to uninitialized data.

Signed-off-by: Chengguang Xu 
---
 block/bio.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block/bio.c b/block/bio.c
index 4db1008309ed..8081906f5d01 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -109,7 +109,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned 
int extra_size)
bio_slabs = new_bio_slabs;
}
if (entry == -1)
-   entry = bio_slab_nr++;
+   entry = bio_slab_nr;
 
bslab = &bio_slabs[entry];
 
@@ -122,6 +122,10 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned 
int extra_size)
bslab->slab = slab;
bslab->slab_ref = 1;
bslab->slab_size = sz;
+
+   if (entry == bio_slab_nr)
+   bio_slab_nr++;
+
 out_unlock:
mutex_unlock(&bio_slab_lock);
return slab;
-- 
2.20.1



[PATCH] block: remove redundant unlikely annotation

2019-02-11 Thread Chengguang Xu
unlikely() is already included in IS_ERR(),
so just remove it.

Signed-off-by: Chengguang Xu 
---
 block/blk-cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2bed5725aa03..699dba716c83 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -880,7 +880,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct 
blkcg_policy *pol,
blkg_free(new_blkg);
} else {
blkg = blkg_create(pos, q, new_blkg);
-   if (unlikely(IS_ERR(blkg))) {
+   if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto fail_unlock;
}
-- 
2.20.1



[PATCH] block: refactor register_blkdev() to compare major number when allocating unused major number

2019-02-10 Thread Chengguang Xu
Currently, when the major number is specified as 0,
register_blkdev() will try to allocate any unused
major number in the range. However, the allocation
logic does not accurately compare the major number
with existing entries, so even when plenty of major
numbers are available, it might still fail with
-EBUSY in extreme cases.

Signed-off-by: Chengguang Xu 
---
 block/genhd.c | 103 ++
 1 file changed, 54 insertions(+), 49 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 1dd8fd6613b8..80b788ed17d1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -330,6 +330,40 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
 }
 #endif /* CONFIG_PROC_FS */
 
+static int __register_blkdev(unsigned int major, const char *name,
+struct blk_major_name *new)
+{
+   struct blk_major_name *n, *p = NULL;
+   int index = major_to_index(major);
+
+   for (n = major_names[index]; n; n = n->next) {
+   if (n->major == major)
+   return -EBUSY;
+   p = n;
+   }
+
+   new->major = major;
+   if (p == NULL)
+   major_names[index] = new;
+   else
+   p->next = new;
+
+   return 0;
+}
+
+static int alloc_blkdev(unsigned int major, const char *name,
+   struct blk_major_name *new)
+{
+   int index;
+
+   for (index = ARRAY_SIZE(major_names) - 1; index; index--) {
+   if (__register_blkdev(index, name, new) == 0)
+   return index;
+   }
+
+   return -EBUSY;
+}
+
 /**
  * register_blkdev - register a new block device
  *
@@ -345,73 +379,44 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
  *then the function returns zero on success, or a negative error code
  *  - if any unused major number was requested with @major = 0 parameter
  *then the return value is the allocated major number in range
- *[1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
+ *[1..BLKDEV_MAJOR_HASH_SIZE-1] or a negative error code otherwise
  *
  * See Documentation/admin-guide/devices.txt for the list of allocated
  * major numbers.
  */
 int register_blkdev(unsigned int major, const char *name)
 {
-   struct blk_major_name **n, *p;
-   int index, ret = 0;
-
-   mutex_lock(&block_class_lock);
-
-   /* temporary */
-   if (major == 0) {
-   for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
-   if (major_names[index] == NULL)
-   break;
-   }
-
-   if (index == 0) {
-   printk("register_blkdev: failed to get major for %s\n",
-  name);
-   ret = -EBUSY;
-   goto out;
-   }
-   major = index;
-   ret = major;
-   }
+   struct blk_major_name *new;
+   int ret;
 
if (major >= BLKDEV_MAJOR_MAX) {
-   pr_err("register_blkdev: major requested (%u) is greater than 
the maximum (%u) for %s\n",
-  major, BLKDEV_MAJOR_MAX-1, name);
-
-   ret = -EINVAL;
-   goto out;
+   pr_err("%s: major requested (%u) is greater than the maximum 
(%u) for %s\n",
+   __func__, major, BLKDEV_MAJOR_MAX-1, name);
+   return -EINVAL;
}
 
-   p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
-   if (p == NULL) {
-   ret = -ENOMEM;
-   goto out;
-   }
+   new = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
+   if (new == NULL)
+   return -ENOMEM;
 
-   p->major = major;
-   strlcpy(p->name, name, sizeof(p->name));
-   p->next = NULL;
-   index = major_to_index(major);
+   strlcpy(new->name, name, sizeof(new->name));
+   new->next = NULL;
 
-   for (n = &major_names[index]; *n; n = &(*n)->next) {
-   if ((*n)->major == major)
-   break;
-   }
-   if (!*n)
-   *n = p;
+   mutex_lock(&block_class_lock);
+   if (major == 0)
+   ret = alloc_blkdev(major, name, new);
else
-   ret = -EBUSY;
+   ret = __register_blkdev(major, name, new);
+   mutex_unlock(&block_class_lock);
 
if (ret < 0) {
-   printk("register_blkdev: cannot get major %u for %s\n",
-  major, name);
-   kfree(p);
+   kfree(new);
+   pr_err("%s: cannot get major for %s, major requested (%u)\n",
+   __func__, name, major);
}
-out:
-   mutex_unlock(&block_class_lock);
+
return ret;
 }
-
 EXPORT_SYMBOL(register_blkdev);
 
 void unregister_blkdev(unsigned int major, const char *name)
-- 
2.20.1



[PATCH] block: break loop when getting target major number in blkdev_show()

2019-02-08 Thread Chengguang Xu
Each entry in the major_names table has a unique major number,
so there is no need to continue searching after finding the
target major number in blkdev_show().

Signed-off-by: Chengguang Xu 
---
 block/genhd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/genhd.c b/block/genhd.c
index 1dd8fd6613b8..908e3b4399a3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -324,8 +324,10 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
 
mutex_lock(&block_class_lock);
for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
-   if (dp->major == offset)
+   if (dp->major == offset) {
seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
+   break;
+   }
mutex_unlock(&block_class_lock);
 }
 #endif /* CONFIG_PROC_FS */
-- 
2.17.2



[PATCH] block: use strncmp() instead of strcmp() for comparing device name

2019-01-31 Thread Chengguang Xu
We use strlcpy() to copy the device name in register_blkdev(),
so if the device name is long enough (more precisely, longer
than 15 bytes), the copied name is truncated to 15 bytes.
In this case, it's better to use strncmp() instead of strcmp()
to compare the device name in unregister_blkdev(), so that
we can recognize the device name correctly.

Signed-off-by: Chengguang Xu 
---
 block/genhd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/genhd.c b/block/genhd.c
index 1dd8fd6613b8..efc532ae9de9 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -424,7 +424,7 @@ void unregister_blkdev(unsigned int major, const char *name)
for (n = &major_names[index]; *n; n = &(*n)->next)
if ((*n)->major == major)
break;
-   if (!*n || strcmp((*n)->name, name)) {
+   if (!*n || strncmp((*n)->name, name, strlen((*n)->name))) {
WARN_ON(1);
} else {
p = *n;
-- 
2.20.1



[PATCH] block: return NULL if request_module() failed in elevator_get()

2019-01-04 Thread Chengguang Xu
If request_module() failed, there seems to be no reason to look up
the iosched again, so add a check to handle this case.

Signed-off-by: Chengguang Xu 
---
 block/elevator.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/elevator.c b/block/elevator.c
index 8fdcd64ae12e..f89527951243 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -124,7 +124,9 @@ static struct elevator_type *elevator_get(struct 
request_queue *q,
e = elevator_find(name, q->mq_ops != NULL);
if (!e && try_loading) {
spin_unlock(&elv_list_lock);
-   request_module("%s-iosched", name);
+   if (request_module("%s-iosched", name))
+   return e;
+
spin_lock(&elv_list_lock);
e = elevator_find(name, q->mq_ops != NULL);
}
-- 
2.17.2



[PATCH] block: add sanity check for uid in syscall of ioprio_get()

2019-01-01 Thread Chengguang Xu
It is better to validate the uid, which comes from userspace.

Signed-off-by: Chengguang Xu 
---
 block/ioprio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/ioprio.c b/block/ioprio.c
index f9821080c92c..b59397662da6 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -218,6 +218,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
break;
case IOPRIO_WHO_USER:
uid = make_kuid(current_user_ns(), who);
+   if (!uid_valid(uid))
+   break;
if (!who)
user = current_user();
else
-- 
2.17.2



[PATCH] block: add prio data check for IOPRIO_CLASS_IDLE

2018-12-26 Thread Chengguang Xu
The idle class does not have class data, so let's
fall through to case IOPRIO_CLASS_NONE to check
the specification of data field.

Signed-off-by: Chengguang Xu 
---
 block/ioprio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/ioprio.c b/block/ioprio.c
index 8c0f971015c9..63db916f86fb 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -78,7 +78,7 @@ int ioprio_check_cap(int ioprio)
 
break;
case IOPRIO_CLASS_IDLE:
-   break;
+   /* fall through */
case IOPRIO_CLASS_NONE:
if (data)
return -EINVAL;
-- 
2.17.2



[PATCH] block: fix a typo in description of ioprio

2018-12-26 Thread Chengguang Xu
Just fix a typo in description of ioprio.

Signed-off-by: Chengguang Xu 
---
 block/ioprio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/ioprio.c b/block/ioprio.c
index f9821080c92c..8c0f971015c9 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -4,7 +4,7 @@
  * Copyright (C) 2004 Jens Axboe 
  *
  * Helper functions for setting/querying io priorities of processes. The
- * system calls closely mimmick getpriority/setpriority, see the man page for
+ * system calls closely mimic getpriority/setpriority, see the man page for
  * those. The prio argument is a composite of prio class and prio data, where
  * the data argument has meaning within that class. The standard scheduling
  * classes have 8 distinct prio levels, with 0 being the highest prio and 7
-- 
2.17.2



[PATCH] block: change return type to bool from int

2018-12-24 Thread Chengguang Xu
Change some functions' return type from int to bool
because their results can be represented as a bool
value.

Signed-off-by: Chengguang Xu 
---
 block/cfq-iosched.c  | 4 ++--
 block/elevator.c | 2 +-
 include/linux/elevator.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ed41aa978c4a..b58969b8b6ea 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2582,7 +2582,7 @@ cfq_merged_requests(struct request_queue *q, struct 
request *rq,
cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
-static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+static bool cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
   struct bio *bio)
 {
struct cfq_data *cfqd = q->elevator->elevator_data;
@@ -2608,7 +2608,7 @@ static int cfq_allow_bio_merge(struct request_queue *q, 
struct request *rq,
return cfqq == RQ_CFQQ(rq);
 }
 
-static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
+static bool cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
  struct request *next)
 {
return RQ_CFQQ(rq) == RQ_CFQQ(next);
diff --git a/block/elevator.c b/block/elevator.c
index 8fdcd64ae12e..432717fc782f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -56,7 +56,7 @@ static LIST_HEAD(elv_list);
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
  */
-static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
+static bool elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
 {
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 015bb59c0331..f6928aa5cc3d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -30,10 +30,10 @@ typedef void (elevator_merge_req_fn) (struct request_queue 
*, struct request *,
 
 typedef void (elevator_merged_fn) (struct request_queue *, struct request *, 
enum elv_merge);
 
-typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
+typedef bool (elevator_allow_bio_merge_fn) (struct request_queue *,
   struct request *, struct bio *);
 
-typedef int (elevator_allow_rq_merge_fn) (struct request_queue *,
+typedef bool (elevator_allow_rq_merge_fn) (struct request_queue *,
  struct request *, struct request *);
 
 typedef void (elevator_bio_merged_fn) (struct request_queue *,
-- 
2.19.2



[PATCH] block: loop: remove redundant code

2018-12-22 Thread Chengguang Xu
Code cleanup for removing redundant break in switch case.

Signed-off-by: Chengguang Xu 
---
 drivers/block/loop.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cb0cc8685076..4e1d8aa6e1e9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -615,7 +615,6 @@ static int do_req_filebacked(struct loop_device *lo, struct 
request *rq)
default:
WARN_ON_ONCE(1);
return -EIO;
-   break;
}
 }
 
-- 
2.17.2



[PATCH] block: sunvdc: remove redundant code

2018-12-22 Thread Chengguang Xu
Code cleanup for removing redundant break in switch case.

Signed-off-by: Chengguang Xu 
---
 drivers/block/sunvdc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index b54fa6726303..c8f3e569296a 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -617,7 +617,6 @@ static int generic_request(struct vdc_port *port, u8 op, 
void *buf, int len)
case VD_OP_GET_EFI:
case VD_OP_SET_EFI:
return -EOPNOTSUPP;
-   break;
};
 
map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
-- 
2.17.2



[PATCH] block: loop: check error using IS_ERR instead of IS_ERR_OR_NULL in loop_add()

2018-12-16 Thread Chengguang Xu
blk_mq_init_queue() will not return NULL pointer to its caller,
so it's better to replace IS_ERR_OR_NULL using IS_ERR in loop_add().

If in the future things change to check NULL pointer inside loop_add(),
we should return -ENOMEM as return code instead of PTR_ERR(NULL).

Signed-off-by: Chengguang Xu 
---
 drivers/block/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cb0cc8685076..849a48120cca 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1853,7 +1853,7 @@ static int loop_add(struct loop_device **l, int i)
goto out_free_idr;
 
lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
-   if (IS_ERR_OR_NULL(lo->lo_queue)) {
+   if (IS_ERR(lo->lo_queue)) {
err = PTR_ERR(lo->lo_queue);
goto out_cleanup_tags;
}
-- 
2.17.2



[PATCH] aoe: add __exit annotation

2018-12-15 Thread Chengguang Xu
Add __exit annotation to cleanup helper which
is only called once in the module.

Signed-off-by: Chengguang Xu 
---
 drivers/block/aoe/aoemain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 251482066977..1e4e2971171c 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -24,7 +24,7 @@ static void discover_timer(struct timer_list *t)
aoecmd_cfg(0x, 0xff);
 }
 
-static void
+static void __exit
 aoe_exit(void)
 {
del_timer_sync(&timer);
-- 
2.17.2



[PATCH] block: remove unnecessary condition check

2018-08-27 Thread Chengguang Xu
kmem_cache_destroy() can handle NULL pointer correctly, so there is
no need to check e->icq_cache before calling kmem_cache_destroy().

Signed-off-by: Chengguang Xu 
---
 block/elevator.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index 5ea6e7d600e4..6a06b5d040e5 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -895,8 +895,7 @@ int elv_register(struct elevator_type *e)
spin_lock(&elv_list_lock);
if (elevator_find(e->elevator_name, e->uses_mq)) {
spin_unlock(&elv_list_lock);
-   if (e->icq_cache)
-   kmem_cache_destroy(e->icq_cache);
+   kmem_cache_destroy(e->icq_cache);
return -EBUSY;
}
list_add_tail(&e->list, &elv_list);
-- 
2.17.1



[PATCH] block: change return type to bool

2018-08-16 Thread Chengguang Xu
Because blk_do_io_stat() only determines whether the request
contributes to IO statistics, it is better to change its return type to bool.

Signed-off-by: Chengguang Xu 
---
 block/blk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk.h b/block/blk.h
index d4d67e948920..644975e85053 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -297,7 +297,7 @@ extern int blk_update_nr_requests(struct request_queue *, 
unsigned int);
  * b) the queue had IO stats enabled when this request was started, and
  * c) it's a file system request
  */
-static inline int blk_do_io_stat(struct request *rq)
+static inline bool blk_do_io_stat(struct request *rq)
 {
return rq->rq_disk &&
   (rq->rq_flags & RQF_IO_STAT) &&
-- 
2.17.1



[PATCH] blk-throttle: return proper bool type to caller instead of 0/1

2018-05-29 Thread Chengguang Xu
Change to return true/false only for bool type return code.

Signed-off-by: Chengguang Xu 
---
 block/blk-throttle.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c5a1316..3155dee 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -821,7 +821,7 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool 
rw)
if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
return false;
 
-   return 1;
+   return true;
 }
 
 /* Trim the used slices and adjust slice start accordingly */
@@ -931,7 +931,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, 
struct bio *bio,
 
if (wait)
*wait = jiffy_wait;
-   return 0;
+   return false;
 }
 
 static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
@@ -974,7 +974,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, 
struct bio *bio,
jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
if (wait)
*wait = jiffy_wait;
-   return 0;
+   return false;
 }
 
 /*
@@ -1024,7 +1024,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct 
bio *bio,
tg_with_in_iops_limit(tg, bio, &iops_wait)) {
if (wait)
*wait = 0;
-   return 1;
+   return true;
}
 
max_wait = max(bps_wait, iops_wait);
@@ -1035,7 +1035,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct 
bio *bio,
if (time_before(tg->slice_end[rw], jiffies + max_wait))
throtl_extend_slice(tg, rw, jiffies + max_wait);
 
-   return 0;
+   return false;
 }
 
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
-- 
1.8.3.1



Re: [PATCH v2] bcache: move closure debug file into debug directory

2018-03-06 Thread Chengguang Xu
Hi Mike

I'm really sorry for the inconvenience. On my test box it compiled with no 
error,
so I didn't notice the problem before. I've sent a modified v2 version that fixes it.


> Sent: Wednesday, March 07, 2018 at 10:54 AM
> From: "Michael Lyle" 
> To: "Chengguang Xu" , tang.jun...@zte.com.cn, 
> kent.overstr...@gmail.com
> Cc: linux-bca...@vger.kernel.org, linux-block@vger.kernel.org
> Subject: Re: [PATCH v2] bcache: move closure debug file into debug directory
>
> Sorry- I had to pull/unapply this actually.
> 
> On 03/04/2018 11:40 PM, Chengguang Xu wrote:
> > -static struct dentry *debug
> > +struct dentry *debug;
> 
> This conflicts with other symbols called "debug" and doesn't compile.
> Please be sure that your patch set compiles before submitting.
> 
> Mike
> 
> >  
> >  #ifdef CONFIG_BCACHE_DEBUG
> >  
> > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> > index 1a9fdab..b784292 100644
> > --- a/drivers/md/bcache/super.c
> > +++ b/drivers/md/bcache/super.c
> > @@ -2133,7 +2133,6 @@ static int __init bcache_init(void)
> > mutex_init(&bch_register_lock);
> > init_waitqueue_head(&unregister_wait);
> > register_reboot_notifier(&reboot);
> > -   closure_debug_init();
> >  
> > bcache_major = register_blkdev(0, "bcache");
> > if (bcache_major < 0) {
> > @@ -2145,7 +2144,7 @@ static int __init bcache_init(void)
> > if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
> > !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
> > bch_request_init() ||
> > -   bch_debug_init(bcache_kobj) ||
> > +   bch_debug_init(bcache_kobj) || closure_debug_init() ||
> > sysfs_create_files(bcache_kobj, files))
> > goto err;
> >  
> > 
> 
> 


[PATCH v2] bcache: move closure debug file into debug directory

2018-03-06 Thread Chengguang Xu
In the current code the closure debug file is created outside of the
debug directory, and nothing removes it when the module is unloaded,
so creating the file fails when trying to reload the module.

This patch moves the closure debug file into the "bcache" debug directory
so that the file gets deleted properly.

Signed-off-by: Chengguang Xu 
---
Changes since v1:
- Rename dentry name of debug directory to "bcache_debug" from "debug" to
avoid compile error.

 drivers/md/bcache/closure.c |  9 +
 drivers/md/bcache/closure.h |  5 +++--
 drivers/md/bcache/debug.c   | 14 +++---
 drivers/md/bcache/super.c   |  3 +--
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920..c0949c9 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
 }
 EXPORT_SYMBOL(closure_debug_destroy);
 
-static struct dentry *debug;
+static struct dentry *closure_debug;
 
 static int debug_seq_show(struct seq_file *f, void *data)
 {
@@ -199,11 +199,12 @@ static int debug_seq_open(struct inode *inode, struct 
file *file)
.release= single_release
 };
 
-void __init closure_debug_init(void)
+int __init closure_debug_init(void)
 {
-   debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+   closure_debug = debugfs_create_file("closures",
+   0400, bcache_debug, NULL, &debug_ops);
+   return IS_ERR_OR_NULL(closure_debug);
 }
-
 #endif
 
 MODULE_AUTHOR("Kent Overstreet ");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9..71427eb 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@
 struct closure;
 struct closure_syncer;
 typedef void (closure_fn) (struct closure *);
+extern struct dentry *bcache_debug;
 
 struct closure_waitlist {
struct llist_head   list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
 
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
-void closure_debug_init(void);
+int closure_debug_init(void);
 void closure_debug_create(struct closure *cl);
 void closure_debug_destroy(struct closure *cl);
 
 #else
 
-static inline void closure_debug_init(void) {}
+static inline int closure_debug_init(void) { return 0; }
 static inline void closure_debug_create(struct closure *cl) {}
 static inline void closure_debug_destroy(struct closure *cl) {}
 
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408..028f7b3 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
 #include 
 #include 
 
-static struct dentry *debug;
+struct dentry *bcache_debug;
 
 #ifdef CONFIG_BCACHE_DEBUG
 
@@ -232,11 +232,11 @@ static int bch_dump_release(struct inode *inode, struct 
file *file)
 
 void bch_debug_init_cache_set(struct cache_set *c)
 {
-   if (!IS_ERR_OR_NULL(debug)) {
+   if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
 
-   c->debug = debugfs_create_file(name, 0400, debug, c,
+   c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
   &cache_set_debug_ops);
}
 }
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
 
 void bch_debug_exit(void)
 {
-   if (!IS_ERR_OR_NULL(debug))
-   debugfs_remove_recursive(debug);
+   if (!IS_ERR_OR_NULL(bcache_debug))
+   debugfs_remove_recursive(bcache_debug);
 }
 
 int __init bch_debug_init(struct kobject *kobj)
 {
-   debug = debugfs_create_dir("bcache", NULL);
+   bcache_debug = debugfs_create_dir("bcache", NULL);
 
-   return IS_ERR_OR_NULL(debug);
+   return IS_ERR_OR_NULL(bcache_debug);
 }
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1a9fdab..b784292 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2133,7 +2133,6 @@ static int __init bcache_init(void)
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
-   closure_debug_init();
 
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
@@ -2145,7 +2144,7 @@ static int __init bcache_init(void)
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
bch_request_init() ||
-   bch_debug_init(bcache_kobj) ||
+   bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;
 
-- 
1.8.3.1



[PATCH v2] bcache: move closure debug file into debug directory

2018-03-04 Thread Chengguang Xu
In the current code the closure debug file is created outside of the
debug directory, and nothing removes it when the module is unloaded,
so creating the file fails when trying to reload the module.

This patch moves the closure debug file into the "bcache" debug directory
so that the file gets deleted properly.

Signed-off-by: Chengguang Xu 
---
Changes since v1:
- Move closure debug file into "bcache" debug directory instead of
deleting it individually.
- Change Signed-off-by mail address.

 drivers/md/bcache/closure.c | 9 +
 drivers/md/bcache/closure.h | 5 +++--
 drivers/md/bcache/debug.c   | 2 +-
 drivers/md/bcache/super.c   | 3 +--
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920..64b123c 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
 }
 EXPORT_SYMBOL(closure_debug_destroy);
 
-static struct dentry *debug;
+static struct dentry *closure_debug;
 
 static int debug_seq_show(struct seq_file *f, void *data)
 {
@@ -199,11 +199,12 @@ static int debug_seq_open(struct inode *inode, struct 
file *file)
.release= single_release
 };
 
-void __init closure_debug_init(void)
+int __init closure_debug_init(void)
 {
-   debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+   closure_debug = debugfs_create_file("closures",
+   0400, debug, NULL, &debug_ops);
+   return IS_ERR_OR_NULL(closure_debug);
 }
-
 #endif
 
 MODULE_AUTHOR("Kent Overstreet ");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9..0fb704d 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@
 struct closure;
 struct closure_syncer;
 typedef void (closure_fn) (struct closure *);
+extern struct dentry *debug;
 
 struct closure_waitlist {
struct llist_head   list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
 
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
-void closure_debug_init(void);
+int closure_debug_init(void);
 void closure_debug_create(struct closure *cl);
 void closure_debug_destroy(struct closure *cl);
 
 #else
 
-static inline void closure_debug_init(void) {}
+static inline int closure_debug_init(void) { return 0; }
 static inline void closure_debug_create(struct closure *cl) {}
 static inline void closure_debug_destroy(struct closure *cl) {}
 
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408..5db02de 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
 #include 
 #include 
 
-static struct dentry *debug;
+struct dentry *debug;
 
 #ifdef CONFIG_BCACHE_DEBUG
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1a9fdab..b784292 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2133,7 +2133,6 @@ static int __init bcache_init(void)
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
-   closure_debug_init();
 
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
@@ -2145,7 +2144,7 @@ static int __init bcache_init(void)
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
bch_request_init() ||
-   bch_debug_init(bcache_kobj) ||
+   bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;
 
-- 
1.8.3.1



Re: [PATCH] bcache: remove closure debug file when unloading module

2018-03-04 Thread Chengguang Xu


> 在 2018年3月2日,下午2:34,tang.jun...@zte.com.cn 写道:
> 
> From: Tang Junhui 
> 
> Hello Chengguang
> 
>> When unloading bcache module there is lack of removing
>> operation for closure debug file, so it will cause
>> creating error when trying to reload module.
>> 
> 
> Yes, This issue is true. 
> Actually, the original code try to remove closure debug file
> by bch_debug_exit(), which remove all the debug file in
> bcache directory, and closure debug file is expected to be
> one debug file in bcache debug directory.
> 
> But in the current code, closure_debug_init() is called to create
> the closure debug file before the bcache debug directory is created in
> bch_debug_init(), so the closure debug file is created outside
> the bcache directory; then, when bch_debug_exit() is called, the
> bcache directory is removed, but the closure debug file is not.
> 
> So the best way to resolve this issue is not to remove the
> closure debug file again, but to put the closure debug file
> under the bcache directory in debug sysfs.

Yes, that looks better. I’ll modify the patch as you suggested in v2. Thanks for your 
review.

> 
>> This fix introduces closure_debug_exit to handle removing
>> operation properly.
>> 
>> Signed-off-by: Chengguang Xu 
>> ---
>> drivers/md/bcache/closure.c | 5 +
>> drivers/md/bcache/closure.h | 2 ++
>> drivers/md/bcache/super.c   | 2 ++
>> 3 files changed, 9 insertions(+)
>> 
>> diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
>> index 7f12920..8fcd737 100644
>> --- a/drivers/md/bcache/closure.c
>> +++ b/drivers/md/bcache/closure.c
>> @@ -204,6 +204,11 @@ void __init closure_debug_init(void)
>>debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
>> }
>> 
>> +void closure_debug_exit(void)
>> +{
>> +debugfs_remove(debug);
>> +}
>> +
>> #endif
>> 
>> MODULE_AUTHOR("Kent Overstreet ");
>> diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
>> index 3b9dfc9..1aa0f7e 100644
>> --- a/drivers/md/bcache/closure.h
>> +++ b/drivers/md/bcache/closure.h
>> @@ -186,12 +186,14 @@ static inline void closure_sync(struct closure *cl)
>> #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
>> 
>> void closure_debug_init(void);
>> +void closure_debug_exit(void);
>> void closure_debug_create(struct closure *cl);
>> void closure_debug_destroy(struct closure *cl);
>> 
>> #else
>> 
>> static inline void closure_debug_init(void) {}
>> +static inline void closure_debug_exit(void) {}
>> static inline void closure_debug_create(struct closure *cl) {}
>> static inline void closure_debug_destroy(struct closure *cl) {}
>> 
>> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
>> index 1a9fdab..38e2e21 100644
>> --- a/drivers/md/bcache/super.c
>> +++ b/drivers/md/bcache/super.c
>> @@ -2118,6 +2118,7 @@ static void bcache_exit(void)
>>destroy_workqueue(bcache_wq);
>>if (bcache_major)
>>unregister_blkdev(bcache_major, "bcache");
>> +closure_debug_exit();
>>unregister_reboot_notifier(&reboot);
>>mutex_destroy(&bch_register_lock);
>> }
>> @@ -2137,6 +2138,7 @@ static int __init bcache_init(void)
>> 
>>bcache_major = register_blkdev(0, "bcache");
>>if (bcache_major < 0) {
>> +closure_debug_exit();
>>unregister_reboot_notifier(&reboot);
>>mutex_destroy(&bch_register_lock);
>>return bcache_major;
>> -- 
>> 1.8.3.1
> 
> Thanks
> Tang Junhui



Re: [PATCH] blk-throttle: avoid multiple counting for same bio

2018-02-22 Thread Chengguang Xu
Hi Joseph,

Thanks for quick reply, I didn’t notice that patch before.


Thanks,
Chengguang.


> 在 2018年2月23日,上午11:55,Joseph Qi  写道:
> 
> 
> 
> On 18/2/23 11:33, Chengguang Xu wrote:
>> Hi Tejun,
>> 
>> Sorry for delayed reply, I was on vacation last week.
>> 
>> The problem still exists in current code of 4.16.0-rc2, 
>> detail test information is below, if further info is needed please let me 
>> know.
>> 
>> Thanks.
>> 
> That's true, the issue Shaohua has fixed is double charge, but double
> stat issue still exists.
> 
> Jiufei has posted a fix, which has already been tested by Bo Liu:
> [PATCH RESEND] blk-throttle: avoid double counted
> https://www.mail-archive.com/linux-block@vger.kernel.org/msg18516.html
> 
> Thanks,
> Joseph
> --
> To unsubscribe from this list: send the line "unsubscribe cgroups" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



Re: [PATCH] blk-throttle: avoid multiple counting for same bio

2018-02-22 Thread Chengguang Xu
Hi Tejun,

Sorry for delayed reply, I was on vacation last week.

The problem still exists in the current 4.16.0-rc2 code.
Detailed test information is below; if further info is needed, please let me know.

Thanks.

———
Both read/write bps are limited to 10MB/s in blkio cgroup v1 & v2

$ uname -r
4.16.0-rc2+


[Without this patch]

CGROUP V1 (direct write):

$ dd if=/dev/zero of=/mnt/sdb1/20/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.402 s, 10.5 MB/s

8:16 Read 16384
8:16 Write 2684354560
8:16 Sync 2684370944
8:16 Async 0
8:16 Total 2684370944

CGROUP V1 (read):

$ dd if=/mnt/sdb1/20/test of=/dev/zero bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.412 s, 10.5 MB/s

8:16 Read 4831838208
8:16 Write 0
8:16 Sync 4831838208
8:16 Async 0
8:16 Total 4831838208


CGROUP V2 (direct write):

$ cat io.max
8:16 rbps=max wbps=10485760 riops=max wiops=max

$ dd if=/dev/zero of=/mnt/sdb1/20/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.408 s, 10.5 MB/s

8:16 rbytes=24576 wbytes=2684354560 rios=5 wios=4096


CGROUP V2 (buffered write):

$ dd if=/dev/zero of=/mnt/sdb1/20/test bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 0.637822 s, 1.7 GB/s

8:16 rbytes=0 wbytes=4831838208 rios=0 wios=4096

CGROUP V2 (read):

$ cat io.max
8:16 rbps=10485760 wbps=max riops=max wiops=max

$ dd if=/mnt/sdb1/20/test of=/dev/zero bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.409 s, 10.5 MB/s

8:16 rbytes=4831846400 wbytes=0 rios=4097 wios=0

[With this patch]

CGROUP V1 (direct write):

$ dd if=/dev/zero of=/mnt/sdb1/20/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.402 s, 10.5 MB/s

8:16 Read 24576
8:16 Write 1073741824
8:16 Sync 1073766400
8:16 Async 0
8:16 Total 1073766400

CGROUP V1 (read):

$ dd if=/mnt/sdb1/20/test of=/dev/zero bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.406 s, 10.5 MB/s

8:16 Read 1073741824
8:16 Write 0
8:16 Sync 1073741824
8:16 Async 0
8:16 Total 1073741824

CGROUP V2 (direct write):

$ dd if=/dev/zero of=/mnt/sdb1/20/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.407 s, 10.5 MB/s

8:16 rbytes=16384 wbytes=1073741824 rios=4 wios=1024


CGROUP V2 (buffered write):

$ dd if=/dev/zero of=/mnt/sdb1/20/test bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 0.650783 s, 1.6 GB/s

8:16 rbytes=0 wbytes=1073741824 rios=0 wios=512

CGROUP V2 (read):

$ dd if=/mnt/sdb1/20/test of=/dev/zero bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 102.411 s, 10.5 MB/s

8:16 rbytes=1077314048 wbytes=0 rios=572 wios=0


———


> 在 2018年2月13日,下午10:43,Tejun Heo  写道:
> 
> On Tue, Feb 13, 2018 at 02:45:50PM +0800, Chengguang Xu wrote:
>> In current throttling/upper limit policy of blkio cgroup
>> blkio.throttle.io_service_bytes does not exactly represent
>> the number of bytes issued to the disk by the group, sometimes
>> this number could be counted multiple times of real bytes.
>> This fix introduces BIO_COUNTED flag to avoid multiple counting
>> for same bio.
>> 
>> Signed-off-by: Chengguang Xu 
> 
> We had a series of fixes / changes for this problem during the last
> cycle.  Can you please see whether the current linus master has the
> same problem.
> 
> Thanks.
> 
> -- 
> tejun



[PATCH] blk-throttle: avoid multiple counting for same bio

2018-02-12 Thread Chengguang Xu
In the current throttling/upper limit policy of the blkio cgroup,
blkio.throttle.io_service_bytes does not exactly represent
the number of bytes issued to the disk by the group; sometimes
the same bytes are counted multiple times, so the reported number
is a multiple of the real byte count.
This fix introduces a BIO_COUNTED flag to avoid counting the
same bio multiple times.

Signed-off-by: Chengguang Xu 
---
 include/linux/blk-cgroup.h | 4 
 include/linux/blk_types.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 69bea82..6c77711 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -686,6 +686,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct blkcg_gq *blkg;
bool throtl = false;
 
+   if (bio_flagged(bio, BIO_COUNTED))
+   return !throtl;
+
rcu_read_lock();
blkcg = bio_blkcg(bio);
 
@@ -708,6 +711,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
bio->bi_iter.bi_size);
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
+   bio_set_flag(bio, BIO_COUNTED);
}
 
rcu_read_unlock();
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index bf18b95..23b83e7 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -181,6 +181,7 @@ struct bio {
 * throttling rules. Don't do it again. */
 #define BIO_TRACE_COMPLETION 10/* bio_endio() should trace the final 
completion
 * of this bio. */
+#define BIO_COUNTED11  /* This bio has already been counted. */
 /* See BVEC_POOL_OFFSET below before adding new flags */
 
 /*
-- 
1.8.3.1



Question about strange phenomenon of cgroup blkio stat

2018-02-11 Thread Chengguang Xu
Hi All

I’m new to cgroup and the block layer, and recently I found a strange phenomenon in 
the cgroup blkio stat, as shown below.

I set up a blkio cgroup and wrote 1GB of data in direct mode, but the stat 
indicated that almost 2.56GB had been written.

I tested several times on a physical machine with cgroup v1 & v2 and got the same 
results on both, but the interesting

thing is that when testing on a VM (virtual machine) the stat exactly matches 
the size of the data written.

Could anyone give me a hint or explanation for this?


testing info:
-

$ uname -r
4.15.2

$ echo $$
4242

$ cat /cgroup2/22/cgroup.procs
4242
13868

$ cat cgroup.controllers
io memory

$ df /mnt/sdb1
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sdb1  209612800 18102292 191510508   9% /mnt/sdb1

$ lsblk
NAME   MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sdb  8:16   0 557.9G  0 disk
├─sdb2   8:18   0 357.9G  0 part
└─sdb1   8:17   0   200G  0 part /mnt/sdb1
sdc  8:32   0 557.9G  0 disk
├─sdc2   8:34   0   100G  0 part
├─sdc3   8:35   0   100G  0 part
├─sdc1   8:33   0   100G  0 part
└─sdc4   8:36   0 257.9G  0 part
sda  8:0    0 557.9G  0 disk
├─sda2   8:2    0 542.2G  0 part /
└─sda1   8:1    0  15.6G  0 part [SWAP]


$ dd if=/dev/zero of=/mnt/sdb1/22/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 2.47852 s, 433 MB/s


$ cat /cgroup2/22/io.stat
8:0 rbytes=1105920 wbytes=36864 rios=59 wios=4
8:16 rbytes=10330624 wbytes=2693799936 rios=102 wios=4105




Thanks,
Chengguang.