Could anybody help review this bug?

thanks,

Junxiao.

On 8/5/19 1:01 PM, Junxiao Bi wrote:
When md raid1 is used with imsm metadata, the raid device is first set
read-only during the boot stage, and mdmon sets it read-write later.
If the device has partitions, the following race can leave some
partitions read-only, causing them to fail to mount.

CPU 1:                                     CPU 2:
add_partition()                            set_disk_ro() // set disk RW
  // disk was RO, so partition set to RO
  p->policy = get_disk_ro(disk);
                                             if (disk->part0.policy != flag) {
                                                 set_disk_ro_uevent(disk, flag);
                                                 // disk set to RW
                                                 disk->part0.policy = flag;
                                             }
                                             // set all existing partitions to RW
                                             while ((part = disk_part_iter_next(&piter)))
                                                 part->policy = flag;
  // this part was not yet added, so it was still RO
  rcu_assign_pointer(ptbl->part[partno], p);

Move the RO status setting of a partition to after it has been added to
the partition table, and introduce a mutex to synchronize the RO status
between the disk and its partitions.

Signed-off-by: Junxiao Bi <junxiao...@oracle.com>
---
  block/genhd.c             | 3 +++
  block/partition-generic.c | 5 ++++-
  include/linux/genhd.h     | 1 +
  3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/genhd.c b/block/genhd.c
index 54f1f0d381f4..f3cce1d354cf 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1479,6 +1479,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
                }
                ptbl = rcu_dereference_protected(disk->part_tbl, 1);
                rcu_assign_pointer(ptbl->part[0], &disk->part0);
+               mutex_init(&disk->part_lock);
/*
                 * set_capacity() and get_capacity() currently don't use
@@ -1570,6 +1571,7 @@ void set_disk_ro(struct gendisk *disk, int flag)
        struct disk_part_iter piter;
        struct hd_struct *part;
+ mutex_lock(&disk->part_lock);
        if (disk->part0.policy != flag) {
                set_disk_ro_uevent(disk, flag);
                disk->part0.policy = flag;
@@ -1579,6 +1581,7 @@ void set_disk_ro(struct gendisk *disk, int flag)
        while ((part = disk_part_iter_next(&piter)))
                part->policy = flag;
        disk_part_iter_exit(&piter);
+       mutex_unlock(&disk->part_lock);
  }
EXPORT_SYMBOL(set_disk_ro);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index aee643ce13d1..63cb6fb996ff 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -345,7 +345,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
                queue_limit_discard_alignment(&disk->queue->limits, start);
        p->nr_sects = len;
        p->partno = partno;
-       p->policy = get_disk_ro(disk);
if (info) {
                struct partition_meta_info *pinfo = alloc_part_info(disk);
@@ -401,6 +400,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
        /* everything is up and running, commence */
        rcu_assign_pointer(ptbl->part[partno], p);
+ mutex_lock(&disk->part_lock);
+       p->policy = get_disk_ro(disk);
+       mutex_unlock(&disk->part_lock);
+
        /* suppress uevent if the disk suppresses it */
        if (!dev_get_uevent_suppress(ddev))
                kobject_uevent(&pdev->kobj, KOBJ_ADD);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8b5330dd5ac0..df6ddca8a92c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -201,6 +201,7 @@ struct gendisk {
         */
        struct disk_part_tbl __rcu *part_tbl;
        struct hd_struct part0;
+       struct mutex part_lock;
const struct block_device_operations *fops;
        struct request_queue *queue;

Reply via email to