Hi,

On 2025/07/21 14:20, Hannes Reinecke wrote:
On 7/18/25 11:23, Yu Kuai wrote:
From: Yu Kuai <yuku...@huawei.com>


+
+#define BITMAP_DATA_OFFSET 1024
+
+/* 64k is the max IO size of sync IO for raid1/raid10 */
+#define MIN_CHUNK_SIZE (64 * 2)
+

Hmm? Which one is it?
Comment says 'max IO size', but it's called 'MIN_CHUNK_SIZE'...

The max IO size here refers to the internal recovery IO size for
raid1/10. Recovery handles at most one llbitmap bit (one chunk) at a
time, so the chunksize must be at least 64k; otherwise the recovery
IO size would be smaller than 64k.
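
For illustration, a minimal sketch of how that floor could be enforced
when a chunk size is configured (the helper itself is an assumption,
not part of this patch):

    /*
     * Illustrative only: one llbitmap bit must cover at least the 64k
     * recovery IO size, so per-bit recovery can still use the full 64k
     * IO size. MIN_CHUNK_SIZE is in 512-byte sectors (64 * 2 == 128).
     */
    static int llbitmap_check_chunksize(unsigned long chunksize)
    {
        if (chunksize < MIN_CHUNK_SIZE)
            return -EINVAL;
        /* must be a power of two so chunkshift is exact */
        if (!is_power_of_2(chunksize))
            return -EINVAL;
        return 0;
    }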

+/*
+ * Dirtied bits that have not been accessed for more than 5s will be cleared
+ * by daemon.
+ */
+#define BARRIER_IDLE 5
+

Should this be changeable, too?

Yes, ideally it should be. Perhaps a new sysfs API?
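
If it does grow a knob, one possible shape, following the existing
md_sysfs_entry pattern in md.c (the barrier_idle field and the
attribute name are assumptions for the sketch):

    static ssize_t
    barrier_idle_show(struct mddev *mddev, char *page)
    {
        struct llbitmap *llbitmap = mddev->bitmap;

        return sysfs_emit(page, "%u\n", llbitmap->barrier_idle);
    }

    static ssize_t
    barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
    {
        struct llbitmap *llbitmap = mddev->bitmap;
        unsigned int secs;

        if (kstrtouint(buf, 10, &secs) || !secs)
            return -EINVAL;

        /* new value takes effect the next time the daemon runs */
        llbitmap->barrier_idle = secs;
        return len;
    }

    static struct md_sysfs_entry md_barrier_idle =
    __ATTR(barrier_idle, 0644, barrier_idle_show, barrier_idle_store);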



+    if (!test_bit(LLPageDirty, &pctl->flags))
+        set_bit(LLPageDirty, &pctl->flags);
+
+    /*
+     * The subpage usually contains a total of 512 bits. If any single bit
+     * within the subpage is marked as dirty, the entire sector will be
+     * written. To avoid impacting write performance, when multiple bits
+     * within the same sector are modified within a short time frame, all
+     * bits in the sector will be collectively marked as dirty at once.
+     */

How short is the 'short timeframe'?
Is this the BARRIER_IDLE setting?
Please clarify.

Yes, the timeframe is BARRIER_IDLE: batching stops once the page has
not been accessed for BARRIER_IDLE seconds.
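
A sketch of the idea (the field and helper names here are
hypothetical, just to illustrate the batching):

    /*
     * Each 512-byte bitmap sector holds 512 chunk states, one byte
     * each, and writeback always rewrites the whole sector. If the
     * sector was touched within the last BARRIER_IDLE seconds, dirty
     * it wholesale instead of bit by bit.
     */
    static void llbitmap_mark_dirty(struct llbitmap_page_ctl *pctl, int bit)
    {
        int sec = bit >> SECTOR_SHIFT;    /* 512 states per sector */

        if (time_before(jiffies,
                pctl->last_touch[sec] + BARRIER_IDLE * HZ))
            llbitmap_dirty_sector(pctl, sec);    /* hypothetical */
        else
            llbitmap_dirty_bit(pctl, bit);       /* hypothetical */

        pctl->last_touch[sec] = jiffies;
    }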

+static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
+{
+    struct mddev *mddev = llbitmap->mddev;
+    struct page *page = NULL;
+    struct md_rdev *rdev;
+
+    if (llbitmap->pctl && llbitmap->pctl[idx])
+        page = llbitmap->pctl[idx]->page;
+    if (page)
+        return page;
+
+    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!page)
+        return ERR_PTR(-ENOMEM);
+
+    rdev_for_each(rdev, mddev) {
+        sector_t sector;
+
+        if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
+            continue;
+
+        sector = mddev->bitmap_info.offset +
+             (idx << PAGE_SECTORS_SHIFT);
+
+        if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ,
+                 true))
+            return page;
+
+        md_error(mddev, rdev);
+    }
+
+    __free_page(page);
+    return ERR_PTR(-EIO);
+}
+

Have you considered moving to folios here?


Of course; however, since the md high-level helpers still use struct
page, I'm sticking with pages for now, which is simpler, and planning
to convert all of the md code to folios later.


+static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
+{
+    struct llbitmap *llbitmap = mddev->bitmap;
+    unsigned long chunks;
+
+    if (chunksize == 0)
+        chunksize = llbitmap->chunksize;
+
+    /* If there is enough space, leave the chunksize unchanged. */
+    chunks = DIV_ROUND_UP(blocks, chunksize);
+    while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
+        chunksize = chunksize << 1;
+        chunks = DIV_ROUND_UP(blocks, chunksize);
+    }
+
+    llbitmap->chunkshift = ffz(~chunksize);
+    llbitmap->chunksize = chunksize;
+    llbitmap->chunks = chunks;
+
+    return 0;
+}
+

Hmm. I do get confused with the chunksize here.
Is this the granularity of the bits in the bitmap
(ie how many data bytes are covered by one bit)?
Or is it the chunksize of the bitmap itself?

Yes, llbitmap->chunksize is the amount of data covered by one
llbitmap bit.
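
Concretely, with chunksize = 128 sectors (64k), chunkshift is 7, and
the bit range an IO dirties is computed as in llbitmap_start_write()
below:

    /*
     * Example: a 1M write at sector 0 spans sectors 0..2047 and
     * dirties bits 0..15 (2047 >> 7 == 15).
     */
    unsigned long start = offset >> llbitmap->chunkshift;
    unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;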

In either case, if it's a 'chunksize' in the sense
of the block layer (namely a boundary which I/O
must not cross), shouldn't you set the request
queue limits accordingly?

I think we don't need to: it's fine for an IO to cross a
llbitmap->chunksize boundary, multiple bits will simply be recorded.
In fact, we support plugging and can record lots of bits at a time;
the only restriction is that the dirty bits have to be written before
issuing the IO.
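
In other words, the only ordering rule is flush-before-dispatch; a
sketch (both helper names are hypothetical):

    /*
     * Ordering sketch: bits dirtied while plugged must reach the
     * bitmap device before the plugged data IO is dispatched, so the
     * on-disk bitmap never claims a region clean while a write to it
     * is in flight.
     */
    static void llbitmap_flush_plug(struct mddev *mddev)
    {
        /* 1. write out every page dirtied since the plug started */
        llbitmap_write_dirty_pages(mddev->bitmap);    /* hypothetical */

        /* 2. only then release the plugged data bios */
        md_release_plugged_bios(mddev);               /* hypothetical */
    }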


+static int llbitmap_load(struct mddev *mddev)
+{
+    enum llbitmap_action action = BitmapActionReload;
+    struct llbitmap *llbitmap = mddev->bitmap;
+
+    if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags))
+        action = BitmapActionStale;
+
+    llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action);
+    return 0;
+}
+
+static void llbitmap_destroy(struct mddev *mddev)
+{
+    struct llbitmap *llbitmap = mddev->bitmap;
+
+    if (!llbitmap)
+        return;
+
+    mutex_lock(&mddev->bitmap_info.mutex);
+
+    timer_delete_sync(&llbitmap->pending_timer);
+    flush_workqueue(md_llbitmap_io_wq);
+    flush_workqueue(md_llbitmap_unplug_wq);
+
+    mddev->bitmap = NULL;
+    llbitmap_free_pages(llbitmap);
+    kfree(llbitmap);
+    mutex_unlock(&mddev->bitmap_info.mutex);
+}
+
+static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
+                 unsigned long sectors)
+{
+    struct llbitmap *llbitmap = mddev->bitmap;
+    unsigned long start = offset >> llbitmap->chunkshift;
+    unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+    int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+    int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+    llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
+
+    while (page_start <= page_end) {
+        llbitmap_raise_barrier(llbitmap, page_start);
+        page_start++;
+    }
+}
+
+static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
+                   unsigned long sectors)
+{
+    struct llbitmap *llbitmap = mddev->bitmap;
+    unsigned long start = offset >> llbitmap->chunkshift;
+    unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+    int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+    int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+    while (page_start <= page_end) {
+        llbitmap_release_barrier(llbitmap, page_start);
+        page_start++;
+    }
+}
+
+static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
+                   unsigned long sectors)
+{
+    struct llbitmap *llbitmap = mddev->bitmap;
+    unsigned long start = DIV_ROUND_UP(offset, llbitmap->chunksize);
+    unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+    int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+    int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+    llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);
+
+    while (page_start <= page_end) {
+        llbitmap_raise_barrier(llbitmap, page_start);
+        page_start++;
+    }
+}
+
+static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
+                 unsigned long sectors)
+{
+    struct llbitmap *llbitmap = mddev->bitmap;
+    unsigned long start = DIV_ROUND_UP(offset, llbitmap->chunksize);
+    unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+    int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+    int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+    while (page_start <= page_end) {
+        llbitmap_release_barrier(llbitmap, page_start);
+        page_start++;
+    }
+}
+
+static void llbitmap_unplug_fn(struct work_struct *work)
+{
+    struct llbitmap_unplug_work *unplug_work =
+        container_of(work, struct llbitmap_unplug_work, work);
+    struct llbitmap *llbitmap = unplug_work->llbitmap;
+    struct blk_plug plug;
+    int i;
+
+    blk_start_plug(&plug);
+
+    for (i = 0; i < llbitmap->nr_pages; i++) {
+        if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
+            !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))

Confused. Is this some kind of micro-optimisation?
Why not simply 'test_and_clear_bit()'?

Yes, because this is called from the IO hot path, and in most cases
only a few pages will have been dirtied by the plugged IO.
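
Annotated, the pattern is (the continue is assumed from context, the
hunk above cuts off before the loop body):

    /*
     * The lockless test_bit() read skips the locked RMW of
     * test_and_clear_bit() for the common case of a clean page; only
     * pages actually dirtied by the plug pay for the atomic, and the
     * second check still closes the race with a concurrent clearer.
     */
    if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
        !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
        continue;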

Thanks for the review!
Kuai

Cheers,

Hannes

