On Wed, Oct 02, 2024 at 10:34:58PM GMT, Dave Chinner wrote:
> On Wed, Oct 02, 2024 at 12:00:01PM +0200, Christian Brauner wrote:
> > On Wed, Oct 02, 2024 at 11:33:17AM GMT, Dave Chinner wrote:
> > > What do people think of moving towards per-sb inode caching and
> > > traversal mechanisms like this?
> > 
> > Patches 1-4 are great cleanups that I would like us to merge even
> > independent of the rest.
> 
> Yes, they make it much easier to manage the iteration code.
> 
> > I don't have big conceptual issues with the series otherwise. The only
> > thing that makes me a bit uneasy is that we are now providing an api
> > that may encourage filesystems to do their own inode caching even if
> > they don't really have a need for it just because it's there.  So really
> > a way that would've solved this issue generically would have been my
> > preference.
> 
> Well, that's the problem, isn't it? :/
> 
> There really isn't a good generic solution for global list access
> and management.  The dlist stuff kinda works, but it still has
> significant overhead and doesn't get rid of spinlock contention
> completely because of the lack of locality between list add and
> remove operations.

There is though; I haven't posted it yet because it still needs some
work, but the concept works and performs about the same as dlock-list.

https://evilpiepirate.org/git/bcachefs.git/log/?h=fast_list

The thing that needs to be sorted before posting is that it can't shrink
the radix tree. generic-radix-tree doesn't support shrinking, and I
could add that, but then ida doesn't provide a way to query the highest
id allocated (xarray doesn't support backwards iteration).

So I'm going to try it using idr and see how that performs (idr is not
really the right data structure for this, split ida and item radix tree
is better, so might end up doing something else).

But - this approach with more work will work for the list_lru lock
contention as well.

>From 32cb8103ecfacdd5ed8e1eb390221c3f8339de6f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <[email protected]>
Date: Sat, 28 Sep 2024 16:22:38 -0400
Subject: [PATCH] lib/fast_list.c

A fast "list" data structure, which is actually a radix tree, with an
IDA for slot allocation and a percpu buffer on top of that.

Items cannot be added or moved to the head or tail, only added at some
(arbitrary) position and removed. The advantage is that adding, removing
and iteration is generally lockless, only hitting the lock in ida when
the percpu buffer is full or empty.

Signed-off-by: Kent Overstreet <[email protected]>

diff --git a/include/linux/fast_list.h b/include/linux/fast_list.h
new file mode 100644
index 000000000000..7d5d8592864d
--- /dev/null
+++ b/include/linux/fast_list.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_FAST_LIST_H
+#define _LINUX_FAST_LIST_H
+
+#include <linux/generic-radix-tree.h>
+#include <linux/idr.h>
+#include <linux/percpu.h>
+
+struct fast_list_pcpu;
+
+struct fast_list {
+       GENRADIX(void *)        items;
+       struct ida              slots_allocated;;
+       struct fast_list_pcpu   *buffer;
+};
+
+int fast_list_get_idx(struct fast_list *l);
+int fast_list_add(struct fast_list *l, void *item);
+void fast_list_remove(struct fast_list *l, unsigned idx);
+void fast_list_exit(struct fast_list *l);
+int fast_list_init(struct fast_list *l);
+
+#endif /* _LINUX_FAST_LIST_H */
diff --git a/lib/Makefile b/lib/Makefile
index 773adf88af41..85cf5a0d36b1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -49,7 +49,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
         bsearch.o find_bit.o llist.o lwq.o memweight.o kfifo.o \
         percpu-refcount.o rhashtable.o base64.o \
         once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
-        generic-radix-tree.o bitmap-str.o
+        generic-radix-tree.o bitmap-str.o fast_list.o
 obj-$(CONFIG_STRING_KUNIT_TEST) += string_kunit.o
 obj-y += string_helpers.o
 obj-$(CONFIG_STRING_HELPERS_KUNIT_TEST) += string_helpers_kunit.o
diff --git a/lib/fast_list.c b/lib/fast_list.c
new file mode 100644
index 000000000000..bbb69bb29687
--- /dev/null
+++ b/lib/fast_list.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Fast, unordered lists
+ *
+ * Supports add, remove, and iterate
+ *
+ * Underneath, they're a radix tree and an IDA, with a percpu buffer for slot
+ * allocation and freeing.
+ *
+ * This means that adding, removing, and iterating over items is lockless,
+ * except when refilling/emptying the percpu slot buffers.
+ */
+
+#include <linux/fast_list.h>
+
+struct fast_list_pcpu {
+       size_t                  nr;
+       size_t                  entries[31];
+};
+
+/**
+ * fast_list_get_idx - get a slot in a fast_list
+ * @l:         list to get slot in
+ *
+ * This allocates a slot in the radix tree without storing to it, so that we 
can
+ * take the potential memory allocation failure early and do the list add later
+ * when we can't take an allocation failure.
+ *
+ * Returns: positive integer on success, -ENOMEM on failure
+ */
+int fast_list_get_idx(struct fast_list *l)
+{
+       int idx;
+
+       preempt_disable();
+       struct fast_list_pcpu *lp = this_cpu_ptr(l->buffer);
+
+       if (unlikely(!lp->nr))
+               while (lp->nr <= ARRAY_SIZE(lp->entries) / 2) {
+                       idx = ida_alloc_range(&l->slots_allocated, 1, ~0, 
GFP_NOWAIT|__GFP_NOWARN);
+                       if (unlikely(idx < 0)) {
+                               preempt_enable();
+                               idx = ida_alloc_range(&l->slots_allocated, 1, 
~0, GFP_KERNEL);
+                               if (unlikely(idx < 0))
+                                       return idx;
+
+                               preempt_disable();
+                               lp = this_cpu_ptr(l->buffer);
+                       }
+
+                       if (unlikely(!genradix_ptr_alloc_inlined(&l->items, idx,
+                                                       
GFP_NOWAIT|__GFP_NOWARN))) {
+                               preempt_enable();
+                               if (!genradix_ptr_alloc(&l->items, idx, 
GFP_KERNEL)) {
+                                       ida_free(&l->slots_allocated, idx);
+                                       return -ENOMEM;
+                               }
+
+                               preempt_disable();
+                               lp = this_cpu_ptr(l->buffer);
+                       }
+
+                       if (unlikely(lp->nr == ARRAY_SIZE(lp->entries)))
+                               ida_free(&l->slots_allocated, idx);
+                       else
+                               lp->entries[lp->nr++] = idx;
+               }
+
+       idx = lp->entries[--lp->nr];
+       preempt_enable();
+
+       return idx;
+}
+
+/**
+ * fast_list_add - add an item to a fast_list
+ * @l:         list
+ * @item:      item to add
+ *
+ * Allocates a slot in the radix tree and stores to it and then returns the
+ * slot index, which must be passed to fast_list_remove().
+ *
+ * Returns: positive integer on success, -ENOMEM on failure
+ */
+int fast_list_add(struct fast_list *l, void *item)
+{
+       int idx = fast_list_get_idx(l);
+       if (idx < 0)
+               return idx;
+
+       *genradix_ptr_inlined(&l->items, idx) = item;
+       return idx;
+}
+
+/**
+ * fast_list_remove - remove an item from a fast_list
+ * @l:         list
+ * @idx:       item's slot index
+ *
+ * Zeroes out the slot in the radix tree and frees the slot for future
+ * fast_list_add() operations.
+ */
+void fast_list_remove(struct fast_list *l, unsigned idx)
+{
+       if (!idx)
+               return;
+
+       *genradix_ptr_inlined(&l->items, idx) = NULL;
+
+       preempt_disable();
+       struct fast_list_pcpu *lp = this_cpu_ptr(l->buffer);
+
+       if (unlikely(lp->nr == ARRAY_SIZE(lp->entries)))
+               while (lp->nr >= ARRAY_SIZE(lp->entries) / 2) {
+                       ida_free(&l->slots_allocated, idx);
+                       idx = lp->entries[--lp->nr];
+               }
+
+       lp->entries[lp->nr++] = idx;
+       preempt_enable();
+}
+
+void fast_list_exit(struct fast_list *l)
+{
+       /* XXX: warn if list isn't empty */
+       free_percpu(l->buffer);
+       ida_destroy(&l->slots_allocated);
+       genradix_free(&l->items);
+}
+
+int fast_list_init(struct fast_list *l)
+{
+       genradix_init(&l->items);
+       ida_init(&l->slots_allocated);
+       l->buffer = alloc_percpu(*l->buffer);
+       if (!l->buffer)
+               return -ENOMEM;
+       return 0;
+}

Reply via email to