Originally, the XOR code benchmarked all algorithms at load time, but
it has since then been hacked multiple times to allow forcing an
algorithm, and then commit 524ccdbdfb52 ("crypto: xor - defer load time
benchmark to a later time") changed the logic to a two-step process
of registration and benchmarking, but only when built-in.

Rework this, so that the XOR_TRY_TEMPLATES macro magic now always just
deals with adding the templates to the list, and benchmarking is always
done in a second pass; for modular builds from module_init, and for the
built-in case using a separate init call level.

Signed-off-by: Christoph Hellwig <[email protected]>
---
 lib/raid/xor/xor-core.c | 98 ++++++++++++++++++++---------------------
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c
index 28aa654c288d..a2c529d7b7c2 100644
--- a/lib/raid/xor/xor-core.c
+++ b/lib/raid/xor/xor-core.c
@@ -52,29 +52,14 @@ EXPORT_SYMBOL(xor_blocks);
 
 /* Set of all registered templates.  */
 static struct xor_block_template *__initdata template_list;
+static int __initdata xor_forced = false;
 
-#ifndef MODULE
 static void __init do_xor_register(struct xor_block_template *tmpl)
 {
        tmpl->next = template_list;
        template_list = tmpl;
 }
 
-static int __init register_xor_blocks(void)
-{
-       active_template = XOR_SELECT_TEMPLATE(NULL);
-
-       if (!active_template) {
-#define xor_speed      do_xor_register
-               // register all the templates and pick the first as the default
-               XOR_TRY_TEMPLATES;
-#undef xor_speed
-               active_template = template_list;
-       }
-       return 0;
-}
-#endif
-
 #define BENCH_SIZE     4096
 #define REPS           800U
 
@@ -85,9 +70,6 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
        unsigned long reps;
        ktime_t min, start, t0;
 
-       tmpl->next = template_list;
-       template_list = tmpl;
-
        preempt_disable();
 
        reps = 0;
@@ -111,63 +93,79 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
        pr_info("   %-16s: %5d MB/sec\n", tmpl->name, speed);
 }
 
-static int __init
-calibrate_xor_blocks(void)
+static int __init calibrate_xor_blocks(void)
 {
        void *b1, *b2;
        struct xor_block_template *f, *fastest;
 
-       fastest = XOR_SELECT_TEMPLATE(NULL);
-
-       if (fastest) {
-               printk(KERN_INFO "xor: automatically using best "
-                                "checksumming function   %-10s\n",
-                      fastest->name);
-               goto out;
-       }
+       if (xor_forced)
+               return 0;
 
        b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
        if (!b1) {
-               printk(KERN_WARNING "xor: Yikes!  No memory available.\n");
+               pr_info("xor: Yikes!  No memory available.\n");
                return -ENOMEM;
        }
        b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
 
-       /*
-        * If this arch/cpu has a short-circuited selection, don't loop through
-        * all the possible functions, just test the best one
-        */
-
-#define xor_speed(templ)       do_xor_speed((templ), b1, b2)
-
-       printk(KERN_INFO "xor: measuring software checksum speed\n");
-       template_list = NULL;
-       XOR_TRY_TEMPLATES;
+       pr_info("xor: measuring software checksum speed\n");
        fastest = template_list;
-       for (f = fastest; f; f = f->next)
+       for (f = template_list; f; f = f->next) {
+               do_xor_speed(f, b1, b2);
                if (f->speed > fastest->speed)
                        fastest = f;
-
+       }
+       active_template = fastest;
        pr_info("xor: using function: %s (%d MB/sec)\n",
               fastest->name, fastest->speed);
 
+       free_pages((unsigned long)b1, 2);
+       return 0;
+}
+
+static int __init xor_init(void)
+{
+       /*
+        * If this arch/cpu has a short-circuited selection, don't loop through
+        * all the possible functions, just use the best one.
+        */
+       active_template = XOR_SELECT_TEMPLATE(NULL);
+       if (active_template) {
+               pr_info("xor: automatically using best checksumming function   %-10s\n",
+                       active_template->name);
+               xor_forced = true;
+               return 0;
+       }
+
+#define xor_speed      do_xor_register
+       XOR_TRY_TEMPLATES;
 #undef xor_speed
 
-       free_pages((unsigned long)b1, 2);
-out:
-       active_template = fastest;
+#ifdef MODULE
+       return calibrate_xor_blocks();
+#else
+       /*
+        * Pick the first template as the temporary default until calibration
+        * happens.
+        */
+       active_template = template_list;
        return 0;
+#endif
 }
 
-static __exit void xor_exit(void) { }
+static __exit void xor_exit(void)
+{
+}
 
 MODULE_DESCRIPTION("RAID-5 checksumming functions");
 MODULE_LICENSE("GPL");
 
+/*
+ * When built-in we must register the default template before md, but we don't
+ * want calibration to run that early as that would delay the boot process.
+ */
 #ifndef MODULE
-/* when built-in xor.o must initialize before drivers/md/md.o */
-core_initcall(register_xor_blocks);
+__initcall(calibrate_xor_blocks);
 #endif
-
-module_init(calibrate_xor_blocks);
+core_initcall(xor_init);
 module_exit(xor_exit);
-- 
2.47.3


Reply via email to