Race condition in vfio will cause deadwait
From: Wen CongyangHi, Alex Williamson When using vfio, we encounter a problem: too many lspci processes are blocked in D state. I analyzed all processes, and found one process is blocked in pci_dev_lock(), another process is blocked in vfio_del_group_put(). I checked the backtrace, and found the following race condition: Process A(use sysfs to unbind the device): device_release_driver() device_lock(dev) __device_release_driver() dev->bus->remove(dev) // pci_device_remove() drv->remove(pci_dev) // vfio_pci_remove() vfio_del_group_dev() vfio_device_put() wait vfio_device is remove from the group. Process B gets vfio_device, so we will wait it. Process B: vfio_group_fops_unl_ioctl() vfio_group_get_device_fd() vfio_device_get_from_name() // we get vfio_device here device->ops->open() // vfio_pci_open() vfio_pci_enable() pci_reset_function() pci_dev_reset(dev, 0) pci_dev_lock() pci_cfg_access_lock(dev) // After this, lspci will be blocked device_lock(>dev) // the lock is hold by another process A Now, Process A is waiting Process B, and Process B is waiting Process A. I think we use pci_try_reset_function() to instead of pci_reset_function() can break this deadwait. Thanks, Wen Congyang
Race condition in vfio will cause deadwait
From: Wen Congyang Hi, Alex Williamson When using vfio, we encounter a problem: too many lspci processes are blocked in D state. I analyzed all processes, and found one process is blocked in pci_dev_lock(), another process is blocked in vfio_del_group_put(). I checked the backtrace, and found the following race condition: Process A(use sysfs to unbind the device): device_release_driver() device_lock(dev) __device_release_driver() dev->bus->remove(dev) // pci_device_remove() drv->remove(pci_dev) // vfio_pci_remove() vfio_del_group_dev() vfio_device_put() wait vfio_device is remove from the group. Process B gets vfio_device, so we will wait it. Process B: vfio_group_fops_unl_ioctl() vfio_group_get_device_fd() vfio_device_get_from_name() // we get vfio_device here device->ops->open() // vfio_pci_open() vfio_pci_enable() pci_reset_function() pci_dev_reset(dev, 0) pci_dev_lock() pci_cfg_access_lock(dev) // After this, lspci will be blocked device_lock(>dev) // the lock is hold by another process A Now, Process A is waiting Process B, and Process B is waiting Process A. I think we use pci_try_reset_function() to instead of pci_reset_function() can break this deadwait. Thanks, Wen Congyang
[PATCH v2] modpost: abort if module name is too long
Module name has a limited length, but currently the build system allows the build finishing even if the module name is too long. CC /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: warning: initializer-string for array of chars is too long [enabled by default] .name = KBUILD_MODNAME, ^ but it's merely a warning. This patch adds the check of the module name length in modpost and stops the build properly. Signed-off-by: Wanlong Gao <wanlong@gmail.com> --- scripts/mod/modpost.c | 29 - 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 30d752a..cb16985 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -47,6 +47,12 @@ enum export { export_unused_gpl, export_gpl_future, export_unknown }; +/* In kernel, this size is defined in linux/module.h; + * here we use Elf_Addr instead of long for covering cross-compile + */ + +#define MODULE_NAME_LEN (64 - sizeof(Elf_Addr)) + #define PRINTF __attribute__ ((format (printf, 1, 2))) PRINTF void fatal(const char *fmt, ...) @@ -2116,6 +2122,23 @@ static void check_exports(struct module *mod) } } +static int check_modname_len(struct module *mod) +{ + const char *mod_name; + + mod_name = strrchr(mod->name, '/'); + if (mod_name == NULL) + mod_name = mod->name; + else + mod_name++; + if (strlen(mod_name) >= MODULE_NAME_LEN) { + merror("module name is too long [%s.ko]\n", mod->name); + return 1; + } + + return 0; +} + /** * Header for the generated file **/ @@ -2154,11 +2177,6 @@ static void add_staging_flag(struct buffer *b, const char *name) buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n"); } -/* In kernel, this size is defined in linux/module.h; - * here we use Elf_Addr instead of long for covering cross-compile - */ -#define MODULE_NAME_LEN (64 - sizeof(Elf_Addr)) - /** * Record CRCs for unresolved symbols **/ @@ -2489,6 +2507,7 @@ int main(int argc, char **argv) buf.pos = 0; + err |= check_modname_len(mod); add_header(, mod); add_intree_flag(, !external_module); add_staging_flag(, mod->name); -- 2.9.4
[PATCH v2] modpost: abort if module name is too long
Module name has a limited length, but currently the build system allows the build finishing even if the module name is too long. CC /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: warning: initializer-string for array of chars is too long [enabled by default] .name = KBUILD_MODNAME, ^ but it's merely a warning. This patch adds the check of the module name length in modpost and stops the build properly. Signed-off-by: Wanlong Gao --- scripts/mod/modpost.c | 29 - 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 30d752a..cb16985 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -47,6 +47,12 @@ enum export { export_unused_gpl, export_gpl_future, export_unknown }; +/* In kernel, this size is defined in linux/module.h; + * here we use Elf_Addr instead of long for covering cross-compile + */ + +#define MODULE_NAME_LEN (64 - sizeof(Elf_Addr)) + #define PRINTF __attribute__ ((format (printf, 1, 2))) PRINTF void fatal(const char *fmt, ...) @@ -2116,6 +2122,23 @@ static void check_exports(struct module *mod) } } +static int check_modname_len(struct module *mod) +{ + const char *mod_name; + + mod_name = strrchr(mod->name, '/'); + if (mod_name == NULL) + mod_name = mod->name; + else + mod_name++; + if (strlen(mod_name) >= MODULE_NAME_LEN) { + merror("module name is too long [%s.ko]\n", mod->name); + return 1; + } + + return 0; +} + /** * Header for the generated file **/ @@ -2154,11 +2177,6 @@ static void add_staging_flag(struct buffer *b, const char *name) buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n"); } -/* In kernel, this size is defined in linux/module.h; - * here we use Elf_Addr instead of long for covering cross-compile - */ -#define MODULE_NAME_LEN (64 - sizeof(Elf_Addr)) - /** * Record CRCs for unresolved symbols **/ @@ -2489,6 +2507,7 @@ int main(int argc, char **argv) buf.pos = 0; + err |= check_modname_len(mod); add_header(, mod); add_intree_flag(, !external_module); add_staging_flag(, mod->name); -- 2.9.4
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/22 22:25, Jessica Yu wrote: > +++ Wanlong Gao [22/06/17 09:11 +0800]: >> >> >> On 2017/6/22 0:09, Jessica Yu wrote: >>> +++ Jessica Yu [06/06/17 20:41 -0700]: >>>> +++ Wanlong Gao [06/06/17 09:07 +0800]: >>>>> >>>>> >>>>> On 2017/6/5 10:09, Jessica Yu wrote: >>>>>> +++ Wanlong Gao [02/06/17 11:04 +0800]: >>>>>>> >>>>>>> >>>>>>> On 2017/6/2 7:23, Jessica Yu wrote: >>>>>>>> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>>>>>>>> >>>>>>>>> >>>>>>>>> On 2017/5/31 11:30, Jessica Yu wrote: >>>>>>>>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>>>>>>>> Hi Jessica, >>>>>>>>>>> >>>>>>>>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>>>>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>>>>>>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>>>>>>>>>>> >>>>>>>>>>>>> Module name has a limited length, but currently the build system >>>>>>>>>>>>> allows the build finishing even if the module name is too long. >>>>>>>>>>>>> >>>>>>>>>>>>> CC >>>>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>>>>>>>> warning: initializer-string for array of chars is too long >>>>>>>>>>>>> [enabled by default] >>>>>>>>>>>>> .name = KBUILD_MODNAME, >>>>>>>>>>>>> ^ >>>>>>>>>>>>> >>>>>>>>>>>>> but it's merely a warning. >>>>>>>>>>>>> >>>>>>>>>>>>> This patch adds the check of the module name length in modpost >>>>>>>>>>>>> and stops >>>>>>>>>>>>> the build properly. >>>>>>>>>>>>> >>>>>>>>>>>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>>>>>>>>>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>>>>>>>>>>> --- >>>>>>>>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>>>>>>>> 1 file changed, 11 insertions(+) >>>>>>>>>>>>> >>>>>>>>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>>>>>>>> index 30d752a..db11c57 100644 >>>>>>>>>>>>> --- a/scripts/mod/modpost.c >>>>>>>>>>>>> +++ b/scripts/mod/modpost.c >>>>>>>>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, >>>>>>>>>>>>> struct module *mod) >>>>>>>>>>>>> { >>>>>>>>>>>>>struct symbol *s, *exp; >>>>>>>>>>>>>int err = 0; >>>>>>>>>>>>> +const char *mod_name; >>>>>>>>>>>>> + >>>>>>>>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>>>>>>>> +if (mod_name == NULL) >>>>>>>>>>>>> +mod_name = mod->name; >>>>>>>>>>>>> +else >>>>>>>>>>>>> +mod_name++; >>>>>>>>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>>>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>>>>>>>> +return 1; >>>>>>>>>>>>> +} >>>>>>>>>>>> >>>>>
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/22 22:25, Jessica Yu wrote: > +++ Wanlong Gao [22/06/17 09:11 +0800]: >> >> >> On 2017/6/22 0:09, Jessica Yu wrote: >>> +++ Jessica Yu [06/06/17 20:41 -0700]: >>>> +++ Wanlong Gao [06/06/17 09:07 +0800]: >>>>> >>>>> >>>>> On 2017/6/5 10:09, Jessica Yu wrote: >>>>>> +++ Wanlong Gao [02/06/17 11:04 +0800]: >>>>>>> >>>>>>> >>>>>>> On 2017/6/2 7:23, Jessica Yu wrote: >>>>>>>> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>>>>>>>> >>>>>>>>> >>>>>>>>> On 2017/5/31 11:30, Jessica Yu wrote: >>>>>>>>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>>>>>>>> Hi Jessica, >>>>>>>>>>> >>>>>>>>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>>>>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>>>>>>>> From: Wanlong Gao >>>>>>>>>>>>> >>>>>>>>>>>>> Module name has a limited length, but currently the build system >>>>>>>>>>>>> allows the build finishing even if the module name is too long. >>>>>>>>>>>>> >>>>>>>>>>>>> CC >>>>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>>>>>>>> warning: initializer-string for array of chars is too long >>>>>>>>>>>>> [enabled by default] >>>>>>>>>>>>> .name = KBUILD_MODNAME, >>>>>>>>>>>>> ^ >>>>>>>>>>>>> >>>>>>>>>>>>> but it's merely a warning. >>>>>>>>>>>>> >>>>>>>>>>>>> This patch adds the check of the module name length in modpost >>>>>>>>>>>>> and stops >>>>>>>>>>>>> the build properly. >>>>>>>>>>>>> >>>>>>>>>>>>> Signed-off-by: Wanlong Gao >>>>>>>>>>>>> Signed-off-by: Xie XiuQi >>>>>>>>>>>>> --- >>>>>>>>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>>>>>>>> 1 file changed, 11 insertions(+) >>>>>>>>>>>>> >>>>>>>>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>>>>>>>> index 30d752a..db11c57 100644 >>>>>>>>>>>>> --- a/scripts/mod/modpost.c >>>>>>>>>>>>> +++ b/scripts/mod/modpost.c >>>>>>>>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, >>>>>>>>>>>>> struct module *mod) >>>>>>>>>>>>> { >>>>>>>>>>>>>struct symbol *s, *exp; >>>>>>>>>>>>>int err = 0; >>>>>>>>>>>>> +const char *mod_name; >>>>>>>>>>>>> + >>>>>>>>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>>>>>>>> +if (mod_name == NULL) >>>>>>>>>>>>> +mod_name = mod->name; >>>>>>>>>>>>> +else >>>>>>>>>>>>> +mod_name++; >>>>>>>>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>>>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>>>>>>>> +return 1; >>>>>>>>>>>>> +} >>>>>>>>>>>> >>>>>>>>>>>> Hi Xie, >>>>>>>>>>>> >
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/22 0:09, Jessica Yu wrote: > +++ Jessica Yu [06/06/17 20:41 -0700]: >> +++ Wanlong Gao [06/06/17 09:07 +0800]: >>> >>> >>> On 2017/6/5 10:09, Jessica Yu wrote: >>>> +++ Wanlong Gao [02/06/17 11:04 +0800]: >>>>> >>>>> >>>>> On 2017/6/2 7:23, Jessica Yu wrote: >>>>>> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>>>>>> >>>>>>> >>>>>>> On 2017/5/31 11:30, Jessica Yu wrote: >>>>>>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>>>>>> Hi Jessica, >>>>>>>>> >>>>>>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>>>>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>>>>>>>>> >>>>>>>>>>> Module name has a limited length, but currently the build system >>>>>>>>>>> allows the build finishing even if the module name is too long. >>>>>>>>>>> >>>>>>>>>>> CC >>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>>>>>> warning: initializer-string for array of chars is too long [enabled >>>>>>>>>>> by default] >>>>>>>>>>> .name = KBUILD_MODNAME, >>>>>>>>>>> ^ >>>>>>>>>>> >>>>>>>>>>> but it's merely a warning. >>>>>>>>>>> >>>>>>>>>>> This patch adds the check of the module name length in modpost and >>>>>>>>>>> stops >>>>>>>>>>> the build properly. >>>>>>>>>>> >>>>>>>>>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>>>>>>>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>>>>>>>>> --- >>>>>>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>>>>>> 1 file changed, 11 insertions(+) >>>>>>>>>>> >>>>>>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>>>>>> index 30d752a..db11c57 100644 >>>>>>>>>>> --- a/scripts/mod/modpost.c >>>>>>>>>>> +++ b/scripts/mod/modpost.c >>>>>>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, >>>>>>>>>>> struct module *mod) >>>>>>>>>>> { >>>>>>>>>>>struct symbol *s, *exp; >>>>>>>>>>>int err = 0; >>>>>>>>>>> +const char *mod_name; >>>>>>>>>>> + >>>>>>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>>>>>> +if (mod_name == NULL) >>>>>>>>>>> +mod_name = mod->name; >>>>>>>>>>> +else >>>>>>>>>>> +mod_name++; >>>>>>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>>>>>> +return 1; >>>>>>>>>>> +} >>>>>>>>>> >>>>>>>>>> Hi Xie, >>>>>>>>>> >>>>>>>>>> This check shouldn't be in add_versions() (which does something else >>>>>>>>>> entirely), >>>>>>>>>> it should probably be put in a separate helper function called from >>>>>>>>>> main. But >>>>>>>>>> I'm not a big fan of the extra string manipulation to do something >>>>>>>>>> this simple. >>>>>>>>>> >>>>>>>>&g
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/22 0:09, Jessica Yu wrote: > +++ Jessica Yu [06/06/17 20:41 -0700]: >> +++ Wanlong Gao [06/06/17 09:07 +0800]: >>> >>> >>> On 2017/6/5 10:09, Jessica Yu wrote: >>>> +++ Wanlong Gao [02/06/17 11:04 +0800]: >>>>> >>>>> >>>>> On 2017/6/2 7:23, Jessica Yu wrote: >>>>>> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>>>>>> >>>>>>> >>>>>>> On 2017/5/31 11:30, Jessica Yu wrote: >>>>>>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>>>>>> Hi Jessica, >>>>>>>>> >>>>>>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>>>>>> From: Wanlong Gao >>>>>>>>>>> >>>>>>>>>>> Module name has a limited length, but currently the build system >>>>>>>>>>> allows the build finishing even if the module name is too long. >>>>>>>>>>> >>>>>>>>>>> CC >>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>>>>>> warning: initializer-string for array of chars is too long [enabled >>>>>>>>>>> by default] >>>>>>>>>>> .name = KBUILD_MODNAME, >>>>>>>>>>> ^ >>>>>>>>>>> >>>>>>>>>>> but it's merely a warning. >>>>>>>>>>> >>>>>>>>>>> This patch adds the check of the module name length in modpost and >>>>>>>>>>> stops >>>>>>>>>>> the build properly. >>>>>>>>>>> >>>>>>>>>>> Signed-off-by: Wanlong Gao >>>>>>>>>>> Signed-off-by: Xie XiuQi >>>>>>>>>>> --- >>>>>>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>>>>>> 1 file changed, 11 insertions(+) >>>>>>>>>>> >>>>>>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>>>>>> index 30d752a..db11c57 100644 >>>>>>>>>>> --- a/scripts/mod/modpost.c >>>>>>>>>>> +++ b/scripts/mod/modpost.c >>>>>>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, >>>>>>>>>>> struct module *mod) >>>>>>>>>>> { >>>>>>>>>>>struct symbol *s, *exp; >>>>>>>>>>>int err = 0; >>>>>>>>>>> +const char *mod_name; >>>>>>>>>>> + >>>>>>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>>>>>> +if (mod_name == NULL) >>>>>>>>>>> +mod_name = mod->name; >>>>>>>>>>> +else >>>>>>>>>>> +mod_name++; >>>>>>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>>>>>> +return 1; >>>>>>>>>>> +} >>>>>>>>>> >>>>>>>>>> Hi Xie, >>>>>>>>>> >>>>>>>>>> This check shouldn't be in add_versions() (which does something else >>>>>>>>>> entirely), >>>>>>>>>> it should probably be put in a separate helper function called from >>>>>>>>>> main. But >>>>>>>>>> I'm not a big fan of the extra string manipulation to do something >>>>>>>>>> this simple. >>>>>>>>>> >>>>>>>>>> I think this check can be vastly simplified, how about something >>&
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/5 10:09, Jessica Yu wrote: > +++ Wanlong Gao [02/06/17 11:04 +0800]: >> >> >> On 2017/6/2 7:23, Jessica Yu wrote: >>> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>>> >>>> >>>> On 2017/5/31 11:30, Jessica Yu wrote: >>>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>>> Hi Jessica, >>>>>> >>>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>>>>>> >>>>>>>> Module name has a limited length, but currently the build system >>>>>>>> allows the build finishing even if the module name is too long. >>>>>>>> >>>>>>>> CC >>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>>> warning: initializer-string for array of chars is too long [enabled by >>>>>>>> default] >>>>>>>> .name = KBUILD_MODNAME, >>>>>>>> ^ >>>>>>>> >>>>>>>> but it's merely a warning. >>>>>>>> >>>>>>>> This patch adds the check of the module name length in modpost and >>>>>>>> stops >>>>>>>> the build properly. >>>>>>>> >>>>>>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>>>>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>>>>>> --- >>>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>>> 1 file changed, 11 insertions(+) >>>>>>>> >>>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>>> index 30d752a..db11c57 100644 >>>>>>>> --- a/scripts/mod/modpost.c >>>>>>>> +++ b/scripts/mod/modpost.c >>>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, >>>>>>>> struct module *mod) >>>>>>>> { >>>>>>>> struct symbol *s, *exp; >>>>>>>> int err = 0; >>>>>>>> +const char *mod_name; >>>>>>>> + >>>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>>> +if (mod_name == NULL) >>>>>>>> +mod_name = mod->name; >>>>>>>> +else >>>>>>>> +mod_name++; >>>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>>> +return 1; >>>>>>>> +} >>>>>>> >>>>>>> Hi Xie, >>>>>>> >>>>>>> This check shouldn't be in add_versions() (which does something else >>>>>>> entirely), >>>>>>> it should probably be put in a separate helper function called from >>>>>>> main. But >>>>>>> I'm not a big fan of the extra string manipulation to do something this >>>>>>> simple. >>>>>>> >>>>>>> I think this check can be vastly simplified, how about something like >>>>>>> the >>>>>>> following? >>>>>> >>>>>> This looks better, would you apply your following patch? >>>>>> >>>>>> Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> >>>>>> Tested-by: Wanlong Gao <gaowanl...@huawei.com> >>>>> >>>>> Sure, thanks for testing. I'll go ahead and format this into a proper >>>>> patch and resend. >>>> >>>> Please wait, I just found that this patch makes the built module can't >>>> be inserted by the following error: >>>> >>>> # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko >>>> insmod: ERROR: could not insert module >>>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: In
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/5 10:09, Jessica Yu wrote: > +++ Wanlong Gao [02/06/17 11:04 +0800]: >> >> >> On 2017/6/2 7:23, Jessica Yu wrote: >>> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>>> >>>> >>>> On 2017/5/31 11:30, Jessica Yu wrote: >>>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>>> Hi Jessica, >>>>>> >>>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>>> From: Wanlong Gao >>>>>>>> >>>>>>>> Module name has a limited length, but currently the build system >>>>>>>> allows the build finishing even if the module name is too long. >>>>>>>> >>>>>>>> CC >>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>>> warning: initializer-string for array of chars is too long [enabled by >>>>>>>> default] >>>>>>>> .name = KBUILD_MODNAME, >>>>>>>> ^ >>>>>>>> >>>>>>>> but it's merely a warning. >>>>>>>> >>>>>>>> This patch adds the check of the module name length in modpost and >>>>>>>> stops >>>>>>>> the build properly. >>>>>>>> >>>>>>>> Signed-off-by: Wanlong Gao >>>>>>>> Signed-off-by: Xie XiuQi >>>>>>>> --- >>>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>>> 1 file changed, 11 insertions(+) >>>>>>>> >>>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>>> index 30d752a..db11c57 100644 >>>>>>>> --- a/scripts/mod/modpost.c >>>>>>>> +++ b/scripts/mod/modpost.c >>>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, >>>>>>>> struct module *mod) >>>>>>>> { >>>>>>>> struct symbol *s, *exp; >>>>>>>> int err = 0; >>>>>>>> +const char *mod_name; >>>>>>>> + >>>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>>> +if (mod_name == NULL) >>>>>>>> +mod_name = mod->name; >>>>>>>> +else >>>>>>>> +mod_name++; >>>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>>> +return 1; >>>>>>>> +} >>>>>>> >>>>>>> Hi Xie, >>>>>>> >>>>>>> This check shouldn't be in add_versions() (which does something else >>>>>>> entirely), >>>>>>> it should probably be put in a separate helper function called from >>>>>>> main. But >>>>>>> I'm not a big fan of the extra string manipulation to do something this >>>>>>> simple. >>>>>>> >>>>>>> I think this check can be vastly simplified, how about something like >>>>>>> the >>>>>>> following? >>>>>> >>>>>> This looks better, would you apply your following patch? >>>>>> >>>>>> Reviewed-by: Wanlong Gao >>>>>> Tested-by: Wanlong Gao >>>>> >>>>> Sure, thanks for testing. I'll go ahead and format this into a proper >>>>> patch and resend. >>>> >>>> Please wait, I just found that this patch makes the built module can't >>>> be inserted by the following error: >>>> >>>> # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko >>>> insmod: ERROR: could not insert module >>>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid >>>> parameters >>>> >>>> # dmesg >>>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwx
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/2 11:04, Wanlong Gao wrote: > > > On 2017/6/2 7:23, Jessica Yu wrote: >> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>> >>> >>> On 2017/5/31 11:30, Jessica Yu wrote: >>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>> Hi Jessica, >>>>> >>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>>>>> >>>>>>> Module name has a limited length, but currently the build system >>>>>>> allows the build finishing even if the module name is too long. >>>>>>> >>>>>>> CC >>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>> warning: initializer-string for array of chars is too long [enabled by >>>>>>> default] >>>>>>> .name = KBUILD_MODNAME, >>>>>>> ^ >>>>>>> >>>>>>> but it's merely a warning. >>>>>>> >>>>>>> This patch adds the check of the module name length in modpost and stops >>>>>>> the build properly. >>>>>>> >>>>>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>>>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>>>>> --- >>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>> 1 file changed, 11 insertions(+) >>>>>>> >>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>> index 30d752a..db11c57 100644 >>>>>>> --- a/scripts/mod/modpost.c >>>>>>> +++ b/scripts/mod/modpost.c >>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>>>>> module *mod) >>>>>>> { >>>>>>> struct symbol *s, *exp; >>>>>>> int err = 0; >>>>>>> +const char *mod_name; >>>>>>> + >>>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>>> +if (mod_name == NULL) >>>>>>> +mod_name = mod->name; >>>>>>> +else >>>>>>> +mod_name++; >>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>> +return 1; >>>>>>> +} >>>>>> >>>>>> Hi Xie, >>>>>> >>>>>> This check shouldn't be in add_versions() (which does something else >>>>>> entirely), >>>>>> it should probably be put in a separate helper function called from >>>>>> main. But >>>>>> I'm not a big fan of the extra string manipulation to do something this >>>>>> simple. >>>>>> >>>>>> I think this check can be vastly simplified, how about something like the >>>>>> following? >>>>> >>>>> This looks better, would you apply your following patch? >>>>> >>>>> Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> >>>>> Tested-by: Wanlong Gao <gaowanl...@huawei.com> >>>> >>>> Sure, thanks for testing. I'll go ahead and format this into a proper >>>> patch and resend. >>> >>> Please wait, I just found that this patch makes the built module can't >>> be inserted by the following error: >>> >>> # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko >>> insmod: ERROR: could not insert module >>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid >>> parameters >>> >>> # dmesg >>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol >>> __fentry__ (err -22) >> >> Hm, I am unable to reproduce this. It looks like __fentry__ is missing >> from your kernel, you may have a mismatch between the kernel config >> that you're running and the config you are using to build the module.
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/2 11:04, Wanlong Gao wrote: > > > On 2017/6/2 7:23, Jessica Yu wrote: >> +++ Wanlong Gao [31/05/17 11:48 +0800]: >>> >>> >>> On 2017/5/31 11:30, Jessica Yu wrote: >>>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>>> Hi Jessica, >>>>> >>>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>>> From: Wanlong Gao >>>>>>> >>>>>>> Module name has a limited length, but currently the build system >>>>>>> allows the build finishing even if the module name is too long. >>>>>>> >>>>>>> CC >>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>>> warning: initializer-string for array of chars is too long [enabled by >>>>>>> default] >>>>>>> .name = KBUILD_MODNAME, >>>>>>> ^ >>>>>>> >>>>>>> but it's merely a warning. >>>>>>> >>>>>>> This patch adds the check of the module name length in modpost and stops >>>>>>> the build properly. >>>>>>> >>>>>>> Signed-off-by: Wanlong Gao >>>>>>> Signed-off-by: Xie XiuQi >>>>>>> --- >>>>>>> scripts/mod/modpost.c | 11 +++ >>>>>>> 1 file changed, 11 insertions(+) >>>>>>> >>>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>>> index 30d752a..db11c57 100644 >>>>>>> --- a/scripts/mod/modpost.c >>>>>>> +++ b/scripts/mod/modpost.c >>>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>>>>> module *mod) >>>>>>> { >>>>>>> struct symbol *s, *exp; >>>>>>> int err = 0; >>>>>>> +const char *mod_name; >>>>>>> + >>>>>>> + mod_name = strrchr(mod->name, '/'); >>>>>>> +if (mod_name == NULL) >>>>>>> +mod_name = mod->name; >>>>>>> +else >>>>>>> +mod_name++; >>>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>>> +return 1; >>>>>>> +} >>>>>> >>>>>> Hi Xie, >>>>>> >>>>>> This check shouldn't be in add_versions() (which does something else >>>>>> entirely), >>>>>> it should probably be put in a separate helper function called from >>>>>> main. But >>>>>> I'm not a big fan of the extra string manipulation to do something this >>>>>> simple. >>>>>> >>>>>> I think this check can be vastly simplified, how about something like the >>>>>> following? >>>>> >>>>> This looks better, would you apply your following patch? >>>>> >>>>> Reviewed-by: Wanlong Gao >>>>> Tested-by: Wanlong Gao >>>> >>>> Sure, thanks for testing. I'll go ahead and format this into a proper >>>> patch and resend. >>> >>> Please wait, I just found that this patch makes the built module can't >>> be inserted by the following error: >>> >>> # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko >>> insmod: ERROR: could not insert module >>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid >>> parameters >>> >>> # dmesg >>> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol >>> __fentry__ (err -22) >> >> Hm, I am unable to reproduce this. It looks like __fentry__ is missing >> from your kernel, you may have a mismatch between the kernel config >> that you're running and the config you are using to build the module. >> In other words, it seems like you might have built the module with >> CONFIG_FTRACE but built the kernel without. >> >>
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/2 7:23, Jessica Yu wrote: > +++ Wanlong Gao [31/05/17 11:48 +0800]: >> >> >> On 2017/5/31 11:30, Jessica Yu wrote: >>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>> Hi Jessica, >>>> >>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>>>> >>>>>> Module name has a limited length, but currently the build system >>>>>> allows the build finishing even if the module name is too long. >>>>>> >>>>>> CC >>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>> warning: initializer-string for array of chars is too long [enabled by >>>>>> default] >>>>>> .name = KBUILD_MODNAME, >>>>>> ^ >>>>>> >>>>>> but it's merely a warning. >>>>>> >>>>>> This patch adds the check of the module name length in modpost and stops >>>>>> the build properly. >>>>>> >>>>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>>>> --- >>>>>> scripts/mod/modpost.c | 11 +++ >>>>>> 1 file changed, 11 insertions(+) >>>>>> >>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>> index 30d752a..db11c57 100644 >>>>>> --- a/scripts/mod/modpost.c >>>>>> +++ b/scripts/mod/modpost.c >>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>>>> module *mod) >>>>>> { >>>>>> struct symbol *s, *exp; >>>>>> int err = 0; >>>>>> +const char *mod_name; >>>>>> + >>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>> +if (mod_name == NULL) >>>>>> +mod_name = mod->name; >>>>>> +else >>>>>> +mod_name++; >>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>> +return 1; >>>>>> +} >>>>> >>>>> Hi Xie, >>>>> >>>>> This check shouldn't be in add_versions() (which does something else >>>>> entirely), >>>>> it should probably be put in a separate helper function called from main. >>>>> But >>>>> I'm not a big fan of the extra string manipulation to do something this >>>>> simple. >>>>> >>>>> I think this check can be vastly simplified, how about something like the >>>>> following? >>>> >>>> This looks better, would you apply your following patch? >>>> >>>> Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> >>>> Tested-by: Wanlong Gao <gaowanl...@huawei.com> >>> >>> Sure, thanks for testing. I'll go ahead and format this into a proper >>> patch and resend. >> >> Please wait, I just found that this patch makes the built module can't >> be inserted by the following error: >> >> # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko >> insmod: ERROR: could not insert module >> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid >> parameters >> >> # dmesg >> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol >> __fentry__ (err -22) > > Hm, I am unable to reproduce this. It looks like __fentry__ is missing > from your kernel, you may have a mismatch between the kernel config > that you're running and the config you are using to build the module. > In other words, it seems like you might have built the module with > CONFIG_FTRACE but built the kernel without. > > Few questions - > > What is the output of running `grep __fentry__ /proc/kallsyms`? > Sure it has. > Does your module correspond to the running kernel version? Sure. > > Do you have CONFIG_FTRACE/FUNCTION_TRACER enabled in your running > kernel? >
Re: [PATCH] modpost: abort if a module name is too long
On 2017/6/2 7:23, Jessica Yu wrote: > +++ Wanlong Gao [31/05/17 11:48 +0800]: >> >> >> On 2017/5/31 11:30, Jessica Yu wrote: >>> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>>> Hi Jessica, >>>> >>>> On 2017/5/29 17:10, Jessica Yu wrote: >>>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>>> From: Wanlong Gao >>>>>> >>>>>> Module name has a limited length, but currently the build system >>>>>> allows the build finishing even if the module name is too long. >>>>>> >>>>>> CC >>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>>> warning: initializer-string for array of chars is too long [enabled by >>>>>> default] >>>>>> .name = KBUILD_MODNAME, >>>>>> ^ >>>>>> >>>>>> but it's merely a warning. >>>>>> >>>>>> This patch adds the check of the module name length in modpost and stops >>>>>> the build properly. >>>>>> >>>>>> Signed-off-by: Wanlong Gao >>>>>> Signed-off-by: Xie XiuQi >>>>>> --- >>>>>> scripts/mod/modpost.c | 11 +++ >>>>>> 1 file changed, 11 insertions(+) >>>>>> >>>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>>> index 30d752a..db11c57 100644 >>>>>> --- a/scripts/mod/modpost.c >>>>>> +++ b/scripts/mod/modpost.c >>>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>>>> module *mod) >>>>>> { >>>>>> struct symbol *s, *exp; >>>>>> int err = 0; >>>>>> +const char *mod_name; >>>>>> + >>>>>> +mod_name = strrchr(mod->name, '/'); >>>>>> +if (mod_name == NULL) >>>>>> + mod_name = mod->name; >>>>>> +else >>>>>> +mod_name++; >>>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>>> +return 1; >>>>>> +} >>>>> >>>>> Hi Xie, >>>>> >>>>> This check shouldn't be in add_versions() (which does something else >>>>> entirely), >>>>> it should probably be put in a separate helper function called from main. >>>>> But >>>>> I'm not a big fan of the extra string manipulation to do something this >>>>> simple. >>>>> >>>>> I think this check can be vastly simplified, how about something like the >>>>> following? >>>> >>>> This looks better, would you apply your following patch? >>>> >>>> Reviewed-by: Wanlong Gao >>>> Tested-by: Wanlong Gao >>> >>> Sure, thanks for testing. I'll go ahead and format this into a proper >>> patch and resend. >> >> Please wait, I just found that this patch makes the built module can't >> be inserted by the following error: >> >> # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko >> insmod: ERROR: could not insert module >> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid >> parameters >> >> # dmesg >> abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol >> __fentry__ (err -22) > > Hm, I am unable to reproduce this. It looks like __fentry__ is missing > from your kernel, you may have a mismatch between the kernel config > that you're running and the config you are using to build the module. > In other words, it seems like you might have built the module with > CONFIG_FTRACE but built the kernel without. > > Few questions - > > What is the output of running `grep __fentry__ /proc/kallsyms`? > Sure it has. > Does your module correspond to the running kernel version? Sure. > > Do you have CONFIG_FTRACE/FUNCTION_TRACER enabled in your running > kernel? > Sure. > Is that the full dmesg output (are there any other error messages)? Even when I compiled the kernel with your patch, the kernel
Re: [PATCH] modpost: abort if a module name is too long
Jessica, On 2017/5/31 11:48, Wanlong Gao wrote: > > > On 2017/5/31 11:30, Jessica Yu wrote: >> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>> Hi Jessica, >>> >>> On 2017/5/29 17:10, Jessica Yu wrote: >>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>>> >>>>> Module name has a limited length, but currently the build system >>>>> allows the build finishing even if the module name is too long. >>>>> >>>>> CC >>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>> warning: initializer-string for array of chars is too long [enabled by >>>>> default] >>>>> .name = KBUILD_MODNAME, >>>>> ^ >>>>> >>>>> but it's merely a warning. >>>>> >>>>> This patch adds the check of the module name length in modpost and stops >>>>> the build properly. >>>>> >>>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>>> --- >>>>> scripts/mod/modpost.c | 11 +++ >>>>> 1 file changed, 11 insertions(+) >>>>> >>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>> index 30d752a..db11c57 100644 >>>>> --- a/scripts/mod/modpost.c >>>>> +++ b/scripts/mod/modpost.c >>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>>> module *mod) >>>>> { >>>>> struct symbol *s, *exp; >>>>> int err = 0; >>>>> +const char *mod_name; >>>>> + >>>>> +mod_name = strrchr(mod->name, '/'); >>>>> +if (mod_name == NULL) >>>>> +mod_name = mod->name; >>>>> +else >>>>> +mod_name++; >>>>> + if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>> +return 1; >>>>> +} >>>> >>>> Hi Xie, >>>> >>>> This check shouldn't be in add_versions() (which does something else >>>> entirely), >>>> it should probably be put in a separate helper function called from main. >>>> But >>>> I'm not a big fan of the extra string manipulation to do something this >>>> simple. >>>> >>>> I think this check can be vastly simplified, how about something like the >>>> following? >>> >>> This looks better, would you apply your following patch? >>> >>> Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> >>> Tested-by: Wanlong Gao <gaowanl...@huawei.com> >> >> Sure, thanks for testing. I'll go ahead and format this into a proper >> patch and resend. > > Please wait, I just found that this patch makes the built module can't > be inserted by the following error: > > # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko > insmod: ERROR: could not insert module > abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid parameters > > # dmesg > abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol > __fentry__ (err -22) > > Any idea about this error? Thanks, Wanlong Gao > Thanks, > Wanlong Gao > >> >> Jessica >> >>>> >>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>> index 48397fe..bb09fc7 100644 >>>> --- a/scripts/mod/modpost.c >>>> +++ b/scripts/mod/modpost.c >>>> @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct >>>> module *mod) >>>> "#endif\n"); >>>> buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); >>>> buf_printf(b, "};\n"); >>>> +buf_printf(b, "\n"); >>>> +buf_printf(b, "static void __attribute__((section(\".discard\"), >>>> used)) __modname_test(void)\n"); >>>> +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > >>>> MODULE_NAME_LEN); }\n"); >>>> } >>>> >>>> static void add_intree_flag(struct buffer *b, int is_intree) >>>> >>>> This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the >>>> build if >>>> it does. >>>> >>>> Jessica >>>> >>>>> for (s = mod->unres; s; s = s->next) { >>>>> exp = find_symbol(s->name); >>>>> -- >>>>> 1.8.3.1 >>>>> >>>> >>>> . >>>> >>> >> >> . >> > > > . >
Re: [PATCH] modpost: abort if a module name is too long
Jessica, On 2017/5/31 11:48, Wanlong Gao wrote: > > > On 2017/5/31 11:30, Jessica Yu wrote: >> +++ Wanlong Gao [31/05/17 10:23 +0800]: >>> Hi Jessica, >>> >>> On 2017/5/29 17:10, Jessica Yu wrote: >>>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>>> From: Wanlong Gao >>>>> >>>>> Module name has a limited length, but currently the build system >>>>> allows the build finishing even if the module name is too long. >>>>> >>>>> CC >>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>>> warning: initializer-string for array of chars is too long [enabled by >>>>> default] >>>>> .name = KBUILD_MODNAME, >>>>> ^ >>>>> >>>>> but it's merely a warning. >>>>> >>>>> This patch adds the check of the module name length in modpost and stops >>>>> the build properly. >>>>> >>>>> Signed-off-by: Wanlong Gao >>>>> Signed-off-by: Xie XiuQi >>>>> --- >>>>> scripts/mod/modpost.c | 11 +++ >>>>> 1 file changed, 11 insertions(+) >>>>> >>>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>>> index 30d752a..db11c57 100644 >>>>> --- a/scripts/mod/modpost.c >>>>> +++ b/scripts/mod/modpost.c >>>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>>> module *mod) >>>>> { >>>>> struct symbol *s, *exp; >>>>> int err = 0; >>>>> +const char *mod_name; >>>>> + >>>>> +mod_name = strrchr(mod->name, '/'); >>>>> +if (mod_name == NULL) >>>>> +mod_name = mod->name; >>>>> +else >>>>> + mod_name++; >>>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>>> +return 1; >>>>> +} >>>> >>>> Hi Xie, >>>> >>>> This check shouldn't be in add_versions() (which does something else >>>> entirely), >>>> it should probably be put in a separate helper function called from main. >>>> But >>>> I'm not a big fan of the extra string manipulation to do something this >>>> simple. >>>> >>>> I think this check can be vastly simplified, how about something like the >>>> following? >>> >>> This looks better, would you apply your following patch? >>> >>> Reviewed-by: Wanlong Gao >>> Tested-by: Wanlong Gao >> >> Sure, thanks for testing. I'll go ahead and format this into a proper >> patch and resend. > > Please wait, I just found that this patch makes the built module can't > be inserted by the following error: > > # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko > insmod: ERROR: could not insert module > abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid parameters > > # dmesg > abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol > __fentry__ (err -22) > > Any idea about this error? Thanks, Wanlong Gao > Thanks, > Wanlong Gao > >> >> Jessica >> >>>> >>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>> index 48397fe..bb09fc7 100644 >>>> --- a/scripts/mod/modpost.c >>>> +++ b/scripts/mod/modpost.c >>>> @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct >>>> module *mod) >>>> "#endif\n"); >>>> buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); >>>> buf_printf(b, "};\n"); >>>> +buf_printf(b, "\n"); >>>> +buf_printf(b, "static void __attribute__((section(\".discard\"), >>>> used)) __modname_test(void)\n"); >>>> +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > >>>> MODULE_NAME_LEN); }\n"); >>>> } >>>> >>>> static void add_intree_flag(struct buffer *b, int is_intree) >>>> >>>> This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the >>>> build if >>>> it does. >>>> >>>> Jessica >>>> >>>>> for (s = mod->unres; s; s = s->next) { >>>>> exp = find_symbol(s->name); >>>>> -- >>>>> 1.8.3.1 >>>>> >>>> >>>> . >>>> >>> >> >> . >> > > > . >
Re: [PATCH] modpost: abort if a module name is too long
On 2017/5/31 11:30, Jessica Yu wrote: > +++ Wanlong Gao [31/05/17 10:23 +0800]: >> Hi Jessica, >> >> On 2017/5/29 17:10, Jessica Yu wrote: >>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>> >>>> Module name has a limited length, but currently the build system >>>> allows the build finishing even if the module name is too long. >>>> >>>> CC >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>> warning: initializer-string for array of chars is too long [enabled by >>>> default] >>>> .name = KBUILD_MODNAME, >>>> ^ >>>> >>>> but it's merely a warning. >>>> >>>> This patch adds the check of the module name length in modpost and stops >>>> the build properly. >>>> >>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>> --- >>>> scripts/mod/modpost.c | 11 +++ >>>> 1 file changed, 11 insertions(+) >>>> >>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>> index 30d752a..db11c57 100644 >>>> --- a/scripts/mod/modpost.c >>>> +++ b/scripts/mod/modpost.c >>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>> module *mod) >>>> { >>>> struct symbol *s, *exp; >>>> int err = 0; >>>> +const char *mod_name; >>>> + >>>> +mod_name = strrchr(mod->name, '/'); >>>> +if (mod_name == NULL) >>>> +mod_name = mod->name; >>>> +else >>>> +mod_name++; >>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>> + merror("module name is too long [%s.ko]\n", mod->name); >>>> +return 1; >>>> +} >>> >>> Hi Xie, >>> >>> This check shouldn't be in add_versions() (which does something else >>> entirely), >>> it should probably be put in a separate helper function called from main. >>> But >>> I'm not a big fan of the extra string manipulation to do something this >>> simple. >>> >>> I think this check can be vastly simplified, how about something like the >>> following? >> >> This looks better, would you apply your following patch? >> >> Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> >> Tested-by: Wanlong Gao <gaowanl...@huawei.com> > > Sure, thanks for testing. I'll go ahead and format this into a proper > patch and resend. Please wait, I just found that this patch makes the built module can't be inserted by the following error: # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko insmod: ERROR: could not insert module abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid parameters # dmesg abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol __fentry__ (err -22) Thanks, Wanlong Gao > > Jessica > >>> >>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>> index 48397fe..bb09fc7 100644 >>> --- a/scripts/mod/modpost.c >>> +++ b/scripts/mod/modpost.c >>> @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct >>> module *mod) >>> "#endif\n"); >>> buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); >>> buf_printf(b, "};\n"); >>> +buf_printf(b, "\n"); >>> +buf_printf(b, "static void __attribute__((section(\".discard\"), >>> used)) __modname_test(void)\n"); >>> +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > >>> MODULE_NAME_LEN); }\n"); >>> } >>> >>> static void add_intree_flag(struct buffer *b, int is_intree) >>> >>> This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the build >>> if >>> it does. >>> >>> Jessica >>> >>>> for (s = mod->unres; s; s = s->next) { >>>> exp = find_symbol(s->name); >>>> -- >>>> 1.8.3.1 >>>> >>> >>> . >>> >> > > . >
Re: [PATCH] modpost: abort if a module name is too long
On 2017/5/31 11:30, Jessica Yu wrote: > +++ Wanlong Gao [31/05/17 10:23 +0800]: >> Hi Jessica, >> >> On 2017/5/29 17:10, Jessica Yu wrote: >>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>> From: Wanlong Gao >>>> >>>> Module name has a limited length, but currently the build system >>>> allows the build finishing even if the module name is too long. >>>> >>>> CC >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>> warning: initializer-string for array of chars is too long [enabled by >>>> default] >>>> .name = KBUILD_MODNAME, >>>> ^ >>>> >>>> but it's merely a warning. >>>> >>>> This patch adds the check of the module name length in modpost and stops >>>> the build properly. >>>> >>>> Signed-off-by: Wanlong Gao >>>> Signed-off-by: Xie XiuQi >>>> --- >>>> scripts/mod/modpost.c | 11 +++ >>>> 1 file changed, 11 insertions(+) >>>> >>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>> index 30d752a..db11c57 100644 >>>> --- a/scripts/mod/modpost.c >>>> +++ b/scripts/mod/modpost.c >>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>> module *mod) >>>> { >>>> struct symbol *s, *exp; >>>> int err = 0; >>>> +const char *mod_name; >>>> + >>>> +mod_name = strrchr(mod->name, '/'); >>>> +if (mod_name == NULL) >>>> +mod_name = mod->name; >>>> +else >>>> +mod_name++; >>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>> +return 1; >>>> +} >>> >>> Hi Xie, >>> >>> This check shouldn't be in add_versions() (which does something else >>> entirely), >>> it should probably be put in a separate helper function called from main. >>> But >>> I'm not a big fan of the extra string manipulation to do something this >>> simple. >>> >>> I think this check can be vastly simplified, how about something like the >>> following? >> >> This looks better, would you apply your following patch? >> >> Reviewed-by: Wanlong Gao >> Tested-by: Wanlong Gao > > Sure, thanks for testing. I'll go ahead and format this into a proper > patch and resend. Please wait, I just found that this patch makes the built module can't be inserted by the following error: # insmod abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko insmod: ERROR: could not insert module abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc.ko: Invalid parameters # dmesg abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabc: Unknown symbol __fentry__ (err -22) Thanks, Wanlong Gao > > Jessica > >>> >>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>> index 48397fe..bb09fc7 100644 >>> --- a/scripts/mod/modpost.c >>> +++ b/scripts/mod/modpost.c >>> @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct >>> module *mod) >>> "#endif\n"); >>> buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); >>> buf_printf(b, "};\n"); >>> +buf_printf(b, "\n"); >>> +buf_printf(b, "static void __attribute__((section(\".discard\"), >>> used)) __modname_test(void)\n"); >>> +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > >>> MODULE_NAME_LEN); }\n"); >>> } >>> >>> static void add_intree_flag(struct buffer *b, int is_intree) >>> >>> This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the build >>> if >>> it does. >>> >>> Jessica >>> >>>> for (s = mod->unres; s; s = s->next) { >>>> exp = find_symbol(s->name); >>>> -- >>>> 1.8.3.1 >>>> >>> >>> . >>> >> > > . >
Re: [PATCH] modpost: abort if a module name is too long
Hi Jessica, On 2017/5/31 11:30, Jessica Yu wrote: > +++ Wanlong Gao [31/05/17 10:23 +0800]: >> Hi Jessica, >> >> On 2017/5/29 17:10, Jessica Yu wrote: >>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>> From: Wanlong Gao <gaowanl...@huawei.com> >>>> >>>> Module name has a limited length, but currently the build system >>>> allows the build finishing even if the module name is too long. >>>> >>>> CC >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>> warning: initializer-string for array of chars is too long [enabled by >>>> default] >>>> .name = KBUILD_MODNAME, >>>> ^ >>>> >>>> but it's merely a warning. >>>> >>>> This patch adds the check of the module name length in modpost and stops >>>> the build properly. >>>> >>>> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >>>> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >>>> --- >>>> scripts/mod/modpost.c | 11 +++ >>>> 1 file changed, 11 insertions(+) >>>> >>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>> index 30d752a..db11c57 100644 >>>> --- a/scripts/mod/modpost.c >>>> +++ b/scripts/mod/modpost.c >>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>> module *mod) >>>> { >>>> struct symbol *s, *exp; >>>> int err = 0; >>>> +const char *mod_name; >>>> + >>>> +mod_name = strrchr(mod->name, '/'); >>>> +if (mod_name == NULL) >>>> +mod_name = mod->name; >>>> +else >>>> +mod_name++; >>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>> + merror("module name is too long [%s.ko]\n", mod->name); >>>> +return 1; >>>> +} >>> >>> Hi Xie, >>> >>> This check shouldn't be in add_versions() (which does something else >>> entirely), >>> it should probably be put in a separate helper function called from main. >>> But >>> I'm not a big fan of the extra string manipulation to do something this >>> simple. >>> >>> I think this check can be vastly simplified, how about something like the >>> following? >> >> This looks better, would you apply your following patch? >> >> Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> >> Tested-by: Wanlong Gao <gaowanl...@huawei.com> > > Sure, thanks for testing. I'll go ahead and format this into a proper > patch and resend. Nice, thank you. Cheers, Wanlong Gao > > Jessica > >>> >>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>> index 48397fe..bb09fc7 100644 >>> --- a/scripts/mod/modpost.c >>> +++ b/scripts/mod/modpost.c >>> @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct >>> module *mod) >>> "#endif\n"); >>> buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); >>> buf_printf(b, "};\n"); >>> +buf_printf(b, "\n"); >>> +buf_printf(b, "static void __attribute__((section(\".discard\"), >>> used)) __modname_test(void)\n"); >>> +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > >>> MODULE_NAME_LEN); }\n"); >>> } >>> >>> static void add_intree_flag(struct buffer *b, int is_intree) >>> >>> This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the build >>> if >>> it does. >>> >>> Jessica >>> >>>> for (s = mod->unres; s; s = s->next) { >>>> exp = find_symbol(s->name); >>>> -- >>>> 1.8.3.1 >>>> >>> >>> . >>> >> > > . >
Re: [PATCH] modpost: abort if a module name is too long
Hi Jessica, On 2017/5/31 11:30, Jessica Yu wrote: > +++ Wanlong Gao [31/05/17 10:23 +0800]: >> Hi Jessica, >> >> On 2017/5/29 17:10, Jessica Yu wrote: >>> +++ Xie XiuQi [20/05/17 15:46 +0800]: >>>> From: Wanlong Gao >>>> >>>> Module name has a limited length, but currently the build system >>>> allows the build finishing even if the module name is too long. >>>> >>>> CC >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >>>> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >>>> warning: initializer-string for array of chars is too long [enabled by >>>> default] >>>> .name = KBUILD_MODNAME, >>>> ^ >>>> >>>> but it's merely a warning. >>>> >>>> This patch adds the check of the module name length in modpost and stops >>>> the build properly. >>>> >>>> Signed-off-by: Wanlong Gao >>>> Signed-off-by: Xie XiuQi >>>> --- >>>> scripts/mod/modpost.c | 11 +++ >>>> 1 file changed, 11 insertions(+) >>>> >>>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>>> index 30d752a..db11c57 100644 >>>> --- a/scripts/mod/modpost.c >>>> +++ b/scripts/mod/modpost.c >>>> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >>>> module *mod) >>>> { >>>> struct symbol *s, *exp; >>>> int err = 0; >>>> +const char *mod_name; >>>> + >>>> +mod_name = strrchr(mod->name, '/'); >>>> +if (mod_name == NULL) >>>> +mod_name = mod->name; >>>> +else >>>> +mod_name++; >>>> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >>>> +merror("module name is too long [%s.ko]\n", mod->name); >>>> +return 1; >>>> +} >>> >>> Hi Xie, >>> >>> This check shouldn't be in add_versions() (which does something else >>> entirely), >>> it should probably be put in a separate helper function called from main. >>> But >>> I'm not a big fan of the extra string manipulation to do something this >>> simple. >>> >>> I think this check can be vastly simplified, how about something like the >>> following? >> >> This looks better, would you apply your following patch? >> >> Reviewed-by: Wanlong Gao >> Tested-by: Wanlong Gao > > Sure, thanks for testing. I'll go ahead and format this into a proper > patch and resend. Nice, thank you. Cheers, Wanlong Gao > > Jessica > >>> >>> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >>> index 48397fe..bb09fc7 100644 >>> --- a/scripts/mod/modpost.c >>> +++ b/scripts/mod/modpost.c >>> @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct >>> module *mod) >>> "#endif\n"); >>> buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); >>> buf_printf(b, "};\n"); >>> +buf_printf(b, "\n"); >>> +buf_printf(b, "static void __attribute__((section(\".discard\"), >>> used)) __modname_test(void)\n"); >>> +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > >>> MODULE_NAME_LEN); }\n"); >>> } >>> >>> static void add_intree_flag(struct buffer *b, int is_intree) >>> >>> This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the build >>> if >>> it does. >>> >>> Jessica >>> >>>> for (s = mod->unres; s; s = s->next) { >>>> exp = find_symbol(s->name); >>>> -- >>>> 1.8.3.1 >>>> >>> >>> . >>> >> > > . >
Re: [PATCH] modpost: abort if a module name is too long
Hi Jessica, On 2017/5/29 17:10, Jessica Yu wrote: > +++ Xie XiuQi [20/05/17 15:46 +0800]: >> From: Wanlong Gao <gaowanl...@huawei.com> >> >> Module name has a limited length, but currently the build system >> allows the build finishing even if the module name is too long. >> >> CC >> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >> warning: initializer-string for array of chars is too long [enabled by >> default] >> .name = KBUILD_MODNAME, >> ^ >> >> but it's merely a warning. >> >> This patch adds the check of the module name length in modpost and stops >> the build properly. >> >> Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> >> Signed-off-by: Xie XiuQi <xiexi...@huawei.com> >> --- >> scripts/mod/modpost.c | 11 +++ >> 1 file changed, 11 insertions(+) >> >> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >> index 30d752a..db11c57 100644 >> --- a/scripts/mod/modpost.c >> +++ b/scripts/mod/modpost.c >> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >> module *mod) >> { >> struct symbol *s, *exp; >> int err = 0; >> +const char *mod_name; >> + >> +mod_name = strrchr(mod->name, '/'); >> +if (mod_name == NULL) >> +mod_name = mod->name; >> +else >> +mod_name++; >> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >> +merror("module name is too long [%s.ko]\n", mod->name); >> +return 1; >> +} > > Hi Xie, > > This check shouldn't be in add_versions() (which does something else > entirely), > it should probably be put in a separate helper function called from main. But > I'm not a big fan of the extra string manipulation to do something this > simple. > > I think this check can be vastly simplified, how about something like the > following? This looks better, would you apply your following patch? Reviewed-by: Wanlong Gao <gaowanl...@huawei.com> Tested-by: Wanlong Gao <gaowanl...@huawei.com> Thanks, Wanlong Gao > > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index 48397fe..bb09fc7 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct module > *mod) > "#endif\n"); > buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); > buf_printf(b, "};\n"); > +buf_printf(b, "\n"); > +buf_printf(b, "static void __attribute__((section(\".discard\"), used)) > __modname_test(void)\n"); > +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > MODULE_NAME_LEN); > }\n"); > } > > static void add_intree_flag(struct buffer *b, int is_intree) > > This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the build if > it does. > > Jessica > >> for (s = mod->unres; s; s = s->next) { >> exp = find_symbol(s->name); >> -- >> 1.8.3.1 >> > > . >
Re: [PATCH] modpost: abort if a module name is too long
Hi Jessica, On 2017/5/29 17:10, Jessica Yu wrote: > +++ Xie XiuQi [20/05/17 15:46 +0800]: >> From: Wanlong Gao >> >> Module name has a limited length, but currently the build system >> allows the build finishing even if the module name is too long. >> >> CC >> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o >> /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: >> warning: initializer-string for array of chars is too long [enabled by >> default] >> .name = KBUILD_MODNAME, >> ^ >> >> but it's merely a warning. >> >> This patch adds the check of the module name length in modpost and stops >> the build properly. >> >> Signed-off-by: Wanlong Gao >> Signed-off-by: Xie XiuQi >> --- >> scripts/mod/modpost.c | 11 +++ >> 1 file changed, 11 insertions(+) >> >> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c >> index 30d752a..db11c57 100644 >> --- a/scripts/mod/modpost.c >> +++ b/scripts/mod/modpost.c >> @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct >> module *mod) >> { >> struct symbol *s, *exp; >> int err = 0; >> +const char *mod_name; >> + >> +mod_name = strrchr(mod->name, '/'); >> +if (mod_name == NULL) >> +mod_name = mod->name; >> +else >> +mod_name++; >> +if (strlen(mod_name) >= MODULE_NAME_LEN) { >> +merror("module name is too long [%s.ko]\n", mod->name); >> +return 1; >> +} > > Hi Xie, > > This check shouldn't be in add_versions() (which does something else > entirely), > it should probably be put in a separate helper function called from main. But > I'm not a big fan of the extra string manipulation to do something this > simple. > > I think this check can be vastly simplified, how about something like the > following? This looks better, would you apply your following patch? Reviewed-by: Wanlong Gao Tested-by: Wanlong Gao Thanks, Wanlong Gao > > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index 48397fe..bb09fc7 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -2139,6 +2139,9 @@ static void add_header(struct buffer *b, struct module > *mod) > "#endif\n"); > buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); > buf_printf(b, "};\n"); > +buf_printf(b, "\n"); > +buf_printf(b, "static void __attribute__((section(\".discard\"), used)) > __modname_test(void)\n"); > +buf_printf(b, "{ BUILD_BUG_ON(sizeof(KBUILD_MODNAME) > MODULE_NAME_LEN); > }\n"); > } > > static void add_intree_flag(struct buffer *b, int is_intree) > > This simply checks if KBUILD_MODNAME > MODULE_NAME_LEN and breaks the build if > it does. > > Jessica > >> for (s = mod->unres; s; s = s->next) { >> exp = find_symbol(s->name); >> -- >> 1.8.3.1 >> > > . >
Re: [PATCH] modpost: abort if a module name is too long
Folks, Any comments? On 2017/5/20 15:46, Xie XiuQi wrote: > From: Wanlong Gao <gaowanl...@huawei.com> > > Module name has a limited length, but currently the build system > allows the build finishing even if the module name is too long. > > CC > /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o > /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: > warning: initializer-string for array of chars is too long [enabled by > default] > .name = KBUILD_MODNAME, > ^ > > but it's merely a warning. > > This patch adds the check of the module name length in modpost and stops > the build properly. > > Signed-off-by: Wanlong Gao <gaowanl...@huawei.com> > Signed-off-by: Xie XiuQi <xiexi...@huawei.com> > --- > scripts/mod/modpost.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index 30d752a..db11c57 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct > module *mod) > { > struct symbol *s, *exp; > int err = 0; > + const char *mod_name; > + > + mod_name = strrchr(mod->name, '/'); > + if (mod_name == NULL) > + mod_name = mod->name; > + else > + mod_name++; > + if (strlen(mod_name) >= MODULE_NAME_LEN) { > + merror("module name is too long [%s.ko]\n", mod->name); > + return 1; > + } > > for (s = mod->unres; s; s = s->next) { > exp = find_symbol(s->name); >
Re: [PATCH] modpost: abort if a module name is too long
Folks, Any comments? On 2017/5/20 15:46, Xie XiuQi wrote: > From: Wanlong Gao > > Module name has a limited length, but currently the build system > allows the build finishing even if the module name is too long. > > CC > /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.o > /root/kprobe_example/abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz.mod.c:9:2: > warning: initializer-string for array of chars is too long [enabled by > default] > .name = KBUILD_MODNAME, > ^ > > but it's merely a warning. > > This patch adds the check of the module name length in modpost and stops > the build properly. > > Signed-off-by: Wanlong Gao > Signed-off-by: Xie XiuQi > --- > scripts/mod/modpost.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index 30d752a..db11c57 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -2166,6 +2166,17 @@ static int add_versions(struct buffer *b, struct > module *mod) > { > struct symbol *s, *exp; > int err = 0; > + const char *mod_name; > + > + mod_name = strrchr(mod->name, '/'); > + if (mod_name == NULL) > + mod_name = mod->name; > + else > + mod_name++; > + if (strlen(mod_name) >= MODULE_NAME_LEN) { > + merror("module name is too long [%s.ko]\n", mod->name); > + return 1; > + } > > for (s = mod->unres; s; s = s->next) { > exp = find_symbol(s->name); >
Re: [PATCH 2/2] virtio-net: fix buggy features advertised by host
On 11/13/2014 01:52 PM, Jason Wang wrote: > This patch tries to detect the possible buggy features advertised by host > and fix them. One example is booting virtio-net with only ctrl_vq disabled, > qemu may still advertise many features which depends on it. This will > trigger several BUG()s in virtnet_send_command(). > > This patch utilizes the fix_features() method, and disables all features that > depends on ctrl_vq if it was not advertised. > > This fixes the crash when booting with ctrl_vq=off. > > Cc: Rusty Russell > Cc: Michael S. Tsirkin > Signed-off-by: Jason Wang > --- > Changes from V1: > - fix the cut-and-paste error > --- > drivers/net/virtio_net.c | 35 +++ > 1 file changed, 35 insertions(+) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index ec2a8b4..6ce125e 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -1948,6 +1948,40 @@ static int virtnet_restore(struct virtio_device *vdev) > } > #endif > > +static void virtnet_fix_features(struct virtio_device *dev) > +{ > + if (!virtio_has_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { > + if (virtio_has_feature(dev, VIRTIO_NET_F_CTRL_RX)) { > + pr_warning("Disable VIRTIO_NET_F_CTRL_RX since host " > +"does not advertise VIRTIO_NET_F_CTRL_VQ"); > + virtio_disable_feature(dev, VIRTIO_NET_F_CTRL_RX); > + } > + if (virtio_has_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) { > + pr_warning("Disable VIRTIO_NET_F_CTRL_VLAN since host " > +"does not advertise VIRTIO_NET_F_CTRL_VQ"); > + virtio_disable_feature(dev, VIRTIO_NET_F_CTRL_VLAN); > + } > + if (virtio_has_feature(dev, VIRTIO_NET_F_GUEST_ANNOUNCE)) { > + pr_warning("Disable VIRTIO_NET_F_GUEST_ANNOUNCE since " > +"host does not advertise " > +"VIRTIO_NET_F_CTRL_VQ"); > + virtio_disable_feature(dev, > +VIRTIO_NET_F_GUEST_ANNOUNCE); > + } > + if (virtio_has_feature(dev, VIRTIO_NET_F_MQ)) { > + pr_warning("Disable VIRTIO_NET_F_MQ since host " > +"does not advertise VIRTIO_NET_F_CTRL_VQ"); > + virtio_disable_feature(dev, VIRTIO_NET_F_MQ); > + } > + if (virtio_has_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { > + pr_warning("Disable VIRTIO_NET_F_CTRL_MAC_ADDR since " > +"host does not advertise " > +"VIRTIO_NET_F_CTRL_VQ"); > + virtio_disable_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR); > + } Can we use a feature array and check with one loop? The current check looks so dup? Thanks, Wanlong Gao > + } > +} > + > static struct virtio_device_id id_table[] = { > { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, > { 0 }, > @@ -1975,6 +2009,7 @@ static struct virtio_driver virtio_net_driver = { > .probe =virtnet_probe, > .remove = virtnet_remove, > .config_changed = virtnet_config_changed, > + .fix_features = virtnet_fix_features, > #ifdef CONFIG_PM_SLEEP > .freeze = virtnet_freeze, > .restore = virtnet_restore, > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] virtio-net: fix buggy features advertised by host
On 11/13/2014 01:52 PM, Jason Wang wrote: This patch tries to detect the possible buggy features advertised by host and fix them. One example is booting virtio-net with only ctrl_vq disabled, qemu may still advertise many features which depends on it. This will trigger several BUG()s in virtnet_send_command(). This patch utilizes the fix_features() method, and disables all features that depends on ctrl_vq if it was not advertised. This fixes the crash when booting with ctrl_vq=off. Cc: Rusty Russell ru...@rustcorp.com.au Cc: Michael S. Tsirkin m...@redhat.com Signed-off-by: Jason Wang jasow...@redhat.com --- Changes from V1: - fix the cut-and-paste error --- drivers/net/virtio_net.c | 35 +++ 1 file changed, 35 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ec2a8b4..6ce125e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1948,6 +1948,40 @@ static int virtnet_restore(struct virtio_device *vdev) } #endif +static void virtnet_fix_features(struct virtio_device *dev) +{ + if (!virtio_has_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { + if (virtio_has_feature(dev, VIRTIO_NET_F_CTRL_RX)) { + pr_warning(Disable VIRTIO_NET_F_CTRL_RX since host +does not advertise VIRTIO_NET_F_CTRL_VQ); + virtio_disable_feature(dev, VIRTIO_NET_F_CTRL_RX); + } + if (virtio_has_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) { + pr_warning(Disable VIRTIO_NET_F_CTRL_VLAN since host +does not advertise VIRTIO_NET_F_CTRL_VQ); + virtio_disable_feature(dev, VIRTIO_NET_F_CTRL_VLAN); + } + if (virtio_has_feature(dev, VIRTIO_NET_F_GUEST_ANNOUNCE)) { + pr_warning(Disable VIRTIO_NET_F_GUEST_ANNOUNCE since +host does not advertise +VIRTIO_NET_F_CTRL_VQ); + virtio_disable_feature(dev, +VIRTIO_NET_F_GUEST_ANNOUNCE); + } + if (virtio_has_feature(dev, VIRTIO_NET_F_MQ)) { + pr_warning(Disable VIRTIO_NET_F_MQ since host +does not advertise VIRTIO_NET_F_CTRL_VQ); + virtio_disable_feature(dev, VIRTIO_NET_F_MQ); + } + if (virtio_has_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + pr_warning(Disable VIRTIO_NET_F_CTRL_MAC_ADDR since +host does not advertise +VIRTIO_NET_F_CTRL_VQ); + virtio_disable_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR); + } Can we use a feature array and check with one loop? The current check looks so dup? Thanks, Wanlong Gao + } +} + static struct virtio_device_id id_table[] = { { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -1975,6 +2009,7 @@ static struct virtio_driver virtio_net_driver = { .probe =virtnet_probe, .remove = virtnet_remove, .config_changed = virtnet_config_changed, + .fix_features = virtnet_fix_features, #ifdef CONFIG_PM_SLEEP .freeze = virtnet_freeze, .restore = virtnet_restore, -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[BUG] LTP test case readahead01 fails after commit 63d0f0a3c7
Hi AKPM and folks, LTP test readahead syscall return value like this: = static void test_invalid_fd(void) { int fd[2]; tst_resm(TINFO, "test_invalid_fd pipe"); if (pipe(fd) < 0) tst_resm(TBROK | TERRNO, "Failed to create pipe"); TEST(ltp_syscall(__NR_readahead, fd[0], 0, getpagesize())); check_ret(-1); check_errno(EINVAL); close(fd[0]); close(fd[1]); tst_resm(TINFO, "test_invalid_fd socket"); fd[0] = socket(AF_INET, SOCK_STREAM, 0); if (fd[0] < 0) tst_resm(TBROK | TERRNO, "Failed to create socket"); TEST(ltp_syscall(__NR_readahead, fd[0], 0, getpagesize())); check_ret(-1); check_errno(EINVAL); close(fd[0]); } FULL case code: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/readahead/readahead01.c This case passed before the following kernel commit[1], it means kernel will return -1 with errno=EINVAL. But after this commit[1], kernel will return 0 here. The different between before and after this commit[1] is that: BEFORE: = do_readahead(): if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) return EINVAL; AFTER: == do_readahead(): if (!mapping || !mapping->a_ops) return EINVAL; And followed with: force_page_cache_readahead(): if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) return -EINVAL; ========= I think you must know which is right? Thanks, Wanlong Gao [1]: commit 63d0f0a3c7e1281fd79268a8d988167eff607fb6 Author: Andrew Morton Date: Tue Nov 12 15:07:09 2013 -0800 mm/readahead.c:do_readhead(): don't check for ->readpage The callee force_page_cache_readahead() already does this and unlike do_readahead(), force_page_cache_readahead() remembers to check for ->readpages() as well. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/readahead.c b/mm/readahead.c index e4ed041..5024183 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -569,7 +569,7 @@ static ssize_t do_readahead(struct address_space *mapping, struct file *filp, pgoff_t index, unsigned long nr) { - if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) + if (!mapping || !mapping->a_ops) return -EINVAL; force_page_cache_readahead(mapping, filp, index, nr); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[BUG] LTP test case readahead01 fails after commit 63d0f0a3c7
Hi AKPM and folks, LTP test readahead syscall return value like this: = static void test_invalid_fd(void) { int fd[2]; tst_resm(TINFO, test_invalid_fd pipe); if (pipe(fd) 0) tst_resm(TBROK | TERRNO, Failed to create pipe); TEST(ltp_syscall(__NR_readahead, fd[0], 0, getpagesize())); check_ret(-1); check_errno(EINVAL); close(fd[0]); close(fd[1]); tst_resm(TINFO, test_invalid_fd socket); fd[0] = socket(AF_INET, SOCK_STREAM, 0); if (fd[0] 0) tst_resm(TBROK | TERRNO, Failed to create socket); TEST(ltp_syscall(__NR_readahead, fd[0], 0, getpagesize())); check_ret(-1); check_errno(EINVAL); close(fd[0]); } FULL case code: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/readahead/readahead01.c This case passed before the following kernel commit[1], it means kernel will return -1 with errno=EINVAL. But after this commit[1], kernel will return 0 here. The different between before and after this commit[1] is that: BEFORE: = do_readahead(): if (!mapping || !mapping-a_ops || !mapping-a_ops-readpage) return EINVAL; AFTER: == do_readahead(): if (!mapping || !mapping-a_ops) return EINVAL; And followed with: force_page_cache_readahead(): if (unlikely(!mapping-a_ops-readpage !mapping-a_ops-readpages)) return -EINVAL; = I think you must know which is right? Thanks, Wanlong Gao [1]: commit 63d0f0a3c7e1281fd79268a8d988167eff607fb6 Author: Andrew Morton a...@linux-foundation.org Date: Tue Nov 12 15:07:09 2013 -0800 mm/readahead.c:do_readhead(): don't check for -readpage The callee force_page_cache_readahead() already does this and unlike do_readahead(), force_page_cache_readahead() remembers to check for -readpages() as well. Signed-off-by: Andrew Morton a...@linux-foundation.org Signed-off-by: Linus Torvalds torva...@linux-foundation.org diff --git a/mm/readahead.c b/mm/readahead.c index e4ed041..5024183 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -569,7 +569,7 @@ static ssize_t do_readahead(struct address_space *mapping, struct file *filp, pgoff_t index, unsigned long nr) { - if (!mapping || !mapping-a_ops || !mapping-a_ops-readpage) + if (!mapping || !mapping-a_ops) return -EINVAL; force_page_cache_readahead(mapping, filp, index, nr); -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [net-next/master PATCH] lib: fix compile warning in hashlib_init
On 12/19/2013 09:32 AM, Daniel Borkmann wrote: > On 12/19/2013 02:15 AM, Fengguang Wu wrote: >> CC the list. >> >> On Thu, Dec 19, 2013 at 09:08:34AM +0800, Wanlong Gao wrote: >>> /git/linux/lib/hash.c: In function 'hashlib_init': >>> /git/linux/lib/hash.c:35:2: warning: passing argument 1 of >>> 'setup_arch_fast_hash' from incompatible pointer type [enabled by default] >>> /git/linux/include/asm-generic/hash.h:5:20: note: expected 'struct >>> arch_hash_ops *' but argument is of type 'struct fast_hash_ops *' >>> >>> Reported-by: Fengguang Wu >>> Signed-off-by: Wanlong Gao > > That's already in netdev patchwork: > > http://patchwork.ozlabs.org/patch/302908/ Nice, thank you. Regards, Wanlong Gao > > Thanks ! > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [net-next/master PATCH] lib: fix compile warning in hashlib_init
On 12/19/2013 09:32 AM, Daniel Borkmann wrote: On 12/19/2013 02:15 AM, Fengguang Wu wrote: CC the list. On Thu, Dec 19, 2013 at 09:08:34AM +0800, Wanlong Gao wrote: /git/linux/lib/hash.c: In function 'hashlib_init': /git/linux/lib/hash.c:35:2: warning: passing argument 1 of 'setup_arch_fast_hash' from incompatible pointer type [enabled by default] /git/linux/include/asm-generic/hash.h:5:20: note: expected 'struct arch_hash_ops *' but argument is of type 'struct fast_hash_ops *' Reported-by: Fengguang Wu fengguang...@intel.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com That's already in netdev patchwork: http://patchwork.ozlabs.org/patch/302908/ Nice, thank you. Regards, Wanlong Gao Thanks ! -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
mqueue perf data
Hi Fengguang, Do we need to stat out the perf data of mqueue test in kernel_selftests? It's like following. Thanks, Wanlong Gao # make run_tests -C mqueue make: Entering directory `/git/linux/tools/testing/selftests/mqueue' Initial system state: Using queue path: /test1 RLIMIT_MSGQUEUE(soft): 819200 RLIMIT_MSGQUEUE(hard): 819200 Maximum Message Size: 8192 Maximum Queue Size: 10 Default Message Size: 8192 Default Queue Size: 10 Adjusted system state for testing: RLIMIT_MSGQUEUE(soft): 819200 RLIMIT_MSGQUEUE(hard): 819200 Maximum Message Size: 8192 Maximum Queue Size: 10 Default Message Size: 8192 Default Queue Size: 10 Test series 1, behavior when no attr struct passed to mq_open: Kernel supports setting defaults separately from maximums: PASS Given sane values, mq_open without an attr struct succeeds: PASS Kernel properly honors default setting knobs: PASS Kernel properly limits default values to lesser of default/max: PASS Kernel properly fails to create queue when defaults would exceed rlimit: PASS Test series 2, behavior when attr struct is passed to mq_open: Queue open in excess of rlimit max when euid = 0 failed:PASS Queue open with mq_maxmsg > limit when euid = 0 succeeded: PASS Queue open with mq_msgsize > limit when euid = 0 succeeded: PASS Queue open with total size > 2GB when euid = 0 failed: PASS Queue open in excess of rlimit max when euid = 99 failed: PASS Queue open with mq_maxmsg > limit when euid = 99 failed:PASS Queue open with mq_msgsize > limit when euid = 99 failed: PASS Queue open with total size > 2GB when euid = 99 failed: PASS Initial system state: Using queue path: /mq_perf_tests RLIMIT_MSGQUEUE(soft): 819200 RLIMIT_MSGQUEUE(hard): 819200 Maximum Message Size: 8192 Maximum Queue Size: 10 Nice value: 0 Adjusted system state for testing: RLIMIT_MSGQUEUE(soft): (unlimited) RLIMIT_MSGQUEUE(hard): (unlimited) Maximum Message Size: 16777216 Maximum Queue Size: 65530 Nice value: -20 Continuous mode:(disabled) CPUs to pin:3 Queue /mq_perf_tests created: mq_flags: O_NONBLOCK mq_maxmsg: 65530 mq_msgsize: 16 mq_curmsgs: 0 Started mqueue performance test thread on CPU 3 Max priorities: 32768 Clock resolution: 1 nsec Test #1: Time send/recv message, queue empty (1000 iterations) Send msg: 4.50690280s total time 405 nsec/msg Recv msg: 4.123621560s total time 412 nsec/msg Test #2a: Time send/recv message, queue full, constant prio (10 iterations) Filling queue...done. 0.14554407s Testing...done. Send msg: 0.40292962s total time 402 nsec/msg Recv msg: 0.40605786s total time 406 nsec/msg Draining queue...done. 0.15010003s Test #2b: Time send/recv message, queue full, increasing prio (10 iterations) Filling queue...done. 0.25628197s Testing...done. Send msg: 0.53792862s total time 537 nsec/msg Recv msg: 0.52323416s total time 523 nsec/msg Draining queue...done. 0.17617835s Test #2c: Time send/recv message, queue full, decreasing prio (10 iterations) Filling queue...done. 0.26939894s Testing...done. Send msg: 0.55733128s total time
mqueue perf data
Hi Fengguang, Do we need to stat out the perf data of mqueue test in kernel_selftests? It's like following. Thanks, Wanlong Gao # make run_tests -C mqueue make: Entering directory `/git/linux/tools/testing/selftests/mqueue' Initial system state: Using queue path: /test1 RLIMIT_MSGQUEUE(soft): 819200 RLIMIT_MSGQUEUE(hard): 819200 Maximum Message Size: 8192 Maximum Queue Size: 10 Default Message Size: 8192 Default Queue Size: 10 Adjusted system state for testing: RLIMIT_MSGQUEUE(soft): 819200 RLIMIT_MSGQUEUE(hard): 819200 Maximum Message Size: 8192 Maximum Queue Size: 10 Default Message Size: 8192 Default Queue Size: 10 Test series 1, behavior when no attr struct passed to mq_open: Kernel supports setting defaults separately from maximums: PASS Given sane values, mq_open without an attr struct succeeds: PASS Kernel properly honors default setting knobs: PASS Kernel properly limits default values to lesser of default/max: PASS Kernel properly fails to create queue when defaults would exceed rlimit: PASS Test series 2, behavior when attr struct is passed to mq_open: Queue open in excess of rlimit max when euid = 0 failed:PASS Queue open with mq_maxmsg limit when euid = 0 succeeded: PASS Queue open with mq_msgsize limit when euid = 0 succeeded: PASS Queue open with total size 2GB when euid = 0 failed: PASS Queue open in excess of rlimit max when euid = 99 failed: PASS Queue open with mq_maxmsg limit when euid = 99 failed:PASS Queue open with mq_msgsize limit when euid = 99 failed: PASS Queue open with total size 2GB when euid = 99 failed: PASS Initial system state: Using queue path: /mq_perf_tests RLIMIT_MSGQUEUE(soft): 819200 RLIMIT_MSGQUEUE(hard): 819200 Maximum Message Size: 8192 Maximum Queue Size: 10 Nice value: 0 Adjusted system state for testing: RLIMIT_MSGQUEUE(soft): (unlimited) RLIMIT_MSGQUEUE(hard): (unlimited) Maximum Message Size: 16777216 Maximum Queue Size: 65530 Nice value: -20 Continuous mode:(disabled) CPUs to pin:3 Queue /mq_perf_tests created: mq_flags: O_NONBLOCK mq_maxmsg: 65530 mq_msgsize: 16 mq_curmsgs: 0 Started mqueue performance test thread on CPU 3 Max priorities: 32768 Clock resolution: 1 nsec Test #1: Time send/recv message, queue empty (1000 iterations) Send msg: 4.50690280s total time 405 nsec/msg Recv msg: 4.123621560s total time 412 nsec/msg Test #2a: Time send/recv message, queue full, constant prio (10 iterations) Filling queue...done. 0.14554407s Testing...done. Send msg: 0.40292962s total time 402 nsec/msg Recv msg: 0.40605786s total time 406 nsec/msg Draining queue...done. 0.15010003s Test #2b: Time send/recv message, queue full, increasing prio (10 iterations) Filling queue...done. 0.25628197s Testing...done. Send msg: 0.53792862s total time 537 nsec/msg Recv msg: 0.52323416s total time 523 nsec/msg Draining queue...done. 0.17617835s Test #2c: Time send/recv message, queue full, decreasing prio (10 iterations) Filling queue...done. 0.26939894s Testing...done. Send msg: 0.55733128s total time 557 nsec/msg
Re: [PATCH] sh: fix kernel build error
On 11/14/2013 02:14 AM, Sergei Shtylyov wrote: > Hello. > > On 11/13/2013 05:38 PM, Wanlong Gao wrote: > >> arch/sh/kernel/kgdb.c: In function 'sleeping_thread_to_gdb_regs': >>>> arch/sh/kernel/kgdb.c:225:32: error: implicit declaration of function >>>> 'task_stack_page' [-Werror=implicit-function-declaration] >>>> arch/sh/kernel/kgdb.c:242:23: error: dereferencing pointer to incomplete >>>> type >>>> arch/sh/kernel/kgdb.c:243:22: error: dereferencing pointer to incomplete >>>> type >> arch/sh/kernel/kgdb.c: In function 'singlestep_trap_handler': >>>> arch/sh/kernel/kgdb.c:310:27: error: 'SIGTRAP' undeclared (first use in >>>> this function) >> arch/sh/kernel/kgdb.c:310:27: note: each undeclared identifier is >> reported only once for each function it appears in >> cc1: all warnings being treated as errors > >Strange quoting... Strange? It's the build error log. > >> This is introduced by commit 16559ae. > >Please also specify that commit's summary line in parens. commit 16559ae48c76f1ceb970b9719dea62b77eb5d06b Author: Greg Kroah-Hartman Date: Mon Feb 4 15:35:26 2013 -0800 kgdb: remove #include from kgdb.h There's no reason kgdb.h itself needs to include the 8250 serial port header file. So push it down to the _very_ limited number of individual drivers that need the values in that file, and fix up the places where people really wanted serial_core.h and platform_device.h. Signed-off-by: Greg Kroah-Hartman And the line caused this build error is: diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 4dff0c6..c6e091b 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -13,7 +13,6 @@ #ifndef _KGDB_H_ #define _KGDB_H_ -#include #include #include #include Thanks, Wanlong Gao > >> CC: kbuild-...@01.org >> Reported-by: Fengguang Wu >> Signed-off-by: Wanlong Gao > > WBR, Sergei > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sh: fix kernel build error
arch/sh/kernel/kgdb.c: In function 'sleeping_thread_to_gdb_regs': >> arch/sh/kernel/kgdb.c:225:32: error: implicit declaration of function >> 'task_stack_page' [-Werror=implicit-function-declaration] >> arch/sh/kernel/kgdb.c:242:23: error: dereferencing pointer to incomplete type >> arch/sh/kernel/kgdb.c:243:22: error: dereferencing pointer to incomplete type arch/sh/kernel/kgdb.c: In function 'singlestep_trap_handler': >> arch/sh/kernel/kgdb.c:310:27: error: 'SIGTRAP' undeclared (first use in this >> function) arch/sh/kernel/kgdb.c:310:27: note: each undeclared identifier is reported only once for each function it appears in cc1: all warnings being treated as errors This is introduced by commit 16559ae. CC: kbuild-...@01.org Reported-by: Fengguang Wu Signed-off-by: Wanlong Gao --- arch/sh/kernel/kgdb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 38b3139..adad46e 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include -- 1.8.5.rc1.17.g0ecd94d -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sh: fix kernel build error
arch/sh/kernel/kgdb.c: In function 'sleeping_thread_to_gdb_regs': arch/sh/kernel/kgdb.c:225:32: error: implicit declaration of function 'task_stack_page' [-Werror=implicit-function-declaration] arch/sh/kernel/kgdb.c:242:23: error: dereferencing pointer to incomplete type arch/sh/kernel/kgdb.c:243:22: error: dereferencing pointer to incomplete type arch/sh/kernel/kgdb.c: In function 'singlestep_trap_handler': arch/sh/kernel/kgdb.c:310:27: error: 'SIGTRAP' undeclared (first use in this function) arch/sh/kernel/kgdb.c:310:27: note: each undeclared identifier is reported only once for each function it appears in cc1: all warnings being treated as errors This is introduced by commit 16559ae. CC: kbuild-...@01.org Reported-by: Fengguang Wu fengguang...@intel.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- arch/sh/kernel/kgdb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 38b3139..adad46e 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -13,6 +13,7 @@ #include linux/kdebug.h #include linux/irq.h #include linux/io.h +#include linux/sched.h #include asm/cacheflush.h #include asm/traps.h -- 1.8.5.rc1.17.g0ecd94d -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sh: fix kernel build error
On 11/14/2013 02:14 AM, Sergei Shtylyov wrote: Hello. On 11/13/2013 05:38 PM, Wanlong Gao wrote: arch/sh/kernel/kgdb.c: In function 'sleeping_thread_to_gdb_regs': arch/sh/kernel/kgdb.c:225:32: error: implicit declaration of function 'task_stack_page' [-Werror=implicit-function-declaration] arch/sh/kernel/kgdb.c:242:23: error: dereferencing pointer to incomplete type arch/sh/kernel/kgdb.c:243:22: error: dereferencing pointer to incomplete type arch/sh/kernel/kgdb.c: In function 'singlestep_trap_handler': arch/sh/kernel/kgdb.c:310:27: error: 'SIGTRAP' undeclared (first use in this function) arch/sh/kernel/kgdb.c:310:27: note: each undeclared identifier is reported only once for each function it appears in cc1: all warnings being treated as errors Strange quoting... Strange? It's the build error log. This is introduced by commit 16559ae. Please also specify that commit's summary line in parens. commit 16559ae48c76f1ceb970b9719dea62b77eb5d06b Author: Greg Kroah-Hartman gre...@linuxfoundation.org Date: Mon Feb 4 15:35:26 2013 -0800 kgdb: remove #include linux/serial_8250.h from kgdb.h There's no reason kgdb.h itself needs to include the 8250 serial port header file. So push it down to the _very_ limited number of individual drivers that need the values in that file, and fix up the places where people really wanted serial_core.h and platform_device.h. Signed-off-by: Greg Kroah-Hartman gre...@linuxfoundation.org And the line caused this build error is: diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 4dff0c6..c6e091b 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -13,7 +13,6 @@ #ifndef _KGDB_H_ #define _KGDB_H_ -#include linux/serial_8250.h #include linux/linkage.h #include linux/init.h #include linux/atomic.h Thanks, Wanlong Gao CC: kbuild-...@01.org Reported-by: Fengguang Wu fengguang...@intel.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com WBR, Sergei -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net] virtio-net: correctly handle cpu hotplug notifier during resuming
On 10/29/2013 03:11 PM, Jason Wang wrote: > commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to > cpu hotplug notifier if we're not ready) tries to bypass the cpu hotplug > notifier by checking the config_enable and does nothing is it was false. So it > need to try to hold the config_lock mutex which may happen in atomic > environment which leads the following warnings: > > [ 622.91] CPU0 attaching NULL sched-domain. > [ 622.96] CPU1 attaching NULL sched-domain. > [ 622.944485] CPU0 attaching NULL sched-domain. > [ 622.950795] BUG: sleeping function called from invalid context at > kernel/mutex.c:616 > [ 622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1 > [ 622.950796] no locks held by migration/1/10. > [ 622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted > 3.12.0-rc5-wl-01249-gb91e82d #317 > [ 622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > [ 622.950802] 88001d42dba0 81a32f22 > 88001bfb9c70 > [ 622.950803] 88001d42dbb0 810edb02 88001d42dc38 > 81a396ed > [ 622.950805] 0046 88001d42dbe8 810e861d > > [ 622.950805] Call Trace: > [ 622.950810] [] dump_stack+0x54/0x74 > [ 622.950815] [] __might_sleep+0x112/0x114 > [ 622.950817] [] mutex_lock_nested+0x3c/0x3c6 > [ 622.950818] [] ? up+0x39/0x3e > [ 622.950821] [] ? acpi_os_signal_semaphore+0x21/0x2d > [ 622.950824] [] ? acpi_ut_release_mutex+0x5e/0x62 > [ 622.950828] [] virtnet_cpu_callback+0x33/0x87 > [ 622.950830] [] notifier_call_chain+0x3c/0x5e > [ 622.950832] [] __raw_notifier_call_chain+0xe/0x10 > [ 622.950835] [] __cpu_notify+0x20/0x37 > [ 622.950836] [] cpu_notify+0x13/0x15 > [ 622.950838] [] take_cpu_down+0x27/0x3a > [ 622.950841] [] stop_machine_cpu_stop+0x93/0xf1 > [ 622.950842] [] cpu_stopper_thread+0xa0/0x12f > [ 622.950844] [] ? cpu_stopper_thread+0x12f/0x12f > [ 622.950847] [] ? lock_release_holdtime.part.7+0xa3/0xa8 > [ 622.950848] [] ? cpu_stop_should_run+0x3f/0x47 > [ 622.950850] [] smpboot_thread_fn+0x1c5/0x1e3 > [ 622.950852] [] ? lg_global_unlock+0x67/0x67 > [ 622.950854] [] kthread+0xd8/0xe0 > [ 622.950857] [] ? wait_for_common+0x12f/0x164 > [ 622.950859] [] ? kthread_create_on_node+0x124/0x124 > [ 622.950861] [] ret_from_fork+0x7c/0xb0 > [ 622.950862] [] ? kthread_create_on_node+0x124/0x124 > [ 622.950876] smpboot: CPU 1 is now offline > [ 623.194556] SMP alternatives: lockdep: fixing up alternatives > [ 623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1 > ... > > A correct fix is to unregister the hotcpu notifier during restore and > register a > new one in resume. > > Reported-by: Fengguang Wu > Tested-by: Fengguang Wu > Cc: Wanlong Gao > Cc: Rusty Russell > Cc: Michael S. Tsirkin > Signed-off-by: Jason Wang Thank you for your fix. Reviewed-by: Wanlong Gao > --- > This patch is needed for 3.8 and above. > --- > drivers/net/virtio_net.c | 13 ++--- > 1 file changed, 6 insertions(+), 7 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 9fbdfcd..bbc9cb8 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct notifier_block > *nfb, > { > struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); > > - mutex_lock(>config_lock); > - > - if (!vi->config_enable) > - goto done; > - > switch(action & ~CPU_TASKS_FROZEN) { > case CPU_ONLINE: > case CPU_DOWN_FAILED: > @@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct notifier_block > *nfb, > break; > } > > -done: > - mutex_unlock(>config_lock); > return NOTIFY_OK; > } > > @@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_device *vdev) > struct virtnet_info *vi = vdev->priv; > int i; > > + unregister_hotcpu_notifier(>nb); > + > /* Prevent config work handler from accessing the device */ > mutex_lock(>config_lock); > vi->config_enable = false; > @@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio_device *vdev) > virtnet_set_queues(vi, vi->curr_queue_pairs); > rtnl_unlock(); > > + err = register_hotcpu_notifier(>nb); > + if (err) > + return err; > + > return 0; > } > #endif > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net] virtio-net: correctly handle cpu hotplug notifier during resuming
On 10/29/2013 03:11 PM, Jason Wang wrote: commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to cpu hotplug notifier if we're not ready) tries to bypass the cpu hotplug notifier by checking the config_enable and does nothing is it was false. So it need to try to hold the config_lock mutex which may happen in atomic environment which leads the following warnings: [ 622.91] CPU0 attaching NULL sched-domain. [ 622.96] CPU1 attaching NULL sched-domain. [ 622.944485] CPU0 attaching NULL sched-domain. [ 622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616 [ 622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1 [ 622.950796] no locks held by migration/1/10. [ 622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317 [ 622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 622.950802] 88001d42dba0 81a32f22 88001bfb9c70 [ 622.950803] 88001d42dbb0 810edb02 88001d42dc38 81a396ed [ 622.950805] 0046 88001d42dbe8 810e861d [ 622.950805] Call Trace: [ 622.950810] [81a32f22] dump_stack+0x54/0x74 [ 622.950815] [810edb02] __might_sleep+0x112/0x114 [ 622.950817] [81a396ed] mutex_lock_nested+0x3c/0x3c6 [ 622.950818] [810e861d] ? up+0x39/0x3e [ 622.950821] [8153ea7c] ? acpi_os_signal_semaphore+0x21/0x2d [ 622.950824] [81565ed1] ? acpi_ut_release_mutex+0x5e/0x62 [ 622.950828] [816d04ec] virtnet_cpu_callback+0x33/0x87 [ 622.950830] [81a42576] notifier_call_chain+0x3c/0x5e [ 622.950832] [810e86a8] __raw_notifier_call_chain+0xe/0x10 [ 622.950835] [810c5556] __cpu_notify+0x20/0x37 [ 622.950836] [810c5580] cpu_notify+0x13/0x15 [ 622.950838] [81a237cd] take_cpu_down+0x27/0x3a [ 622.950841] [81136289] stop_machine_cpu_stop+0x93/0xf1 [ 622.950842] [81136167] cpu_stopper_thread+0xa0/0x12f [ 622.950844] [811361f6] ? cpu_stopper_thread+0x12f/0x12f [ 622.950847] [81119710] ? lock_release_holdtime.part.7+0xa3/0xa8 [ 622.950848] [81135e4b] ? cpu_stop_should_run+0x3f/0x47 [ 622.950850] [810ea9b0] smpboot_thread_fn+0x1c5/0x1e3 [ 622.950852] [810ea7eb] ? lg_global_unlock+0x67/0x67 [ 622.950854] [810e36b7] kthread+0xd8/0xe0 [ 622.950857] [81a3bfad] ? wait_for_common+0x12f/0x164 [ 622.950859] [810e35df] ? kthread_create_on_node+0x124/0x124 [ 622.950861] [81a45ffc] ret_from_fork+0x7c/0xb0 [ 622.950862] [810e35df] ? kthread_create_on_node+0x124/0x124 [ 622.950876] smpboot: CPU 1 is now offline [ 623.194556] SMP alternatives: lockdep: fixing up alternatives [ 623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1 ... A correct fix is to unregister the hotcpu notifier during restore and register a new one in resume. Reported-by: Fengguang Wu fengguang...@intel.com Tested-by: Fengguang Wu fengguang...@intel.com Cc: Wanlong Gao gaowanl...@cn.fujitsu.com Cc: Rusty Russell ru...@rustcorp.com.au Cc: Michael S. Tsirkin m...@redhat.com Signed-off-by: Jason Wang jasow...@redhat.com Thank you for your fix. Reviewed-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- This patch is needed for 3.8 and above. --- drivers/net/virtio_net.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd..bbc9cb8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, { struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); - mutex_lock(vi-config_lock); - - if (!vi-config_enable) - goto done; - switch(action ~CPU_TASKS_FROZEN) { case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, break; } -done: - mutex_unlock(vi-config_lock); return NOTIFY_OK; } @@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_device *vdev) struct virtnet_info *vi = vdev-priv; int i; + unregister_hotcpu_notifier(vi-nb); + /* Prevent config work handler from accessing the device */ mutex_lock(vi-config_lock); vi-config_enable = false; @@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio_device *vdev) virtnet_set_queues(vi, vi-curr_queue_pairs); rtnl_unlock(); + err = register_hotcpu_notifier(vi-nb); + if (err) + return err; + return 0; } #endif -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord
Re: [PATCH net 1/2] virtio-net: don't respond to cpu hotplug notifier if we're not ready
On 10/14/2013 05:56 PM, Jason Wang wrote: > We're trying to re-configure the affinity unconditionally in cpu hotplug > callback. This may lead the issue during resuming from s3/s4 since > > - virt queues haven't been allocated at that time. > - it's unnecessary since thaw method will re-configure the affinity. > > Fix this issue by checking the config_enable and do nothing is we're not > ready. > > The bug were introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17 > (virtio-net: reset virtqueue affinity when doing cpu hotplug). > > Cc: Rusty Russell > Cc: Michael S. Tsirkin > Cc: Wanlong Gao > Signed-off-by: Jason Wang Thank you. Reviewed-by: Wanlong Gao > --- > The patch is need for 3.8 and above. > --- > drivers/net/virtio_net.c | 8 > 1 file changed, 8 insertions(+) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index defec2b..c4bc1cc 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -1116,6 +1116,11 @@ static int virtnet_cpu_callback(struct notifier_block > *nfb, > { > struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); > > + mutex_lock(>config_lock); > + > + if (!vi->config_enable) > + goto done; > + > switch(action & ~CPU_TASKS_FROZEN) { > case CPU_ONLINE: > case CPU_DOWN_FAILED: > @@ -1128,6 +1133,9 @@ static int virtnet_cpu_callback(struct notifier_block > *nfb, > default: > break; > } > + > +done: > + mutex_unlock(>config_lock); > return NOTIFY_OK; > } > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net 1/2] virtio-net: don't respond to cpu hotplug notifier if we're not ready
On 10/14/2013 05:56 PM, Jason Wang wrote: We're trying to re-configure the affinity unconditionally in cpu hotplug callback. This may lead the issue during resuming from s3/s4 since - virt queues haven't been allocated at that time. - it's unnecessary since thaw method will re-configure the affinity. Fix this issue by checking the config_enable and do nothing is we're not ready. The bug were introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17 (virtio-net: reset virtqueue affinity when doing cpu hotplug). Cc: Rusty Russell ru...@rustcorp.com.au Cc: Michael S. Tsirkin m...@redhat.com Cc: Wanlong Gao gaowanl...@cn.fujitsu.com Signed-off-by: Jason Wang jasow...@redhat.com Thank you. Reviewed-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- The patch is need for 3.8 and above. --- drivers/net/virtio_net.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index defec2b..c4bc1cc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1116,6 +1116,11 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, { struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); + mutex_lock(vi-config_lock); + + if (!vi-config_enable) + goto done; + switch(action ~CPU_TASKS_FROZEN) { case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -1128,6 +1133,9 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, default: break; } + +done: + mutex_unlock(vi-config_lock); return NOTIFY_OK; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/19/2013 02:17 PM, Tejun Heo wrote: > On Thu, Apr 18, 2013 at 10:57:54PM -0700, Tejun Heo wrote: >> No wonder this thing crashes. Chris, can't the original bio carry >> bbio in bi_private and let end_bio_extent_readpage() free the bbio >> instead of abusing bi_bdev like this? > > BTW, I think it's a bit too late to fix this properly from btrfs side > unless we're gonna do -rc8, so let's revert the TP patch for now and > sort this out in the next devel cycle. AFAICS, while disturbingly > (ha|yu)cky, the bi_bdev trick should be okay without the new TP. Thank you for investigating this. And sorry for my incompleted panic picture before that let you detour. :-( Regards, Wanlong Gao > > Thanks. > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/19/2013 02:17 PM, Tejun Heo wrote: On Thu, Apr 18, 2013 at 10:57:54PM -0700, Tejun Heo wrote: No wonder this thing crashes. Chris, can't the original bio carry bbio in bi_private and let end_bio_extent_readpage() free the bbio instead of abusing bi_bdev like this? BTW, I think it's a bit too late to fix this properly from btrfs side unless we're gonna do -rc8, so let's revert the TP patch for now and sort this out in the next devel cycle. AFAICS, while disturbingly (ha|yu)cky, the bi_bdev trick should be okay without the new TP. Thank you for investigating this. And sorry for my incompleted panic picture before that let you detour. :-( Regards, Wanlong Gao Thanks. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/18/2013 10:30 PM, Jens Axboe wrote: > On Thu, Apr 18 2013, Wanlong Gao wrote: >> On 04/18/2013 09:35 PM, Jens Axboe wrote: >>> On Thu, Apr 18 2013, Wanlong Gao wrote: >>>>> A bio is always fully initialized, regardless of which internal >>>>> allocator it came from. If people are doing private kmallocs, then they >>>>> better be using bio_init() as well. >>>>> >>>>> Wanlong, would it be possible to get a full dmesg on boot see I can see >>>>> what drivers and file systems are in use? Anything special about your >>>>> setup. >>>> >>>> Sure, attached. >>> >>> Does the below help? >> >> Still got panic with this patch. :-( > > Could you capture the full dump? I'd like to see what rcx was, and that > seems to have scrolled off. > I got the panic message using netconsole: EXT4-fs (sda5): mounted filesystem with ordered data mode. Opts: (null) EXT4-fs (sda8): mounted filesystem with ordered data mode. Opts: (null) EXT4-fs (sda9): warning: maximal mount count reached, running e2fsck is recommended EXT4-fs (sda9): mounted filesystem with ordered data mode. Opts: (null) EXT4-fs (sda10): mounted filesystem with ordered data mode. Opts: (null) SGI XFS with ACLs, security attributes, large block/inode numbers, no debug enabled XFS (sda11): Mounting Filesystem XFS (sda11): Ending clean mount kjournald starting. Commit interval 5 seconds EXT3-fs (sda12): warning: maximal mount count reached, running e2fsck is recommended EXT3-fs (sda12): using internal journal EXT3-fs (sda12): mounted filesystem with ordered data mode XFS (sda13): Mounting Filesystem XFS (sda13): Ending clean mount xor: measuring software checksum speed prefetch64-sse: 7572.000 MB/sec generic_sse: 6300.000 MB/sec xor: using function: prefetch64-sse (7572.000 MB/sec) raid6: sse2x14796 MB/s raid6: sse2x25449 MB/s raid6: sse2x45816 MB/s raid6: using algorithm sse2x4 (5816 MB/s) raid6: using ssse3x2 recovery algorithm Btrfs loaded device fsid 01548710-abce-4290-801d-5f19dde14497 devid 1 transid 131526 /dev/sda14 BUG: unable to handle kernel NULL pointer dereference at 0001 IP: [] ftrace_raw_event_block_bio_complete+0x73/0xf0 PGD 0 Oops: [#1] SMP Modules linked in: btrfs raid6_pq zlib_deflate xor xfs libcrc32c netconsole ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables nfsd lockd nfs_acl auth_rpcgss exportfs autofs4 sunrpc target_core_iblock target_core_file target_core_pscsi target_core_mod configfs bnx2fc cnic uio fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q garp ppdev parport_pc parport fuse cpufreq_ondemand cachefiles fscache bridge stp llc ipv6 ext3 jbd dm_mirror dm_region_hash dm_log vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support sg acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel microcode serio_raw pcspkr i2c_i801 lpc_ich mfd_core ioatdma i7core_edac edac_core igb dca i2c_algo_bit i2c_core ptp pps_core dm_mod ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix CPU 3 Pid: 0, comm: swapper/3 Not tainted 3.9.0-rc7-4-gbb33db7 #1 Lenovo Lenovo WQ TR280 G3/TR350 G7/TB36D RIP: 0010:[] [] ftrace_raw_event_block_bio_complete+0x73/0xf0 RSP: 0018:8802bfc63bb8 EFLAGS: 00010202 RAX: 8802b1e88698 RBX: 8802b05f6bc0 RCX: 0001 RDX: RSI: RDI: 8802b1e8869c RBP: 8802bfc63c08 R08: 000a R09: R10: 002c R11: R12: 81a95b60 R13: 0100 R14: 8802b1e88694 R15: FS: () GS:8802bfc6() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 0001 CR3: 01a0c000 CR4: 07e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process swapper/3 (pid: 0, threadinfo 8802b4598000, task 8802b4594a80) Stack: 8802b4546c40 0286 0100 8802b4546c40 8802bfc63c08 8802b1f82160 8802b05f6bc0 1000 8802bfc63c38 811b6c10 Call Trace: [] bio_endio+0x80/0x90 [] btrfs_end_bio+0xf6/0x190 [btrfs] [] bio_endio+0x3d/0x90 [] req_bio_endio+0xa3/0xe0 [] blk_update_request+0x10f/0x480 [] blk_update_bidi_request+0x27/0xb0 [] blk_end_bidi_request+0x2f/0x80 [] blk_end_request+0x10/0x20 [] scsi_end_request+0x40/0xb0 [] scsi_io_completion+0x9f/0x660 [] scsi_finish_command+0xc9/0x130 [] scsi_softirq_done+0x147/0x170 [] blk_done_softirq+0x82/0xa0 [] __do_softirq+0xe7/0x250 [] irq_exit+0xb5/0xc0 [] do_IRQ+0x66/0xe0 [] common_i
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/18/2013 10:30 PM, Jens Axboe wrote: > On Thu, Apr 18 2013, Wanlong Gao wrote: >> On 04/18/2013 09:35 PM, Jens Axboe wrote: >>> On Thu, Apr 18 2013, Wanlong Gao wrote: >>>>> A bio is always fully initialized, regardless of which internal >>>>> allocator it came from. If people are doing private kmallocs, then they >>>>> better be using bio_init() as well. >>>>> >>>>> Wanlong, would it be possible to get a full dmesg on boot see I can see >>>>> what drivers and file systems are in use? Anything special about your >>>>> setup. >>>> >>>> Sure, attached. >>> >>> Does the below help? >> >> Still got panic with this patch. :-( > > Could you capture the full dump? I'd like to see what rcx was, and that > seems to have scrolled off. OK, but I should capture it tomorrow morning because this remote machine has already panicked and need hard reboot. Thanks, Wanlong Gao > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/18/2013 09:35 PM, Jens Axboe wrote: > On Thu, Apr 18 2013, Wanlong Gao wrote: >>> A bio is always fully initialized, regardless of which internal >>> allocator it came from. If people are doing private kmallocs, then they >>> better be using bio_init() as well. >>> >>> Wanlong, would it be possible to get a full dmesg on boot see I can see >>> what drivers and file systems are in use? Anything special about your >>> setup. >> >> Sure, attached. > > Does the below help? Still got panic with this patch. :-( Thanks, Wanlong Gao > > > diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c > index 9e5b8c2..f9a51a6 100644 > --- a/kernel/trace/blktrace.c > +++ b/kernel/trace/blktrace.c > @@ -780,15 +780,23 @@ static void blk_add_trace_bio_bounce(void *ignore, > blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); > } > > +static struct request_queue *blk_trace_get_queue(struct block_device *bdev) > +{ > + if (bdev->bd_disk == NULL) > + return NULL; > + > + return bdev_get_queue(bdev); > +} > + > static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int > error) > { > struct request_queue *q; > struct blk_trace *bt; > > - if (!bio->bi_bdev) > + q = blk_trace_get_queue(bio->bi_bdev); > + if (!q) > return; > > - q = bdev_get_queue(bio->bi_bdev); > bt = q->blk_trace; > > /* > @@ -1641,14 +1649,6 @@ static ssize_t blk_trace_mask2str(char *buf, int mask) > return p - buf; > } > > -static struct request_queue *blk_trace_get_queue(struct block_device *bdev) > -{ > - if (bdev->bd_disk == NULL) > - return NULL; > - > - return bdev_get_queue(bdev); > -} > - > static ssize_t sysfs_blk_trace_attr_show(struct device *dev, >struct device_attribute *attr, >char *buf) > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/18/2013 09:35 PM, Jens Axboe wrote: On Thu, Apr 18 2013, Wanlong Gao wrote: A bio is always fully initialized, regardless of which internal allocator it came from. If people are doing private kmallocs, then they better be using bio_init() as well. Wanlong, would it be possible to get a full dmesg on boot see I can see what drivers and file systems are in use? Anything special about your setup. Sure, attached. Does the below help? Still got panic with this patch. :-( Thanks, Wanlong Gao diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e5b8c2..f9a51a6 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -780,15 +780,23 @@ static void blk_add_trace_bio_bounce(void *ignore, blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } +static struct request_queue *blk_trace_get_queue(struct block_device *bdev) +{ + if (bdev-bd_disk == NULL) + return NULL; + + return bdev_get_queue(bdev); +} + static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) { struct request_queue *q; struct blk_trace *bt; - if (!bio-bi_bdev) + q = blk_trace_get_queue(bio-bi_bdev); + if (!q) return; - q = bdev_get_queue(bio-bi_bdev); bt = q-blk_trace; /* @@ -1641,14 +1649,6 @@ static ssize_t blk_trace_mask2str(char *buf, int mask) return p - buf; } -static struct request_queue *blk_trace_get_queue(struct block_device *bdev) -{ - if (bdev-bd_disk == NULL) - return NULL; - - return bdev_get_queue(bdev); -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/18/2013 10:30 PM, Jens Axboe wrote: On Thu, Apr 18 2013, Wanlong Gao wrote: On 04/18/2013 09:35 PM, Jens Axboe wrote: On Thu, Apr 18 2013, Wanlong Gao wrote: A bio is always fully initialized, regardless of which internal allocator it came from. If people are doing private kmallocs, then they better be using bio_init() as well. Wanlong, would it be possible to get a full dmesg on boot see I can see what drivers and file systems are in use? Anything special about your setup. Sure, attached. Does the below help? Still got panic with this patch. :-( Could you capture the full dump? I'd like to see what rcx was, and that seems to have scrolled off. OK, but I should capture it tomorrow morning because this remote machine has already panicked and need hard reboot. Thanks, Wanlong Gao -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG REPORT] Kernel panic on 3.9.0-rc7-4-gbb33db7
On 04/18/2013 10:30 PM, Jens Axboe wrote: On Thu, Apr 18 2013, Wanlong Gao wrote: On 04/18/2013 09:35 PM, Jens Axboe wrote: On Thu, Apr 18 2013, Wanlong Gao wrote: A bio is always fully initialized, regardless of which internal allocator it came from. If people are doing private kmallocs, then they better be using bio_init() as well. Wanlong, would it be possible to get a full dmesg on boot see I can see what drivers and file systems are in use? Anything special about your setup. Sure, attached. Does the below help? Still got panic with this patch. :-( Could you capture the full dump? I'd like to see what rcx was, and that seems to have scrolled off. I got the panic message using netconsole: EXT4-fs (sda5): mounted filesystem with ordered data mode. Opts: (null) EXT4-fs (sda8): mounted filesystem with ordered data mode. Opts: (null) EXT4-fs (sda9): warning: maximal mount count reached, running e2fsck is recommended EXT4-fs (sda9): mounted filesystem with ordered data mode. Opts: (null) EXT4-fs (sda10): mounted filesystem with ordered data mode. Opts: (null) SGI XFS with ACLs, security attributes, large block/inode numbers, no debug enabled XFS (sda11): Mounting Filesystem XFS (sda11): Ending clean mount kjournald starting. Commit interval 5 seconds EXT3-fs (sda12): warning: maximal mount count reached, running e2fsck is recommended EXT3-fs (sda12): using internal journal EXT3-fs (sda12): mounted filesystem with ordered data mode XFS (sda13): Mounting Filesystem XFS (sda13): Ending clean mount xor: measuring software checksum speed prefetch64-sse: 7572.000 MB/sec generic_sse: 6300.000 MB/sec xor: using function: prefetch64-sse (7572.000 MB/sec) raid6: sse2x14796 MB/s raid6: sse2x25449 MB/s raid6: sse2x45816 MB/s raid6: using algorithm sse2x4 (5816 MB/s) raid6: using ssse3x2 recovery algorithm Btrfs loaded device fsid 01548710-abce-4290-801d-5f19dde14497 devid 1 transid 131526 /dev/sda14 BUG: unable to handle kernel NULL pointer dereference at 0001 IP: [812484d3] ftrace_raw_event_block_bio_complete+0x73/0xf0 PGD 0 Oops: [#1] SMP Modules linked in: btrfs raid6_pq zlib_deflate xor xfs libcrc32c netconsole ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables nfsd lockd nfs_acl auth_rpcgss exportfs autofs4 sunrpc target_core_iblock target_core_file target_core_pscsi target_core_mod configfs bnx2fc cnic uio fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q garp ppdev parport_pc parport fuse cpufreq_ondemand cachefiles fscache bridge stp llc ipv6 ext3 jbd dm_mirror dm_region_hash dm_log vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support sg acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel microcode serio_raw pcspkr i2c_i801 lpc_ich mfd_core ioatdma i7core_edac edac_core igb dca i2c_algo_bit i2c_core ptp pps_core dm_mod ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix CPU 3 Pid: 0, comm: swapper/3 Not tainted 3.9.0-rc7-4-gbb33db7 #1 Lenovo Lenovo WQ TR280 G3/TR350 G7/TB36D RIP: 0010:[812484d3] [812484d3] ftrace_raw_event_block_bio_complete+0x73/0xf0 RSP: 0018:8802bfc63bb8 EFLAGS: 00010202 RAX: 8802b1e88698 RBX: 8802b05f6bc0 RCX: 0001 RDX: RSI: RDI: 8802b1e8869c RBP: 8802bfc63c08 R08: 000a R09: R10: 002c R11: R12: 81a95b60 R13: 0100 R14: 8802b1e88694 R15: FS: () GS:8802bfc6() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 0001 CR3: 01a0c000 CR4: 07e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process swapper/3 (pid: 0, threadinfo 8802b4598000, task 8802b4594a80) Stack: 8802b4546c40 0286 0100 8802b4546c40 8802bfc63c08 8802b1f82160 8802b05f6bc0 1000 8802bfc63c38 811b6c10 Call Trace: IRQ [811b6c10] bio_endio+0x80/0x90 [a0790d26] btrfs_end_bio+0xf6/0x190 [btrfs] [811b6bcd] bio_endio+0x3d/0x90 [81249873] req_bio_endio+0xa3/0xe0 [8124af9f] blk_update_request+0x10f/0x480 [8124b337] blk_update_bidi_request+0x27/0xb0 [8124c52f] blk_end_bidi_request+0x2f/0x80 [8124c5d0] blk_end_request+0x10/0x20 [8139c1b0] scsi_end_request+0x40/0xb0 [8139c52f] scsi_io_completion+0x9f/0x660 [81393899] scsi_finish_command+0xc9/0x130 [8139cc57] scsi_softirq_done+0x147/0x170 [81252c52] blk_done_softirq+0x82/0xa0 [8105f537] __do_softirq+0xe7/0x250 [8105f7a5] irq_exit+0xb5/0xc0
Re: [PATCH V7 0/5] virtio-scsi multiqueue
On 04/06/2013 04:40 PM, James Bottomley wrote: > On Fri, 2013-04-05 at 16:55 +0800, Wanlong Gao wrote: >> On 03/28/2013 10:22 AM, Wanlong Gao wrote: >>> On 03/23/2013 07:28 PM, Wanlong Gao wrote: >>>> This series implements virtio-scsi queue steering, which gives >>>> performance improvements of up to 50% (measured both with QEMU and >>>> tcm_vhost backends). >>>> >>>> This version rebased on Rusty's virtio ring rework patches, which >>>> has already gone into virtio-next today. >>>> We hope this can go into virtio-next together with the virtio ring >>>> rework pathes. >>>> >>>> V7: respin to fix the patch apply error >>>> >>>> V6: rework "redo allocation of target data" (James) >>>> fix comments (Venkatesh) >>>> rebase to virtio-next >>>> >>>> V5: improving the grammar of 1/5 (Paolo) >>>> move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs >>>> for command buffers'. (Asias) >>>> >>>> V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) >>>> >>>> V3 and be found >>>> http://marc.info/?l=linux-virtualization=136067440717154=2 >>>> >>>> >>>> It would probably be easier to get it in via Rusty's tree >>>> because of the prerequisites. James, can we get your Acked-by? >>> >>> James, any thoughts for this version? >> >> Ping James > > Well, I haven't had time to look at anything other than the patch I > commented on. I'm happy with your fix, so you can add my acked by to > that one. Since it's going through the virtio tree, don't wait for me, > put it in and I'll make you fix up anything I find later that I'm > unhappy with. Got it, thank you James. Regards, Wanlong Gao > > James > > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V7 0/5] virtio-scsi multiqueue
On 04/06/2013 04:40 PM, James Bottomley wrote: On Fri, 2013-04-05 at 16:55 +0800, Wanlong Gao wrote: On 03/28/2013 10:22 AM, Wanlong Gao wrote: On 03/23/2013 07:28 PM, Wanlong Gao wrote: This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V7: respin to fix the patch apply error V6: rework redo allocation of target data (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualizationm=136067440717154w=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? James, any thoughts for this version? Ping James Well, I haven't had time to look at anything other than the patch I commented on. I'm happy with your fix, so you can add my acked by to that one. Since it's going through the virtio tree, don't wait for me, put it in and I'll make you fix up anything I find later that I'm unhappy with. Got it, thank you James. Regards, Wanlong Gao James -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V7 0/5] virtio-scsi multiqueue
On 03/28/2013 10:22 AM, Wanlong Gao wrote: > On 03/23/2013 07:28 PM, Wanlong Gao wrote: >> This series implements virtio-scsi queue steering, which gives >> performance improvements of up to 50% (measured both with QEMU and >> tcm_vhost backends). >> >> This version rebased on Rusty's virtio ring rework patches, which >> has already gone into virtio-next today. >> We hope this can go into virtio-next together with the virtio ring >> rework pathes. >> >> V7: respin to fix the patch apply error >> >> V6: rework "redo allocation of target data" (James) >> fix comments (Venkatesh) >> rebase to virtio-next >> >> V5: improving the grammar of 1/5 (Paolo) >> move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for >> command buffers'. (Asias) >> >> V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) >> >> V3 and be found >> http://marc.info/?l=linux-virtualization=136067440717154=2 >> >> >> It would probably be easier to get it in via Rusty's tree >> because of the prerequisites. James, can we get your Acked-by? > > James, any thoughts for this version? Ping James > > Thanks, > Wanlong Gao > >> >> Paolo Bonzini (3): >> virtio-scsi: pass struct virtio_scsi to virtqueue completion function >> virtio-scsi: push vq lock/unlock into virtscsi_vq_done >> virtio-scsi: introduce multiqueue support >> >> Wanlong Gao (2): >> virtio-scsi: redo allocation of target data >> virtio-scsi: reset virtqueue affinity when doing cpu hotplug >> >> drivers/scsi/virtio_scsi.c | 387 >> - >> 1 file changed, 309 insertions(+), 78 deletions(-) >> > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V7 0/5] virtio-scsi multiqueue
On 03/28/2013 10:22 AM, Wanlong Gao wrote: On 03/23/2013 07:28 PM, Wanlong Gao wrote: This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V7: respin to fix the patch apply error V6: rework redo allocation of target data (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualizationm=136067440717154w=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? James, any thoughts for this version? Ping James Thanks, Wanlong Gao Paolo Bonzini (3): virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (2): virtio-scsi: redo allocation of target data virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 387 - 1 file changed, 309 insertions(+), 78 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V7 0/5] virtio-scsi multiqueue
On 03/23/2013 07:28 PM, Wanlong Gao wrote: > This series implements virtio-scsi queue steering, which gives > performance improvements of up to 50% (measured both with QEMU and > tcm_vhost backends). > > This version rebased on Rusty's virtio ring rework patches, which > has already gone into virtio-next today. > We hope this can go into virtio-next together with the virtio ring > rework pathes. > > V7: respin to fix the patch apply error > > V6: rework "redo allocation of target data" (James) > fix comments (Venkatesh) > rebase to virtio-next > > V5: improving the grammar of 1/5 (Paolo) > move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for > command buffers'. (Asias) > > V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) > > V3 and be found http://marc.info/?l=linux-virtualization=136067440717154=2 > > > It would probably be easier to get it in via Rusty's tree > because of the prerequisites. James, can we get your Acked-by? James, any thoughts for this version? Thanks, Wanlong Gao > > Paolo Bonzini (3): > virtio-scsi: pass struct virtio_scsi to virtqueue completion function > virtio-scsi: push vq lock/unlock into virtscsi_vq_done > virtio-scsi: introduce multiqueue support > > Wanlong Gao (2): > virtio-scsi: redo allocation of target data > virtio-scsi: reset virtqueue affinity when doing cpu hotplug > > drivers/scsi/virtio_scsi.c | 387 > - > 1 file changed, 309 insertions(+), 78 deletions(-) > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V7 0/5] virtio-scsi multiqueue
On 03/23/2013 07:28 PM, Wanlong Gao wrote: This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V7: respin to fix the patch apply error V6: rework redo allocation of target data (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualizationm=136067440717154w=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? James, any thoughts for this version? Thanks, Wanlong Gao Paolo Bonzini (3): virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (2): virtio-scsi: redo allocation of target data virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 387 - 1 file changed, 309 insertions(+), 78 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8dcdef0..2168258 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* If the affinity hint is set for virtqueues */ bool affinity_hint_set; + /* CPU hotplug notifier */ + struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, +unsigned long action, void *hcpu) +{ + struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __virtscsi_set_affinity(vscsi, true); + break; + default: + break; + } + return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + vscsi->nb.notifier_call = _cpu_callback; + err = register_hotcpu_notifier(>nb); + if (err) { + pr_err("registering cpu notifier failed\n"); + goto scsi_add_host_failed; + } + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); + unregister_hotcpu_notifier(>nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 1/5] virtio-scsi: redo allocation of target data
virtio_scsi_target_state is now empty. We will find new uses for it in the next few patches, so this patch does not drop it completely. And as James suggested, we use entries target_alloc and target_destroy in the host template to allocate and destroy the virtio_scsi_target_state of each target, attach this struct to scsi_target->hostdata. Now we can get at it from the sdev with scsi_target(sdev)->hostdata. No messing around with fixed size arrays and bulk memory allocation and no need to pass in the maximum target size as a parameter because everything should now happen dynamically. Cc: James Bottomley Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 71 -- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b53ba9e..ffa03e8 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -75,8 +75,6 @@ struct virtio_scsi { /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; - - struct virtio_scsi_target_state *tgt[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -530,6 +528,25 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return virtscsi_tmf(vscsi, cmd); } +static int virtscsi_target_alloc(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = + kmalloc(sizeof(*tgt), GFP_KERNEL); + if (!tgt) + return -ENOMEM; + + spin_lock_init(>tgt_lock); + + starget->hostdata = tgt; + return 0; +} + +static void virtscsi_target_destroy(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = starget->hostdata; + kfree(tgt); +} + static struct scsi_host_template virtscsi_host_template = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", @@ -542,6 +559,8 @@ static struct scsi_host_template virtscsi_host_template = { .can_queue = 1024, .dma_boundary = UINT_MAX, .use_clustering = ENABLE_CLUSTERING, + .target_alloc = virtscsi_target_alloc, + .target_destroy = virtscsi_target_destroy, }; #define virtscsi_config_get(vdev, fld) \ @@ -568,20 +587,6 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq->vq = vq; } -static struct virtio_scsi_target_state *virtscsi_alloc_tgt( - struct virtio_device *vdev) -{ - struct virtio_scsi_target_state *tgt; - gfp_t gfp_mask = GFP_KERNEL; - - tgt = kmalloc(sizeof(*tgt), gfp_mask); - if (!tgt) - return NULL; - - spin_lock_init(>tgt_lock); - return tgt; -} - static void virtscsi_scan(struct virtio_device *vdev) { struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv; @@ -591,28 +596,17 @@ static void virtscsi_scan(struct virtio_device *vdev) static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - u32 i, num_targets; - /* Stop all the virtqueues. */ vdev->config->reset(vdev); - num_targets = sh->max_id; - for (i = 0; i < num_targets; i++) { - kfree(vscsi->tgt[i]); - vscsi->tgt[i] = NULL; - } - vdev->config->del_vqs(vdev); } static int virtscsi_init(struct virtio_device *vdev, -struct virtio_scsi *vscsi, int num_targets) +struct virtio_scsi *vscsi) { int err; struct virtqueue *vqs[3]; - u32 i; vq_callback_t *callbacks[] = { virtscsi_ctrl_done, @@ -640,18 +634,6 @@ static int virtscsi_init(struct virtio_device *vdev, if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) virtscsi_kick_event_all(vscsi); - for (i = 0; i < num_targets; i++) { - vscsi->tgt[i] = virtscsi_alloc_tgt(vdev); - if (!vscsi->tgt[i]) { - err = -ENOMEM; - goto out; - } - } - err = 0; - -out: - if (err) - virtscsi_remove_vqs(vdev); return err; } @@ -663,12 +645,9 @@ static int virtscsi_probe(struct virtio_device *vdev) u32 sg_elems, num_targets; u32 cmd_per_lun; - /* Allocate memory and link the structs together. */ num_targets = virtscsi_config_get(vdev, max_target) + 1; - shost = scsi_host_alloc(_host_template, - sizeof(*vscsi) - + num_targets * sizeof(struct virtio_scsi_target_state)); + shost = scsi_host_alloc(_host_template, sizeof(*vscsi)); if (!shost) return -ENOMEM; @@ -678,7 +657,7 @@ static
Re: [PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
On 03/23/2013 02:36 PM, Paolo Bonzini wrote: > Il 20/03/2013 08:56, Wanlong Gao ha scritto: >>> This one does not apply on top of virtio-next + patch 1-4 in this series. >> >> I'm very sorry. >> >> This fault is because I modified the 4/5 from >> "/* if the affinity hint is set for virtqueues */" >> to >> "/* If the affinity hint is set for virtqueues */" >> by hand. >> >> You can also change "If" to "if" in 5/5 to apply this patch without 3-way >> merge. > > Can you respin? > > So we can ask James for his Acked-by and get this merged in the next window. Sure. Thanks, Wanlong Gao > > Paolo > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 4/5] virtio-scsi: introduce multiqueue support
From: Paolo Bonzini This patch adds queue steering to virtio-scsi. When a target is sent multiple requests, we always drive them to the same queue so that FIFO processing order is kept. However, if a target was idle, we can choose a queue arbitrarily. In this case the queue is chosen according to the current VCPU, so the driver expects the number of request queues to be equal to the number of VCPUs. This makes it easy and fast to select the queue, and also lets the driver optimize the IRQ affinity for the virtqueues (each virtqueue's affinity is set to the CPU that "owns" the queue). The speedup comes from improving cache locality and giving CPU affinity to the virtqueues, which is why this scheme was selected. Assuming that the thread that is sending requests to the device is I/O-bound, it is likely to be sleeping at the time the ISR is executed, and thus executing the ISR on the same processor that sent the requests is cheap. However, the kernel will not execute the ISR on the "best" processor unless you explicitly set the affinity. This is because in practice you will have many such I/O-bound processes and thus many otherwise idle processors. Then the kernel will execute the ISR on a random processor, rather than the one that is sending requests to the device. The alternative to per-CPU virtqueues is per-target virtqueues. To achieve the same locality, we could dynamically choose the virtqueue's affinity based on the CPU of the last task that sent a request. This is less appealing because we do not set the affinity directly---we only provide a hint to the irqbalanced running in userspace. Dynamically changing the affinity only works if the userspace applies the hint fast enough. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He Tested-by: Venkatesh Srinivas --- drivers/scsi/virtio_scsi.c | 282 - 1 file changed, 254 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index dc2daec..8dcdef0 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 +#define VIRTIO_SCSI_VQ_BASE 2 /* Command queue element */ struct virtio_scsi_cmd { @@ -59,22 +61,58 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* Per-target queue state */ +/* + * Per-target queue state. + * + * This struct holds the data needed by the queue steering policy. When a + * target is sent multiple requests, we need to drive them to the same queue so + * that FIFO processing order is kept. However, if a target was idle, we can + * choose a queue arbitrarily. In this case the queue is chosen according to + * the current VCPU, so the driver expects the number of request queues to be + * equal to the number of VCPUs. This makes it easy and fast to select the + * queue, and also lets the driver optimize the IRQ affinity for the virtqueues + * (each virtqueue's affinity is set to the CPU that "owns" the queue). + * + * An interesting effect of this policy is that only writes to req_vq need to + * take the tgt_lock. Read can be done outside the lock because: + * + * - writes of req_vq only occur when atomic_inc_return(>reqs) returns 1. + * In that case, no other CPU is reading req_vq: even if they were in + * virtscsi_queuecommand_multi, they would be spinning on tgt_lock. + * + * - reads of req_vq only occur when the target is not idle (reqs != 0). + * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq. + * + * Similarly, decrements of reqs are never concurrent with writes of req_vq. + * Thus they can happen outside the tgt_lock, provided of course we make reqs + * an atomic_t. + */ struct virtio_scsi_target_state { - /* Never held at the same time as vq_lock. */ + /* This spinlock never held at the same time as vq_lock. */ spinlock_t tgt_lock; + + /* Count of outstanding requests. */ + atomic_t reqs; + + /* Currently active virtqueue for requests sent to this target. */ + struct virtio_scsi_vq *req_vq; }; /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; - struct virtio_scsi_vq ctrl_vq; - struct virtio_scsi_vq event_vq; - struct virtio_scsi_vq req_vq; - /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; + + u32 num_queues; + + /* If the affinity hint is set for virtqueues */ + bool affinity_hint_set; + + struct virtio_scsi_vq ctrl_vq; + struct virtio_scsi_vq event_vq; + struct virtio_scsi_vq req_vqs[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -109,6 +147,8 @@ static void virtscsi_complete_cmd
[PATCH V7 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index ffa03e8..c23560c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -104,7 +104,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd->sc; @@ -165,7 +165,8 @@ static void virtscsi_complete_cmd(void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -173,7 +174,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -184,11 +185,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(>req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -205,7 +206,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); }; @@ -329,7 +330,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -344,7 +345,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(>event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 3/5] virtio-scsi: push vq lock/unlock into virtscsi_vq_done
From: Paolo Bonzini Avoid duplicated code in all of the callers. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c23560c..dc2daec 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -165,28 +165,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +static void virtscsi_vq_done(struct virtio_scsi *vscsi, +struct virtio_scsi_vq *virtscsi_vq, void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; + unsigned long flags; + struct virtqueue *vq = virtscsi_vq->vq; + spin_lock_irqsave(_vq->vq_lock, flags); do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(_vq->vq_lock, flags); } static void virtscsi_req_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); - spin_unlock_irqrestore(>req_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >req_vq, virtscsi_complete_cmd); }; static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) @@ -203,11 +205,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); - spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >ctrl_vq, virtscsi_complete_free); }; static int virtscsi_kick_event(struct virtio_scsi *vscsi, @@ -342,11 +341,8 @@ static void virtscsi_event_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); - spin_unlock_irqrestore(>event_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >event_vq, virtscsi_complete_event); }; /** -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V7: respin to fix the patch apply error V6: rework "redo allocation of target data" (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualization=136067440717154=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (3): virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (2): virtio-scsi: redo allocation of target data virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 387 - 1 file changed, 309 insertions(+), 78 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V7: respin to fix the patch apply error V6: rework redo allocation of target data (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualizationm=136067440717154w=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (3): virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (2): virtio-scsi: redo allocation of target data virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 387 - 1 file changed, 309 insertions(+), 78 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 3/5] virtio-scsi: push vq lock/unlock into virtscsi_vq_done
From: Paolo Bonzini pbonz...@redhat.com Avoid duplicated code in all of the callers. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c23560c..dc2daec 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -165,28 +165,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc-scsi_done(sc); } -static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +static void virtscsi_vq_done(struct virtio_scsi *vscsi, +struct virtio_scsi_vq *virtscsi_vq, void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; + unsigned long flags; + struct virtqueue *vq = virtscsi_vq-vq; + spin_lock_irqsave(virtscsi_vq-vq_lock, flags); do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, len)) != NULL) fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(virtscsi_vq-vq_lock, flags); } static void virtscsi_req_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-req_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); - spin_unlock_irqrestore(vscsi-req_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-req_vq, virtscsi_complete_cmd); }; static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) @@ -203,11 +205,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); - spin_unlock_irqrestore(vscsi-ctrl_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-ctrl_vq, virtscsi_complete_free); }; static int virtscsi_kick_event(struct virtio_scsi *vscsi, @@ -342,11 +341,8 @@ static void virtscsi_event_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-event_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); - spin_unlock_irqrestore(vscsi-event_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-event_vq, virtscsi_complete_event); }; /** -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini pbonz...@redhat.com This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index ffa03e8..c23560c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -104,7 +104,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd-sc; @@ -165,7 +165,8 @@ static void virtscsi_complete_cmd(void *buf) sc-scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -173,7 +174,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, len)) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -184,11 +185,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(vscsi-req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -205,7 +206,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(vscsi-ctrl_vq.vq_lock, flags); }; @@ -329,7 +330,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -344,7 +345,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(vscsi-event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 4/5] virtio-scsi: introduce multiqueue support
From: Paolo Bonzini pbonz...@redhat.com This patch adds queue steering to virtio-scsi. When a target is sent multiple requests, we always drive them to the same queue so that FIFO processing order is kept. However, if a target was idle, we can choose a queue arbitrarily. In this case the queue is chosen according to the current VCPU, so the driver expects the number of request queues to be equal to the number of VCPUs. This makes it easy and fast to select the queue, and also lets the driver optimize the IRQ affinity for the virtqueues (each virtqueue's affinity is set to the CPU that owns the queue). The speedup comes from improving cache locality and giving CPU affinity to the virtqueues, which is why this scheme was selected. Assuming that the thread that is sending requests to the device is I/O-bound, it is likely to be sleeping at the time the ISR is executed, and thus executing the ISR on the same processor that sent the requests is cheap. However, the kernel will not execute the ISR on the best processor unless you explicitly set the affinity. This is because in practice you will have many such I/O-bound processes and thus many otherwise idle processors. Then the kernel will execute the ISR on a random processor, rather than the one that is sending requests to the device. The alternative to per-CPU virtqueues is per-target virtqueues. To achieve the same locality, we could dynamically choose the virtqueue's affinity based on the CPU of the last task that sent a request. This is less appealing because we do not set the affinity directly---we only provide a hint to the irqbalanced running in userspace. Dynamically changing the affinity only works if the userspace applies the hint fast enough. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com Tested-by: Venkatesh Srinivas venkate...@google.com --- drivers/scsi/virtio_scsi.c | 282 - 1 file changed, 254 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index dc2daec..8dcdef0 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -22,12 +22,14 @@ #include linux/virtio_ids.h #include linux/virtio_config.h #include linux/virtio_scsi.h +#include linux/cpu.h #include scsi/scsi_host.h #include scsi/scsi_device.h #include scsi/scsi_cmnd.h #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 +#define VIRTIO_SCSI_VQ_BASE 2 /* Command queue element */ struct virtio_scsi_cmd { @@ -59,22 +61,58 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* Per-target queue state */ +/* + * Per-target queue state. + * + * This struct holds the data needed by the queue steering policy. When a + * target is sent multiple requests, we need to drive them to the same queue so + * that FIFO processing order is kept. However, if a target was idle, we can + * choose a queue arbitrarily. In this case the queue is chosen according to + * the current VCPU, so the driver expects the number of request queues to be + * equal to the number of VCPUs. This makes it easy and fast to select the + * queue, and also lets the driver optimize the IRQ affinity for the virtqueues + * (each virtqueue's affinity is set to the CPU that owns the queue). + * + * An interesting effect of this policy is that only writes to req_vq need to + * take the tgt_lock. Read can be done outside the lock because: + * + * - writes of req_vq only occur when atomic_inc_return(tgt-reqs) returns 1. + * In that case, no other CPU is reading req_vq: even if they were in + * virtscsi_queuecommand_multi, they would be spinning on tgt_lock. + * + * - reads of req_vq only occur when the target is not idle (reqs != 0). + * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq. + * + * Similarly, decrements of reqs are never concurrent with writes of req_vq. + * Thus they can happen outside the tgt_lock, provided of course we make reqs + * an atomic_t. + */ struct virtio_scsi_target_state { - /* Never held at the same time as vq_lock. */ + /* This spinlock never held at the same time as vq_lock. */ spinlock_t tgt_lock; + + /* Count of outstanding requests. */ + atomic_t reqs; + + /* Currently active virtqueue for requests sent to this target. */ + struct virtio_scsi_vq *req_vq; }; /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; - struct virtio_scsi_vq ctrl_vq; - struct virtio_scsi_vq event_vq; - struct virtio_scsi_vq req_vq; - /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; + + u32 num_queues; + + /* If the affinity hint is set for virtqueues */ + bool affinity_hint_set; + + struct virtio_scsi_vq ctrl_vq
[PATCH V7 1/5] virtio-scsi: redo allocation of target data
virtio_scsi_target_state is now empty. We will find new uses for it in the next few patches, so this patch does not drop it completely. And as James suggested, we use entries target_alloc and target_destroy in the host template to allocate and destroy the virtio_scsi_target_state of each target, attach this struct to scsi_target-hostdata. Now we can get at it from the sdev with scsi_target(sdev)-hostdata. No messing around with fixed size arrays and bulk memory allocation and no need to pass in the maximum target size as a parameter because everything should now happen dynamically. Cc: James Bottomley james.bottom...@hansenpartnership.com Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 71 -- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b53ba9e..ffa03e8 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -75,8 +75,6 @@ struct virtio_scsi { /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; - - struct virtio_scsi_target_state *tgt[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -530,6 +528,25 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return virtscsi_tmf(vscsi, cmd); } +static int virtscsi_target_alloc(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = + kmalloc(sizeof(*tgt), GFP_KERNEL); + if (!tgt) + return -ENOMEM; + + spin_lock_init(tgt-tgt_lock); + + starget-hostdata = tgt; + return 0; +} + +static void virtscsi_target_destroy(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = starget-hostdata; + kfree(tgt); +} + static struct scsi_host_template virtscsi_host_template = { .module = THIS_MODULE, .name = Virtio SCSI HBA, @@ -542,6 +559,8 @@ static struct scsi_host_template virtscsi_host_template = { .can_queue = 1024, .dma_boundary = UINT_MAX, .use_clustering = ENABLE_CLUSTERING, + .target_alloc = virtscsi_target_alloc, + .target_destroy = virtscsi_target_destroy, }; #define virtscsi_config_get(vdev, fld) \ @@ -568,20 +587,6 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq-vq = vq; } -static struct virtio_scsi_target_state *virtscsi_alloc_tgt( - struct virtio_device *vdev) -{ - struct virtio_scsi_target_state *tgt; - gfp_t gfp_mask = GFP_KERNEL; - - tgt = kmalloc(sizeof(*tgt), gfp_mask); - if (!tgt) - return NULL; - - spin_lock_init(tgt-tgt_lock); - return tgt; -} - static void virtscsi_scan(struct virtio_device *vdev) { struct Scsi_Host *shost = (struct Scsi_Host *)vdev-priv; @@ -591,28 +596,17 @@ static void virtscsi_scan(struct virtio_device *vdev) static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - u32 i, num_targets; - /* Stop all the virtqueues. */ vdev-config-reset(vdev); - num_targets = sh-max_id; - for (i = 0; i num_targets; i++) { - kfree(vscsi-tgt[i]); - vscsi-tgt[i] = NULL; - } - vdev-config-del_vqs(vdev); } static int virtscsi_init(struct virtio_device *vdev, -struct virtio_scsi *vscsi, int num_targets) +struct virtio_scsi *vscsi) { int err; struct virtqueue *vqs[3]; - u32 i; vq_callback_t *callbacks[] = { virtscsi_ctrl_done, @@ -640,18 +634,6 @@ static int virtscsi_init(struct virtio_device *vdev, if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) virtscsi_kick_event_all(vscsi); - for (i = 0; i num_targets; i++) { - vscsi-tgt[i] = virtscsi_alloc_tgt(vdev); - if (!vscsi-tgt[i]) { - err = -ENOMEM; - goto out; - } - } - err = 0; - -out: - if (err) - virtscsi_remove_vqs(vdev); return err; } @@ -663,12 +645,9 @@ static int virtscsi_probe(struct virtio_device *vdev) u32 sg_elems, num_targets; u32 cmd_per_lun; - /* Allocate memory and link the structs together. */ num_targets = virtscsi_config_get(vdev, max_target) + 1; - shost = scsi_host_alloc(virtscsi_host_template, - sizeof(*vscsi) - + num_targets * sizeof(struct virtio_scsi_target_state)); + shost = scsi_host_alloc(virtscsi_host_template, sizeof(*vscsi)); if (!shost) return -ENOMEM
Re: [PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
On 03/23/2013 02:36 PM, Paolo Bonzini wrote: Il 20/03/2013 08:56, Wanlong Gao ha scritto: This one does not apply on top of virtio-next + patch 1-4 in this series. I'm very sorry. This fault is because I modified the 4/5 from /* if the affinity hint is set for virtqueues */ to /* If the affinity hint is set for virtqueues */ by hand. You can also change If to if in 5/5 to apply this patch without 3-way merge. Can you respin? So we can ask James for his Acked-by and get this merged in the next window. Sure. Thanks, Wanlong Gao Paolo -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V7 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8dcdef0..2168258 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* If the affinity hint is set for virtqueues */ bool affinity_hint_set; + /* CPU hotplug notifier */ + struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, +unsigned long action, void *hcpu) +{ + struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __virtscsi_set_affinity(vscsi, true); + break; + default: + break; + } + return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + vscsi-nb.notifier_call = virtscsi_cpu_callback; + err = register_hotcpu_notifier(vscsi-nb); + if (err) { + pr_err(registering cpu notifier failed\n); + goto scsi_add_host_failed; + } + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost-cmd_per_lun = min_t(u32, cmd_per_lun, shost-can_queue); shost-max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); + unregister_hotcpu_notifier(vscsi-nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
On 03/20/2013 03:24 PM, Asias He wrote: > On Wed, Mar 20, 2013 at 03:01:24PM +0800, Wanlong Gao wrote: >> Add hot cpu notifier to reset the request virtqueue affinity >> when doing cpu hotplug. >> >> Cc: linux-s...@vger.kernel.org >> Signed-off-by: Paolo Bonzini >> Signed-off-by: Wanlong Gao >> Reviewed-by: Asias He >> --- >> drivers/scsi/virtio_scsi.c | 29 + >> 1 file changed, 29 insertions(+) >> >> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c >> index 13d7672..0ad9017 100644 >> --- a/drivers/scsi/virtio_scsi.c >> +++ b/drivers/scsi/virtio_scsi.c >> @@ -110,6 +110,9 @@ struct virtio_scsi { >> /* if the affinity hint is set for virtqueues */ >> bool affinity_hint_set; >> >> +/* CPU hotplug notifier */ >> +struct notifier_block nb; >> + >> struct virtio_scsi_vq ctrl_vq; >> struct virtio_scsi_vq event_vq; >> struct virtio_scsi_vq req_vqs[]; >> @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi >> *vscsi, bool affinity) >> put_online_cpus(); >> } >> >> +static int virtscsi_cpu_callback(struct notifier_block *nfb, >> + unsigned long action, void *hcpu) >> +{ >> +struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); >> +switch(action) { >> +case CPU_ONLINE: >> +case CPU_ONLINE_FROZEN: >> +case CPU_DEAD: >> +case CPU_DEAD_FROZEN: >> +__virtscsi_set_affinity(vscsi, true); >> +break; >> +default: >> +break; >> +} >> +return NOTIFY_OK; >> +} >> + >> static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, >> struct virtqueue *vq) >> { >> @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) >> if (err) >> goto virtscsi_init_failed; >> >> +vscsi->nb.notifier_call = _cpu_callback; >> +err = register_hotcpu_notifier(>nb); >> +if (err) { >> +pr_err("registering cpu notifier failed\n"); >> +goto scsi_add_host_failed; >> +} >> + >> cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; >> shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); >> shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; >> @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) >> >> scsi_remove_host(shost); >> >> +unregister_hotcpu_notifier(>nb); >> + >> virtscsi_remove_vqs(vdev); >> scsi_host_put(shost); >> } >> -- >> 1.8.2.rc2 >> > > This one does not apply on top of virtio-next + patch 1-4 in this series. I'm very sorry. This fault is because I modified the 4/5 from "/* if the affinity hint is set for virtqueues */" to "/* If the affinity hint is set for virtqueues */" by hand. You can also change "If" to "if" in 5/5 to apply this patch without 3-way merge. Thanks, Wanlong Gao > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
On 03/20/2013 03:24 PM, Asias He wrote: > On Wed, Mar 20, 2013 at 03:01:24PM +0800, Wanlong Gao wrote: >> Add hot cpu notifier to reset the request virtqueue affinity >> when doing cpu hotplug. >> >> Cc: linux-s...@vger.kernel.org >> Signed-off-by: Paolo Bonzini >> Signed-off-by: Wanlong Gao >> Reviewed-by: Asias He >> --- >> drivers/scsi/virtio_scsi.c | 29 + >> 1 file changed, 29 insertions(+) >> >> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c >> index 13d7672..0ad9017 100644 >> --- a/drivers/scsi/virtio_scsi.c >> +++ b/drivers/scsi/virtio_scsi.c >> @@ -110,6 +110,9 @@ struct virtio_scsi { >> /* if the affinity hint is set for virtqueues */ >> bool affinity_hint_set; >> >> +/* CPU hotplug notifier */ >> +struct notifier_block nb; >> + >> struct virtio_scsi_vq ctrl_vq; >> struct virtio_scsi_vq event_vq; >> struct virtio_scsi_vq req_vqs[]; >> @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi >> *vscsi, bool affinity) >> put_online_cpus(); >> } >> >> +static int virtscsi_cpu_callback(struct notifier_block *nfb, >> + unsigned long action, void *hcpu) >> +{ >> +struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); >> +switch(action) { >> +case CPU_ONLINE: >> +case CPU_ONLINE_FROZEN: >> +case CPU_DEAD: >> +case CPU_DEAD_FROZEN: >> +__virtscsi_set_affinity(vscsi, true); >> +break; >> +default: >> +break; >> +} >> +return NOTIFY_OK; >> +} >> + >> static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, >> struct virtqueue *vq) >> { >> @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) >> if (err) >> goto virtscsi_init_failed; >> >> +vscsi->nb.notifier_call = _cpu_callback; >> +err = register_hotcpu_notifier(>nb); >> +if (err) { >> +pr_err("registering cpu notifier failed\n"); >> +goto scsi_add_host_failed; >> +} >> + >> cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; >> shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); >> shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; >> @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) >> >> scsi_remove_host(shost); >> >> +unregister_hotcpu_notifier(>nb); >> + >> virtscsi_remove_vqs(vdev); >> scsi_host_put(shost); >> } >> -- >> 1.8.2.rc2 >> > > This one does not apply on top of virtio-next + patch 1-4 in this series. Oops, it's my fault. There may be something wrong here. But you can use "3-way merge" to apply them, I'll take care next time. Thanks, Wanlong Gao > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V5 4/5] virtio-scsi: introduce multiqueue support
On 03/20/2013 09:46 AM, Venkatesh Srinivas wrote: > This looks pretty good! > > I rather like the (lack of) locking in I/O completion (around the req > count vs. target/queue binding). It is unfortunate that you need to hold the > per-target lock in virtscsi_pick_vq() though; have any idea > how much that lock hurts? Paolo? > > Just two minor comments: > > (in struct virtio_scsi_target_data): > + /* This spinlock never help at the same time as vq_lock. */ > held? > > (in struct virtio_scsi): > + /* Does the affinity hint is set for virtqueues? */ > Could you rephrase that, please? Thank you, fixed in V6, please review. > > Tested on qemu and w/ Google Compute Engine's virtio-scsi device. Cool. > > Reviewed-and-tested-by: Venkatesh Srinivas Do you mind review and test the V6? Thank you. Regards, Wanlong Gao > > Thanks, > -- vs; > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index ffa03e8..c23560c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -104,7 +104,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd->sc; @@ -165,7 +165,8 @@ static void virtscsi_complete_cmd(void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -173,7 +174,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -184,11 +185,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(>req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -205,7 +206,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); }; @@ -329,7 +330,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -344,7 +345,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(>event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 4/5] virtio-scsi: introduce multiqueue support
From: Paolo Bonzini This patch adds queue steering to virtio-scsi. When a target is sent multiple requests, we always drive them to the same queue so that FIFO processing order is kept. However, if a target was idle, we can choose a queue arbitrarily. In this case the queue is chosen according to the current VCPU, so the driver expects the number of request queues to be equal to the number of VCPUs. This makes it easy and fast to select the queue, and also lets the driver optimize the IRQ affinity for the virtqueues (each virtqueue's affinity is set to the CPU that "owns" the queue). The speedup comes from improving cache locality and giving CPU affinity to the virtqueues, which is why this scheme was selected. Assuming that the thread that is sending requests to the device is I/O-bound, it is likely to be sleeping at the time the ISR is executed, and thus executing the ISR on the same processor that sent the requests is cheap. However, the kernel will not execute the ISR on the "best" processor unless you explicitly set the affinity. This is because in practice you will have many such I/O-bound processes and thus many otherwise idle processors. Then the kernel will execute the ISR on a random processor, rather than the one that is sending requests to the device. The alternative to per-CPU virtqueues is per-target virtqueues. To achieve the same locality, we could dynamically choose the virtqueue's affinity based on the CPU of the last task that sent a request. This is less appealing because we do not set the affinity directly---we only provide a hint to the irqbalanced running in userspace. Dynamically changing the affinity only works if the userspace applies the hint fast enough. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 282 - 1 file changed, 254 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index dc2daec..13d7672 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 +#define VIRTIO_SCSI_VQ_BASE 2 /* Command queue element */ struct virtio_scsi_cmd { @@ -59,22 +61,58 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* Per-target queue state */ +/* + * Per-target queue state. + * + * This struct holds the data needed by the queue steering policy. When a + * target is sent multiple requests, we need to drive them to the same queue so + * that FIFO processing order is kept. However, if a target was idle, we can + * choose a queue arbitrarily. In this case the queue is chosen according to + * the current VCPU, so the driver expects the number of request queues to be + * equal to the number of VCPUs. This makes it easy and fast to select the + * queue, and also lets the driver optimize the IRQ affinity for the virtqueues + * (each virtqueue's affinity is set to the CPU that "owns" the queue). + * + * An interesting effect of this policy is that only writes to req_vq need to + * take the tgt_lock. Read can be done outside the lock because: + * + * - writes of req_vq only occur when atomic_inc_return(>reqs) returns 1. + * In that case, no other CPU is reading req_vq: even if they were in + * virtscsi_queuecommand_multi, they would be spinning on tgt_lock. + * + * - reads of req_vq only occur when the target is not idle (reqs != 0). + * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq. + * + * Similarly, decrements of reqs are never concurrent with writes of req_vq. + * Thus they can happen outside the tgt_lock, provided of course we make reqs + * an atomic_t. + */ struct virtio_scsi_target_state { - /* Never held at the same time as vq_lock. */ + /* This spinlock never held at the same time as vq_lock. */ spinlock_t tgt_lock; + + /* Count of outstanding requests. */ + atomic_t reqs; + + /* Currently active virtqueue for requests sent to this target. */ + struct virtio_scsi_vq *req_vq; }; /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; - struct virtio_scsi_vq ctrl_vq; - struct virtio_scsi_vq event_vq; - struct virtio_scsi_vq req_vq; - /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; + + u32 num_queues; + + /* If the affinity hint is set for virtqueues */ + bool affinity_hint_set; + + struct virtio_scsi_vq ctrl_vq; + struct virtio_scsi_vq event_vq; + struct virtio_scsi_vq req_vqs[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -109,6 +147,8 @@ static void virtscsi_complete_cmd(struct virtio_scsi
[PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 13d7672..0ad9017 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* if the affinity hint is set for virtqueues */ bool affinity_hint_set; + /* CPU hotplug notifier */ + struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, +unsigned long action, void *hcpu) +{ + struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __virtscsi_set_affinity(vscsi, true); + break; + default: + break; + } + return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + vscsi->nb.notifier_call = _cpu_callback; + err = register_hotcpu_notifier(>nb); + if (err) { + pr_err("registering cpu notifier failed\n"); + goto scsi_add_host_failed; + } + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); + unregister_hotcpu_notifier(>nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 3/5] virtio-scsi: push vq lock/unlock into virtscsi_vq_done
From: Paolo Bonzini Avoid duplicated code in all of the callers. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c23560c..dc2daec 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -165,28 +165,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +static void virtscsi_vq_done(struct virtio_scsi *vscsi, +struct virtio_scsi_vq *virtscsi_vq, void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; + unsigned long flags; + struct virtqueue *vq = virtscsi_vq->vq; + spin_lock_irqsave(_vq->vq_lock, flags); do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(_vq->vq_lock, flags); } static void virtscsi_req_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); - spin_unlock_irqrestore(>req_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >req_vq, virtscsi_complete_cmd); }; static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) @@ -203,11 +205,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); - spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >ctrl_vq, virtscsi_complete_free); }; static int virtscsi_kick_event(struct virtio_scsi *vscsi, @@ -342,11 +341,8 @@ static void virtscsi_event_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); - spin_unlock_irqrestore(>event_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >event_vq, virtscsi_complete_event); }; /** -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 1/5] virtio-scsi: redo allocation of target data
virtio_scsi_target_state is now empty. We will find new uses for it in the next few patches, so this patch does not drop it completely. And as James suggested, we use entries target_alloc and target_destroy in the host template to allocate and destroy the virtio_scsi_target_state of each target, attach this struct to scsi_target->hostdata. Now we can get at it from the sdev with scsi_target(sdev)->hostdata. No messing around with fixed size arrays and bulk memory allocation and no need to pass in the maximum target size as a parameter because everything should now happen dynamically. Cc: James Bottomley Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao --- drivers/scsi/virtio_scsi.c | 71 -- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b53ba9e..ffa03e8 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -75,8 +75,6 @@ struct virtio_scsi { /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; - - struct virtio_scsi_target_state *tgt[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -530,6 +528,25 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return virtscsi_tmf(vscsi, cmd); } +static int virtscsi_target_alloc(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = + kmalloc(sizeof(*tgt), GFP_KERNEL); + if (!tgt) + return -ENOMEM; + + spin_lock_init(>tgt_lock); + + starget->hostdata = tgt; + return 0; +} + +static void virtscsi_target_destroy(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = starget->hostdata; + kfree(tgt); +} + static struct scsi_host_template virtscsi_host_template = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", @@ -542,6 +559,8 @@ static struct scsi_host_template virtscsi_host_template = { .can_queue = 1024, .dma_boundary = UINT_MAX, .use_clustering = ENABLE_CLUSTERING, + .target_alloc = virtscsi_target_alloc, + .target_destroy = virtscsi_target_destroy, }; #define virtscsi_config_get(vdev, fld) \ @@ -568,20 +587,6 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq->vq = vq; } -static struct virtio_scsi_target_state *virtscsi_alloc_tgt( - struct virtio_device *vdev) -{ - struct virtio_scsi_target_state *tgt; - gfp_t gfp_mask = GFP_KERNEL; - - tgt = kmalloc(sizeof(*tgt), gfp_mask); - if (!tgt) - return NULL; - - spin_lock_init(>tgt_lock); - return tgt; -} - static void virtscsi_scan(struct virtio_device *vdev) { struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv; @@ -591,28 +596,17 @@ static void virtscsi_scan(struct virtio_device *vdev) static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - u32 i, num_targets; - /* Stop all the virtqueues. */ vdev->config->reset(vdev); - num_targets = sh->max_id; - for (i = 0; i < num_targets; i++) { - kfree(vscsi->tgt[i]); - vscsi->tgt[i] = NULL; - } - vdev->config->del_vqs(vdev); } static int virtscsi_init(struct virtio_device *vdev, -struct virtio_scsi *vscsi, int num_targets) +struct virtio_scsi *vscsi) { int err; struct virtqueue *vqs[3]; - u32 i; vq_callback_t *callbacks[] = { virtscsi_ctrl_done, @@ -640,18 +634,6 @@ static int virtscsi_init(struct virtio_device *vdev, if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) virtscsi_kick_event_all(vscsi); - for (i = 0; i < num_targets; i++) { - vscsi->tgt[i] = virtscsi_alloc_tgt(vdev); - if (!vscsi->tgt[i]) { - err = -ENOMEM; - goto out; - } - } - err = 0; - -out: - if (err) - virtscsi_remove_vqs(vdev); return err; } @@ -663,12 +645,9 @@ static int virtscsi_probe(struct virtio_device *vdev) u32 sg_elems, num_targets; u32 cmd_per_lun; - /* Allocate memory and link the structs together. */ num_targets = virtscsi_config_get(vdev, max_target) + 1; - shost = scsi_host_alloc(_host_template, - sizeof(*vscsi) - + num_targets * sizeof(struct virtio_scsi_target_state)); + shost = scsi_host_alloc(_host_template, sizeof(*vscsi)); if (!shost) return -ENOMEM; @@ -678,7 +657,7 @@ static int virtscsi_probe(struct
[PATCH V6 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V6: rework "redo allocation of target data" (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualization=136067440717154=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (3): virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (2): virtio-scsi: redo allocation of target data virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 387 - 1 file changed, 309 insertions(+), 78 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches, which has already gone into virtio-next today. We hope this can go into virtio-next together with the virtio ring rework pathes. V6: rework redo allocation of target data (James) fix comments (Venkatesh) rebase to virtio-next V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualizationm=136067440717154w=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (3): virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (2): virtio-scsi: redo allocation of target data virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 387 - 1 file changed, 309 insertions(+), 78 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 1/5] virtio-scsi: redo allocation of target data
virtio_scsi_target_state is now empty. We will find new uses for it in the next few patches, so this patch does not drop it completely. And as James suggested, we use entries target_alloc and target_destroy in the host template to allocate and destroy the virtio_scsi_target_state of each target, attach this struct to scsi_target-hostdata. Now we can get at it from the sdev with scsi_target(sdev)-hostdata. No messing around with fixed size arrays and bulk memory allocation and no need to pass in the maximum target size as a parameter because everything should now happen dynamically. Cc: James Bottomley james.bottom...@hansenpartnership.com Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- drivers/scsi/virtio_scsi.c | 71 -- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b53ba9e..ffa03e8 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -75,8 +75,6 @@ struct virtio_scsi { /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; - - struct virtio_scsi_target_state *tgt[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -530,6 +528,25 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return virtscsi_tmf(vscsi, cmd); } +static int virtscsi_target_alloc(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = + kmalloc(sizeof(*tgt), GFP_KERNEL); + if (!tgt) + return -ENOMEM; + + spin_lock_init(tgt-tgt_lock); + + starget-hostdata = tgt; + return 0; +} + +static void virtscsi_target_destroy(struct scsi_target *starget) +{ + struct virtio_scsi_target_state *tgt = starget-hostdata; + kfree(tgt); +} + static struct scsi_host_template virtscsi_host_template = { .module = THIS_MODULE, .name = Virtio SCSI HBA, @@ -542,6 +559,8 @@ static struct scsi_host_template virtscsi_host_template = { .can_queue = 1024, .dma_boundary = UINT_MAX, .use_clustering = ENABLE_CLUSTERING, + .target_alloc = virtscsi_target_alloc, + .target_destroy = virtscsi_target_destroy, }; #define virtscsi_config_get(vdev, fld) \ @@ -568,20 +587,6 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq-vq = vq; } -static struct virtio_scsi_target_state *virtscsi_alloc_tgt( - struct virtio_device *vdev) -{ - struct virtio_scsi_target_state *tgt; - gfp_t gfp_mask = GFP_KERNEL; - - tgt = kmalloc(sizeof(*tgt), gfp_mask); - if (!tgt) - return NULL; - - spin_lock_init(tgt-tgt_lock); - return tgt; -} - static void virtscsi_scan(struct virtio_device *vdev) { struct Scsi_Host *shost = (struct Scsi_Host *)vdev-priv; @@ -591,28 +596,17 @@ static void virtscsi_scan(struct virtio_device *vdev) static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - u32 i, num_targets; - /* Stop all the virtqueues. */ vdev-config-reset(vdev); - num_targets = sh-max_id; - for (i = 0; i num_targets; i++) { - kfree(vscsi-tgt[i]); - vscsi-tgt[i] = NULL; - } - vdev-config-del_vqs(vdev); } static int virtscsi_init(struct virtio_device *vdev, -struct virtio_scsi *vscsi, int num_targets) +struct virtio_scsi *vscsi) { int err; struct virtqueue *vqs[3]; - u32 i; vq_callback_t *callbacks[] = { virtscsi_ctrl_done, @@ -640,18 +634,6 @@ static int virtscsi_init(struct virtio_device *vdev, if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) virtscsi_kick_event_all(vscsi); - for (i = 0; i num_targets; i++) { - vscsi-tgt[i] = virtscsi_alloc_tgt(vdev); - if (!vscsi-tgt[i]) { - err = -ENOMEM; - goto out; - } - } - err = 0; - -out: - if (err) - virtscsi_remove_vqs(vdev); return err; } @@ -663,12 +645,9 @@ static int virtscsi_probe(struct virtio_device *vdev) u32 sg_elems, num_targets; u32 cmd_per_lun; - /* Allocate memory and link the structs together. */ num_targets = virtscsi_config_get(vdev, max_target) + 1; - shost = scsi_host_alloc(virtscsi_host_template, - sizeof(*vscsi) - + num_targets * sizeof(struct virtio_scsi_target_state)); + shost = scsi_host_alloc(virtscsi_host_template, sizeof(*vscsi)); if (!shost) return -ENOMEM; @@ -678,7 +657,7 @@ static int
[PATCH V6 3/5] virtio-scsi: push vq lock/unlock into virtscsi_vq_done
From: Paolo Bonzini pbonz...@redhat.com Avoid duplicated code in all of the callers. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c23560c..dc2daec 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -165,28 +165,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc-scsi_done(sc); } -static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +static void virtscsi_vq_done(struct virtio_scsi *vscsi, +struct virtio_scsi_vq *virtscsi_vq, void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; + unsigned long flags; + struct virtqueue *vq = virtscsi_vq-vq; + spin_lock_irqsave(virtscsi_vq-vq_lock, flags); do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, len)) != NULL) fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(virtscsi_vq-vq_lock, flags); } static void virtscsi_req_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-req_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); - spin_unlock_irqrestore(vscsi-req_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-req_vq, virtscsi_complete_cmd); }; static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) @@ -203,11 +205,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); - spin_unlock_irqrestore(vscsi-ctrl_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-ctrl_vq, virtscsi_complete_free); }; static int virtscsi_kick_event(struct virtio_scsi *vscsi, @@ -342,11 +341,8 @@ static void virtscsi_event_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-event_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); - spin_unlock_irqrestore(vscsi-event_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-event_vq, virtscsi_complete_event); }; /** -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 13d7672..0ad9017 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* if the affinity hint is set for virtqueues */ bool affinity_hint_set; + /* CPU hotplug notifier */ + struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, +unsigned long action, void *hcpu) +{ + struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __virtscsi_set_affinity(vscsi, true); + break; + default: + break; + } + return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + vscsi-nb.notifier_call = virtscsi_cpu_callback; + err = register_hotcpu_notifier(vscsi-nb); + if (err) { + pr_err(registering cpu notifier failed\n); + goto scsi_add_host_failed; + } + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost-cmd_per_lun = min_t(u32, cmd_per_lun, shost-can_queue); shost-max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); + unregister_hotcpu_notifier(vscsi-nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V6 4/5] virtio-scsi: introduce multiqueue support
From: Paolo Bonzini pbonz...@redhat.com This patch adds queue steering to virtio-scsi. When a target is sent multiple requests, we always drive them to the same queue so that FIFO processing order is kept. However, if a target was idle, we can choose a queue arbitrarily. In this case the queue is chosen according to the current VCPU, so the driver expects the number of request queues to be equal to the number of VCPUs. This makes it easy and fast to select the queue, and also lets the driver optimize the IRQ affinity for the virtqueues (each virtqueue's affinity is set to the CPU that owns the queue). The speedup comes from improving cache locality and giving CPU affinity to the virtqueues, which is why this scheme was selected. Assuming that the thread that is sending requests to the device is I/O-bound, it is likely to be sleeping at the time the ISR is executed, and thus executing the ISR on the same processor that sent the requests is cheap. However, the kernel will not execute the ISR on the best processor unless you explicitly set the affinity. This is because in practice you will have many such I/O-bound processes and thus many otherwise idle processors. Then the kernel will execute the ISR on a random processor, rather than the one that is sending requests to the device. The alternative to per-CPU virtqueues is per-target virtqueues. To achieve the same locality, we could dynamically choose the virtqueue's affinity based on the CPU of the last task that sent a request. This is less appealing because we do not set the affinity directly---we only provide a hint to the irqbalanced running in userspace. Dynamically changing the affinity only works if the userspace applies the hint fast enough. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 282 - 1 file changed, 254 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index dc2daec..13d7672 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -22,12 +22,14 @@ #include linux/virtio_ids.h #include linux/virtio_config.h #include linux/virtio_scsi.h +#include linux/cpu.h #include scsi/scsi_host.h #include scsi/scsi_device.h #include scsi/scsi_cmnd.h #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 +#define VIRTIO_SCSI_VQ_BASE 2 /* Command queue element */ struct virtio_scsi_cmd { @@ -59,22 +61,58 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* Per-target queue state */ +/* + * Per-target queue state. + * + * This struct holds the data needed by the queue steering policy. When a + * target is sent multiple requests, we need to drive them to the same queue so + * that FIFO processing order is kept. However, if a target was idle, we can + * choose a queue arbitrarily. In this case the queue is chosen according to + * the current VCPU, so the driver expects the number of request queues to be + * equal to the number of VCPUs. This makes it easy and fast to select the + * queue, and also lets the driver optimize the IRQ affinity for the virtqueues + * (each virtqueue's affinity is set to the CPU that owns the queue). + * + * An interesting effect of this policy is that only writes to req_vq need to + * take the tgt_lock. Read can be done outside the lock because: + * + * - writes of req_vq only occur when atomic_inc_return(tgt-reqs) returns 1. + * In that case, no other CPU is reading req_vq: even if they were in + * virtscsi_queuecommand_multi, they would be spinning on tgt_lock. + * + * - reads of req_vq only occur when the target is not idle (reqs != 0). + * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq. + * + * Similarly, decrements of reqs are never concurrent with writes of req_vq. + * Thus they can happen outside the tgt_lock, provided of course we make reqs + * an atomic_t. + */ struct virtio_scsi_target_state { - /* Never held at the same time as vq_lock. */ + /* This spinlock never held at the same time as vq_lock. */ spinlock_t tgt_lock; + + /* Count of outstanding requests. */ + atomic_t reqs; + + /* Currently active virtqueue for requests sent to this target. */ + struct virtio_scsi_vq *req_vq; }; /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; - struct virtio_scsi_vq ctrl_vq; - struct virtio_scsi_vq event_vq; - struct virtio_scsi_vq req_vq; - /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; + + u32 num_queues; + + /* If the affinity hint is set for virtqueues */ + bool affinity_hint_set; + + struct virtio_scsi_vq ctrl_vq; + struct virtio_scsi_vq event_vq; + struct
[PATCH V6 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini pbonz...@redhat.com This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index ffa03e8..c23560c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -104,7 +104,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd-sc; @@ -165,7 +165,8 @@ static void virtscsi_complete_cmd(void *buf) sc-scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -173,7 +174,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, len)) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -184,11 +185,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(vscsi-req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -205,7 +206,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(vscsi-ctrl_vq.vq_lock, flags); }; @@ -329,7 +330,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -344,7 +345,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(vscsi-event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V5 4/5] virtio-scsi: introduce multiqueue support
On 03/20/2013 09:46 AM, Venkatesh Srinivas wrote: This looks pretty good! I rather like the (lack of) locking in I/O completion (around the req count vs. target/queue binding). It is unfortunate that you need to hold the per-target lock in virtscsi_pick_vq() though; have any idea how much that lock hurts? Paolo? Just two minor comments: (in struct virtio_scsi_target_data): + /* This spinlock never help at the same time as vq_lock. */ held? (in struct virtio_scsi): + /* Does the affinity hint is set for virtqueues? */ Could you rephrase that, please? Thank you, fixed in V6, please review. Tested on qemu and w/ Google Compute Engine's virtio-scsi device. Cool. Reviewed-and-tested-by: Venkatesh Srinivas venkate...@google.com Do you mind review and test the V6? Thank you. Regards, Wanlong Gao Thanks, -- vs; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
On 03/20/2013 03:24 PM, Asias He wrote: On Wed, Mar 20, 2013 at 03:01:24PM +0800, Wanlong Gao wrote: Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 13d7672..0ad9017 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* if the affinity hint is set for virtqueues */ bool affinity_hint_set; +/* CPU hotplug notifier */ +struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ +struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); +switch(action) { +case CPU_ONLINE: +case CPU_ONLINE_FROZEN: +case CPU_DEAD: +case CPU_DEAD_FROZEN: +__virtscsi_set_affinity(vscsi, true); +break; +default: +break; +} +return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; +vscsi-nb.notifier_call = virtscsi_cpu_callback; +err = register_hotcpu_notifier(vscsi-nb); +if (err) { +pr_err(registering cpu notifier failed\n); +goto scsi_add_host_failed; +} + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost-cmd_per_lun = min_t(u32, cmd_per_lun, shost-can_queue); shost-max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); +unregister_hotcpu_notifier(vscsi-nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 This one does not apply on top of virtio-next + patch 1-4 in this series. Oops, it's my fault. There may be something wrong here. But you can use 3-way merge to apply them, I'll take care next time. Thanks, Wanlong Gao -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V6 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
On 03/20/2013 03:24 PM, Asias He wrote: On Wed, Mar 20, 2013 at 03:01:24PM +0800, Wanlong Gao wrote: Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 13d7672..0ad9017 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -110,6 +110,9 @@ struct virtio_scsi { /* if the affinity hint is set for virtqueues */ bool affinity_hint_set; +/* CPU hotplug notifier */ +struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -762,6 +765,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ +struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); +switch(action) { +case CPU_ONLINE: +case CPU_ONLINE_FROZEN: +case CPU_DEAD: +case CPU_DEAD_FROZEN: +__virtscsi_set_affinity(vscsi, true); +break; +default: +break; +} +return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -884,6 +904,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; +vscsi-nb.notifier_call = virtscsi_cpu_callback; +err = register_hotcpu_notifier(vscsi-nb); +if (err) { +pr_err(registering cpu notifier failed\n); +goto scsi_add_host_failed; +} + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost-cmd_per_lun = min_t(u32, cmd_per_lun, shost-can_queue); shost-max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -921,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); +unregister_hotcpu_notifier(vscsi-nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 This one does not apply on top of virtio-next + patch 1-4 in this series. I'm very sorry. This fault is because I modified the 4/5 from /* if the affinity hint is set for virtqueues */ to /* If the affinity hint is set for virtqueues */ by hand. You can also change If to if in 5/5 to apply this patch without 3-way merge. Thanks, Wanlong Gao -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches. We hope this can go into virtio-next together with the virtio ring rework pathes. V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualization=136067440717154=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (4): virtio-scsi: redo allocation of target data virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (1): virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 359 - 1 file changed, 290 insertions(+), 69 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 3256c51..bcab9d7 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -106,7 +106,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd->sc; @@ -167,7 +167,8 @@ static void virtscsi_complete_cmd(void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -175,7 +176,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -186,11 +187,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(>req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -207,7 +208,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); }; @@ -331,7 +332,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -346,7 +347,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(>event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 1d95295..83023f5 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -112,6 +112,9 @@ struct virtio_scsi { /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; + /* CPU hotplug notifier */ + struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -736,6 +739,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, +unsigned long action, void *hcpu) +{ + struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __virtscsi_set_affinity(vscsi, true); + break; + default: + break; + } + return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -874,6 +894,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + vscsi->nb.notifier_call = _cpu_callback; + err = register_hotcpu_notifier(>nb); + if (err) { + pr_err("registering cpu notifier failed\n"); + goto scsi_add_host_failed; + } + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -911,6 +938,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); + unregister_hotcpu_notifier(>nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 3/5] virtio-scsi: push vq lock/unlock into virtscsi_vq_done
From: Paolo Bonzini Avoid duplicated code in all of the callers. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index bcab9d7..94a64ad 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -167,28 +167,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +static void virtscsi_vq_done(struct virtio_scsi *vscsi, +struct virtio_scsi_vq *virtscsi_vq, void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; + unsigned long flags; + struct virtqueue *vq = virtscsi_vq->vq; + spin_lock_irqsave(_vq->vq_lock, flags); do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(_vq->vq_lock, flags); } static void virtscsi_req_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); - spin_unlock_irqrestore(>req_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >req_vq, virtscsi_complete_cmd); }; static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) @@ -205,11 +207,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); - spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >ctrl_vq, virtscsi_complete_free); }; static int virtscsi_kick_event(struct virtio_scsi *vscsi, @@ -344,11 +343,8 @@ static void virtscsi_event_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq->vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); - spin_unlock_irqrestore(>event_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, >event_vq, virtscsi_complete_event); }; /** -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 1/5] virtio-scsi: redo allocation of target data
From: Paolo Bonzini virtio_scsi_target_state is now empty. We will find new uses for it in the next few patches, so this patch does not drop it completely. However, having dropped the sglist flexible array member, we can turn the tgt array-of-pointers into a simple array. This simplifies the allocation. Even simpler would be to place the virtio_scsi_target_state structs in a flexible array member at the end of struct virtio_scsi. But we do not do that, because we will place the virtqueues there in the next patches. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao --- drivers/scsi/virtio_scsi.c | 40 +++- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b53ba9e..3256c51 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -76,7 +76,7 @@ struct virtio_scsi { /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; - struct virtio_scsi_target_state *tgt[]; + struct virtio_scsi_target_state *tgt; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -568,18 +568,9 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq->vq = vq; } -static struct virtio_scsi_target_state *virtscsi_alloc_tgt( - struct virtio_device *vdev) +static void virtscsi_init_tgt(struct virtio_scsi_target_state *tgt) { - struct virtio_scsi_target_state *tgt; - gfp_t gfp_mask = GFP_KERNEL; - - tgt = kmalloc(sizeof(*tgt), gfp_mask); - if (!tgt) - return NULL; - spin_lock_init(>tgt_lock); - return tgt; } static void virtscsi_scan(struct virtio_device *vdev) @@ -593,17 +584,10 @@ static void virtscsi_remove_vqs(struct virtio_device *vdev) { struct Scsi_Host *sh = virtio_scsi_host(vdev); struct virtio_scsi *vscsi = shost_priv(sh); - u32 i, num_targets; /* Stop all the virtqueues. */ vdev->config->reset(vdev); - - num_targets = sh->max_id; - for (i = 0; i < num_targets; i++) { - kfree(vscsi->tgt[i]); - vscsi->tgt[i] = NULL; - } - + kfree(vscsi->tgt); vdev->config->del_vqs(vdev); } @@ -640,13 +624,14 @@ static int virtscsi_init(struct virtio_device *vdev, if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) virtscsi_kick_event_all(vscsi); - for (i = 0; i < num_targets; i++) { - vscsi->tgt[i] = virtscsi_alloc_tgt(vdev); - if (!vscsi->tgt[i]) { - err = -ENOMEM; - goto out; - } + vscsi->tgt = kmalloc(num_targets * sizeof(vscsi->tgt[0]), GFP_KERNEL); + if (!vscsi->tgt) { + err = -ENOMEM; + goto out; } + for (i = 0; i < num_targets; i++) + virtscsi_init_tgt(>tgt[i]); + err = 0; out: @@ -665,10 +650,7 @@ static int virtscsi_probe(struct virtio_device *vdev) /* Allocate memory and link the structs together. */ num_targets = virtscsi_config_get(vdev, max_target) + 1; - shost = scsi_host_alloc(_host_template, - sizeof(*vscsi) - + num_targets * sizeof(struct virtio_scsi_target_state)); - + shost = scsi_host_alloc(_host_template, sizeof(*vscsi)); if (!shost) return -ENOMEM; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 4/5] virtio-scsi: introduce multiqueue support
From: Paolo Bonzini This patch adds queue steering to virtio-scsi. When a target is sent multiple requests, we always drive them to the same queue so that FIFO processing order is kept. However, if a target was idle, we can choose a queue arbitrarily. In this case the queue is chosen according to the current VCPU, so the driver expects the number of request queues to be equal to the number of VCPUs. This makes it easy and fast to select the queue, and also lets the driver optimize the IRQ affinity for the virtqueues (each virtqueue's affinity is set to the CPU that "owns" the queue). The speedup comes from improving cache locality and giving CPU affinity to the virtqueues, which is why this scheme was selected. Assuming that the thread that is sending requests to the device is I/O-bound, it is likely to be sleeping at the time the ISR is executed, and thus executing the ISR on the same processor that sent the requests is cheap. However, the kernel will not execute the ISR on the "best" processor unless you explicitly set the affinity. This is because in practice you will have many such I/O-bound processes and thus many otherwise idle processors. Then the kernel will execute the ISR on a random processor, rather than the one that is sending requests to the device. The alternative to per-CPU virtqueues is per-target virtqueues. To achieve the same locality, we could dynamically choose the virtqueue's affinity based on the CPU of the last task that sent a request. This is less appealing because we do not set the affinity directly---we only provide a hint to the irqbalanced running in userspace. Dynamically changing the affinity only works if the userspace applies the hint fast enough. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao Reviewed-by: Asias He --- drivers/scsi/virtio_scsi.c | 269 - 1 file changed, 241 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 94a64ad..1d95295 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 +#define VIRTIO_SCSI_VQ_BASE 2 /* Command queue element */ struct virtio_scsi_cmd { @@ -59,24 +61,60 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* Per-target queue state */ +/* + * Per-target queue state. + * + * This struct holds the data needed by the queue steering policy. When a + * target is sent multiple requests, we need to drive them to the same queue so + * that FIFO processing order is kept. However, if a target was idle, we can + * choose a queue arbitrarily. In this case the queue is chosen according to + * the current VCPU, so the driver expects the number of request queues to be + * equal to the number of VCPUs. This makes it easy and fast to select the + * queue, and also lets the driver optimize the IRQ affinity for the virtqueues + * (each virtqueue's affinity is set to the CPU that "owns" the queue). + * + * An interesting effect of this policy is that only writes to req_vq need to + * take the tgt_lock. Read can be done outside the lock because: + * + * - writes of req_vq only occur when atomic_inc_return(>reqs) returns 1. + * In that case, no other CPU is reading req_vq: even if they were in + * virtscsi_queuecommand_multi, they would be spinning on tgt_lock. + * + * - reads of req_vq only occur when the target is not idle (reqs != 0). + * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq. + * + * Similarly, decrements of reqs are never concurrent with writes of req_vq. + * Thus they can happen outside the tgt_lock, provided of course we make reqs + * an atomic_t. + */ struct virtio_scsi_target_state { - /* Never held at the same time as vq_lock. */ + /* This spinlock never help at the same time as vq_lock. */ spinlock_t tgt_lock; + + /* Count of outstanding requests. */ + atomic_t reqs; + + /* Currently active virtqueue for requests sent to this target. */ + struct virtio_scsi_vq *req_vq; }; /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; - struct virtio_scsi_vq ctrl_vq; - struct virtio_scsi_vq event_vq; - struct virtio_scsi_vq req_vq; - /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; struct virtio_scsi_target_state *tgt; + + u32 num_queues; + + /* Does the affinity hint is set for virtqueues? */ + bool affinity_hint_set; + + struct virtio_scsi_vq ctrl_vq; + struct virtio_scsi_vq event_vq; + struct virtio_scsi_vq req_vqs[]; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -111,6 +149,7 @@ static
[PATCH V5 1/5] virtio-scsi: redo allocation of target data
From: Paolo Bonzini pbonz...@redhat.com virtio_scsi_target_state is now empty. We will find new uses for it in the next few patches, so this patch does not drop it completely. However, having dropped the sglist flexible array member, we can turn the tgt array-of-pointers into a simple array. This simplifies the allocation. Even simpler would be to place the virtio_scsi_target_state structs in a flexible array member at the end of struct virtio_scsi. But we do not do that, because we will place the virtqueues there in the next patches. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- drivers/scsi/virtio_scsi.c | 40 +++- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b53ba9e..3256c51 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -76,7 +76,7 @@ struct virtio_scsi { /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; - struct virtio_scsi_target_state *tgt[]; + struct virtio_scsi_target_state *tgt; }; static struct kmem_cache *virtscsi_cmd_cache; @@ -568,18 +568,9 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, virtscsi_vq-vq = vq; } -static struct virtio_scsi_target_state *virtscsi_alloc_tgt( - struct virtio_device *vdev) +static void virtscsi_init_tgt(struct virtio_scsi_target_state *tgt) { - struct virtio_scsi_target_state *tgt; - gfp_t gfp_mask = GFP_KERNEL; - - tgt = kmalloc(sizeof(*tgt), gfp_mask); - if (!tgt) - return NULL; - spin_lock_init(tgt-tgt_lock); - return tgt; } static void virtscsi_scan(struct virtio_device *vdev) @@ -593,17 +584,10 @@ static void virtscsi_remove_vqs(struct virtio_device *vdev) { struct Scsi_Host *sh = virtio_scsi_host(vdev); struct virtio_scsi *vscsi = shost_priv(sh); - u32 i, num_targets; /* Stop all the virtqueues. */ vdev-config-reset(vdev); - - num_targets = sh-max_id; - for (i = 0; i num_targets; i++) { - kfree(vscsi-tgt[i]); - vscsi-tgt[i] = NULL; - } - + kfree(vscsi-tgt); vdev-config-del_vqs(vdev); } @@ -640,13 +624,14 @@ static int virtscsi_init(struct virtio_device *vdev, if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) virtscsi_kick_event_all(vscsi); - for (i = 0; i num_targets; i++) { - vscsi-tgt[i] = virtscsi_alloc_tgt(vdev); - if (!vscsi-tgt[i]) { - err = -ENOMEM; - goto out; - } + vscsi-tgt = kmalloc(num_targets * sizeof(vscsi-tgt[0]), GFP_KERNEL); + if (!vscsi-tgt) { + err = -ENOMEM; + goto out; } + for (i = 0; i num_targets; i++) + virtscsi_init_tgt(vscsi-tgt[i]); + err = 0; out: @@ -665,10 +650,7 @@ static int virtscsi_probe(struct virtio_device *vdev) /* Allocate memory and link the structs together. */ num_targets = virtscsi_config_get(vdev, max_target) + 1; - shost = scsi_host_alloc(virtscsi_host_template, - sizeof(*vscsi) - + num_targets * sizeof(struct virtio_scsi_target_state)); - + shost = scsi_host_alloc(virtscsi_host_template, sizeof(*vscsi)); if (!shost) return -ENOMEM; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 4/5] virtio-scsi: introduce multiqueue support
From: Paolo Bonzini pbonz...@redhat.com This patch adds queue steering to virtio-scsi. When a target is sent multiple requests, we always drive them to the same queue so that FIFO processing order is kept. However, if a target was idle, we can choose a queue arbitrarily. In this case the queue is chosen according to the current VCPU, so the driver expects the number of request queues to be equal to the number of VCPUs. This makes it easy and fast to select the queue, and also lets the driver optimize the IRQ affinity for the virtqueues (each virtqueue's affinity is set to the CPU that owns the queue). The speedup comes from improving cache locality and giving CPU affinity to the virtqueues, which is why this scheme was selected. Assuming that the thread that is sending requests to the device is I/O-bound, it is likely to be sleeping at the time the ISR is executed, and thus executing the ISR on the same processor that sent the requests is cheap. However, the kernel will not execute the ISR on the best processor unless you explicitly set the affinity. This is because in practice you will have many such I/O-bound processes and thus many otherwise idle processors. Then the kernel will execute the ISR on a random processor, rather than the one that is sending requests to the device. The alternative to per-CPU virtqueues is per-target virtqueues. To achieve the same locality, we could dynamically choose the virtqueue's affinity based on the CPU of the last task that sent a request. This is less appealing because we do not set the affinity directly---we only provide a hint to the irqbalanced running in userspace. Dynamically changing the affinity only works if the userspace applies the hint fast enough. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 269 - 1 file changed, 241 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 94a64ad..1d95295 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -22,12 +22,14 @@ #include linux/virtio_ids.h #include linux/virtio_config.h #include linux/virtio_scsi.h +#include linux/cpu.h #include scsi/scsi_host.h #include scsi/scsi_device.h #include scsi/scsi_cmnd.h #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 +#define VIRTIO_SCSI_VQ_BASE 2 /* Command queue element */ struct virtio_scsi_cmd { @@ -59,24 +61,60 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* Per-target queue state */ +/* + * Per-target queue state. + * + * This struct holds the data needed by the queue steering policy. When a + * target is sent multiple requests, we need to drive them to the same queue so + * that FIFO processing order is kept. However, if a target was idle, we can + * choose a queue arbitrarily. In this case the queue is chosen according to + * the current VCPU, so the driver expects the number of request queues to be + * equal to the number of VCPUs. This makes it easy and fast to select the + * queue, and also lets the driver optimize the IRQ affinity for the virtqueues + * (each virtqueue's affinity is set to the CPU that owns the queue). + * + * An interesting effect of this policy is that only writes to req_vq need to + * take the tgt_lock. Read can be done outside the lock because: + * + * - writes of req_vq only occur when atomic_inc_return(tgt-reqs) returns 1. + * In that case, no other CPU is reading req_vq: even if they were in + * virtscsi_queuecommand_multi, they would be spinning on tgt_lock. + * + * - reads of req_vq only occur when the target is not idle (reqs != 0). + * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq. + * + * Similarly, decrements of reqs are never concurrent with writes of req_vq. + * Thus they can happen outside the tgt_lock, provided of course we make reqs + * an atomic_t. + */ struct virtio_scsi_target_state { - /* Never held at the same time as vq_lock. */ + /* This spinlock never help at the same time as vq_lock. */ spinlock_t tgt_lock; + + /* Count of outstanding requests. */ + atomic_t reqs; + + /* Currently active virtqueue for requests sent to this target. */ + struct virtio_scsi_vq *req_vq; }; /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; - struct virtio_scsi_vq ctrl_vq; - struct virtio_scsi_vq event_vq; - struct virtio_scsi_vq req_vq; - /* Get some buffers ready for event vq */ struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; struct virtio_scsi_target_state *tgt; + + u32 num_queues; + + /* Does the affinity hint is set for virtqueues? */ + bool affinity_hint_set; + + struct virtio_scsi_vq ctrl_vq
[PATCH V5 3/5] virtio-scsi: push vq lock/unlock into virtscsi_vq_done
From: Paolo Bonzini pbonz...@redhat.com Avoid duplicated code in all of the callers. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index bcab9d7..94a64ad 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -167,28 +167,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) sc-scsi_done(sc); } -static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +static void virtscsi_vq_done(struct virtio_scsi *vscsi, +struct virtio_scsi_vq *virtscsi_vq, void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; + unsigned long flags; + struct virtqueue *vq = virtscsi_vq-vq; + spin_lock_irqsave(virtscsi_vq-vq_lock, flags); do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, len)) != NULL) fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(virtscsi_vq-vq_lock, flags); } static void virtscsi_req_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-req_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); - spin_unlock_irqrestore(vscsi-req_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-req_vq, virtscsi_complete_cmd); }; static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) @@ -205,11 +207,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); - spin_unlock_irqrestore(vscsi-ctrl_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-ctrl_vq, virtscsi_complete_free); }; static int virtscsi_kick_event(struct virtio_scsi *vscsi, @@ -344,11 +343,8 @@ static void virtscsi_event_done(struct virtqueue *vq) { struct Scsi_Host *sh = virtio_scsi_host(vq-vdev); struct virtio_scsi *vscsi = shost_priv(sh); - unsigned long flags; - spin_lock_irqsave(vscsi-event_vq.vq_lock, flags); - virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); - spin_unlock_irqrestore(vscsi-event_vq.vq_lock, flags); + virtscsi_vq_done(vscsi, vscsi-event_vq, virtscsi_complete_event); }; /** -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 5/5] virtio-scsi: reset virtqueue affinity when doing cpu hotplug
Add hot cpu notifier to reset the request virtqueue affinity when doing cpu hotplug. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 1d95295..83023f5 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -112,6 +112,9 @@ struct virtio_scsi { /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; + /* CPU hotplug notifier */ + struct notifier_block nb; + struct virtio_scsi_vq ctrl_vq; struct virtio_scsi_vq event_vq; struct virtio_scsi_vq req_vqs[]; @@ -736,6 +739,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) put_online_cpus(); } +static int virtscsi_cpu_callback(struct notifier_block *nfb, +unsigned long action, void *hcpu) +{ + struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb); + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __virtscsi_set_affinity(vscsi, true); + break; + default: + break; + } + return NOTIFY_OK; +} + static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -874,6 +894,13 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + vscsi-nb.notifier_call = virtscsi_cpu_callback; + err = register_hotcpu_notifier(vscsi-nb); + if (err) { + pr_err(registering cpu notifier failed\n); + goto scsi_add_host_failed; + } + cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost-cmd_per_lun = min_t(u32, cmd_per_lun, shost-can_queue); shost-max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0x; @@ -911,6 +938,8 @@ static void virtscsi_remove(struct virtio_device *vdev) scsi_remove_host(shost); + unregister_hotcpu_notifier(vscsi-nb); + virtscsi_remove_vqs(vdev); scsi_host_put(shost); } -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini pbonz...@redhat.com This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com Reviewed-by: Asias He as...@redhat.com --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 3256c51..bcab9d7 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -106,7 +106,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd-sc; @@ -167,7 +167,8 @@ static void virtscsi_complete_cmd(void *buf) sc-scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -175,7 +176,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, len)) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -186,11 +187,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(vscsi-req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -207,7 +208,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(vscsi-ctrl_vq.vq_lock, flags); }; @@ -331,7 +332,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -346,7 +347,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(vscsi-event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(vscsi-event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V5 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches. We hope this can go into virtio-next together with the virtio ring rework pathes. V5: improving the grammar of 1/5 (Paolo) move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias) V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualizationm=136067440717154w=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (4): virtio-scsi: redo allocation of target data virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (1): virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 359 - 1 file changed, 290 insertions(+), 69 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 0/5] virtio-scsi multiqueue
This series implements virtio-scsi queue steering, which gives performance improvements of up to 50% (measured both with QEMU and tcm_vhost backends). This version rebased on Rusty's virtio ring rework patches. We hope this can go into virtio-next together with the virtio ring rework pathes. V4: rebase on virtio ring rework patches (rusty's pending-rebases branch) V3 and be found http://marc.info/?l=linux-virtualization=136067440717154=2 It would probably be easier to get it in via Rusty's tree because of the prerequisites. James, can we get your Acked-by? Paolo Bonzini (4): virtio-scsi: redo allocation of target data virtio-scsi: pass struct virtio_scsi to virtqueue completion function virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: introduce multiqueue support Wanlong Gao (1): virtio-scsi: reset virtqueue affinity when doing cpu hotplug drivers/scsi/virtio_scsi.c | 364 - 1 file changed, 291 insertions(+), 73 deletions(-) -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 2/5] virtio-scsi: pass struct virtio_scsi to virtqueue completion function
From: Paolo Bonzini This will be needed soon in order to retrieve the per-target struct. Cc: linux-s...@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Wanlong Gao --- drivers/scsi/virtio_scsi.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 75ccde1..fdb0132 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -106,7 +106,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid) * * Called with vq_lock held. */ -static void virtscsi_complete_cmd(void *buf) +static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd->sc; @@ -167,7 +167,8 @@ static void virtscsi_complete_cmd(void *buf) sc->scsi_done(sc); } -static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) +static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq, +void (*fn)(struct virtio_scsi *vscsi, void *buf)) { void *buf; unsigned int len; @@ -175,7 +176,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf)) do { virtqueue_disable_cb(vq); while ((buf = virtqueue_get_buf(vq, )) != NULL) - fn(buf); + fn(vscsi, buf); } while (!virtqueue_enable_cb(vq)); } @@ -186,11 +187,11 @@ static void virtscsi_req_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>req_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_cmd); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd); spin_unlock_irqrestore(>req_vq.vq_lock, flags); }; -static void virtscsi_complete_free(void *buf) +static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -207,7 +208,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>ctrl_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_free); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_free); spin_unlock_irqrestore(>ctrl_vq.vq_lock, flags); }; @@ -331,7 +332,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_kick_event(vscsi, event_node); } -static void virtscsi_complete_event(void *buf) +static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; @@ -346,7 +347,7 @@ static void virtscsi_event_done(struct virtqueue *vq) unsigned long flags; spin_lock_irqsave(>event_vq.vq_lock, flags); - virtscsi_vq_done(vq, virtscsi_complete_event); + virtscsi_vq_done(vscsi, vq, virtscsi_complete_event); spin_unlock_irqrestore(>event_vq.vq_lock, flags); }; -- 1.8.2.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/