[PATCH] PCI: Limit VPD length for megaraid_sas adapter
Resending again. Sorry if it is duplicate. My email client seems to have some issues. Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit id bffadffd43d438c3143b8d172a463de89345b836) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/pci/quirks.c| 49 +++ include/linux/pci_ids.h | 12 +++ 2 files changed, 61 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..c32cd07 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,55 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. + * That is why adding the quirk here. 
+ */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1078R, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1078DE, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_VERDE_ZCR, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1078GEN2, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS0079GEN2, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS0073SKINNY, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS0071SKINNY, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_FUSION, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_PLASMA, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_INVADER, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_FURY, + quirk_megaraid_sas_limit_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. 
This problem was initially * observed when a vpd entry was created in sysfs diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d9ba49c..20c5103 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -213,6 +213,18 @@ #define PCI_DEVICE_ID_LSI_SAS1068E 0x0058 #define PCI_DEVICE_ID_LSI_SAS1078 0x0060 +#define PCI_DEVICE_ID_LSI_SAS1078R 0x0060 +#define PCI_DEVICE_ID_LSI_SAS1078DE 0x007C +#define PCI_DEVICE_ID_LSI_VERDE_ZCR 0x0413 +#define PCI_DEVICE_ID_LSI_SAS1078GEN2 0x0078 +#define PCI_DEVICE_ID_LSI_SAS0079GEN2 0x0079 +#define PCI_DEVICE_ID_LSI_SAS0073SKINNY 0x0073 +#define PCI_DEVICE_ID_LSI_SAS0071SKINNY 0x0071 +#define PCI_DEVICE_ID_LSI_FUSION0x005b +#define PCI_DEVICE_ID_LSI_PLASMA0x002f +#define PCI_DEVICE_ID_LSI_INVADER 0x005d +#define PCI_DEVICE_ID_LSI_FURY 0x005f + #define PCI_VENDOR_ID_ATI 0x1002 /* Mach64 */ #define PCI_DEVICE_ID_ATI_688000x4158 -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kern
[PATCH v2] PCI: Limit VPD length for megaraid_sas adapter
Changes since v1 -> v2 Removed the changes in pci_id.h. Kept all the vendor ids in quirks.c Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit id bffadffd43d438c3143b8d172a463de89345b836) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/pci/quirks.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..f739e47 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. + * That is why adding the quirk here. 
+ */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_megaraid_sas_limit_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] PCI: Limit VPD length for megaraid_sas adapter
On 11/11/2015 1:30 PM, Myron Stowe wrote: > On Wed, Nov 11, 2015 at 8:54 AM, Babu Moger <babu.mo...@oracle.com> wrote: >> Changes since v1 -> v2 >> Removed the changes in pci_id.h. Kept all the vendor >> ids in quirks.c >> >> Reading or Writing of PCI VPD data causes system panic. >> We saw this problem by running "lspci -vvv" in the beginning. >> However this can be easily reproduced by running >> cat /sys/bus/devices/XX../vpd >> >> VPD length has been set as 32768 by default. Accessing vpd >> will trigger read/write of 32k. This causes problem as we >> could read data beyond the VPD end tag. Behaviour is un- >> predictable when this happens. I see some other adapter doing >> similar quirks(commit id bffadffd43d438c3143b8d172a463de89345b836) >> >> I see there is an attempt to fix this right way. >> https://patchwork.ozlabs.org/patch/534843/ or >> https://lkml.org/lkml/2015/10/23/97 >> >> Tried to fix it this way, but problem is I dont see the proper >> start/end TAGs(at least for this adapter) at all. The data is >> mostly junk or zeros. This patch fixes the issue by setting the >> vpd length to 0. >> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> --- >> drivers/pci/quirks.c | 38 ++ >> 1 files changed, 38 insertions(+), 0 deletions(-) >> >> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c >> index b03373f..f739e47 100644 >> --- a/drivers/pci/quirks.c >> +++ b/drivers/pci/quirks.c >> @@ -2123,6 +2123,44 @@ static void >> quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) >> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, >> quirk_via_cx700_pci_parking_caching); >> >> /* >> + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') >> + * will dump 32k of data. The default length is set as 32768. >> + * Reading a full 32k will cause an access beyond the VPD end tag. >> + * The system behaviour at that point is mostly unpredictable. >> + * Also I dont believe vendors have implemented this VPD headers properly. 
>> + * Atleast I dont see it in following megaraid sas controller. >> + * That is why adding the quirk here. >> + */ >> +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) >> +{ >> + if (dev->vpd) >> + dev->vpd->len = 0; >> +} >> + >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, >> + quirk_megaraid_sas_limit_vpd); >> + >> +/* >> * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the >> * VPD end tag will hang the device. This problem was initially >> * observed when a vpd entry was created in sysfs >> -- >> 1.7.1 >> > > Just to confirm, I've encountered similar results on a MegaRAID SAS 2208 - Myron, Thanks for confirmation. With most of the devices behaving this way, I feel the default length is set too high. Anyway that is Bjorn's call. For this adapter, I think we should set the length to 0. 
> > $ lspci -vvv -s 02:00.0 > 02:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 2208 > [Thunderbolt] (rev 05) > Capabilities: [d0] Vital Product Data > Unknown small resource type 00, will not decode more. > > $ cat /sys/devices/pci:00/:00:02.2/:02:00.0/vpd | > od -A x -t x1z -v > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0
[PATCH v4] PCI: Limit VPD length for megaraid_sas adapter
Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk for Broadcom 5708S")) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0x80. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Khalid Aziz <khalid.a...@oracle.com> Tested-by: Dmitry Klochkov <dmitry.kloch...@oracle.com> Orabug: 22104511 Changes since v3 -> v4 We found some options of the lspci does not work very well if it cannot find the valid vpd tag(Example command "lspci -s 10:00.0 -vv"). It displays the error message and exits right away. Setting the length back to 0 fixes the problem. Changes since v2 -> v3 Changed the vpd length from 0 to 0x80 which leaves the option open for someone to read first few bytes. Changes since v1 -> v2 Removed the changes in pci_id.h. 
Kept all the vendor ids in quirks.c --- drivers/pci/quirks.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..f739e47 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. + * That is why adding the quirk here. + */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_megaraid_sas_limit_vpd); + +/* 
* For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v4] PCI: Limit VPD length for megaraid_sas adapter
Hi Bjorn, My old logs were lost. So, I had to recreate the issue again. So it took sometime. On 12/7/2015 11:29 AM, Bjorn Helgaas wrote: > Hi Babu, > > On Thu, Dec 03, 2015 at 12:25:19PM -0800, Babu Moger wrote: >> Reading or Writing of PCI VPD data causes system panic. >> We saw this problem by running "lspci -vvv" in the beginning. >> However this can be easily reproduced by running >> cat /sys/bus/devices/XX../vpd > > What sort of panic is this? Actual panic stack showed total different area. It looked like this. TSTATE: 80e01601 TPC: 007945c8 TNPC: 007945cc Y: Not tainted TPC: <ehci_irq+0x94/0x388> g0: 4000 g1: 084001604020 g2: 084001604024 g3: 0acb g4: 800fe42d0340 g5: 8000291ce000 g6: 800fe42f4000 g7: 03114000 o0: 800fe085d99c o1: 800fe42f4008 o2: 4000 o3: 0001 o4: o5: 0012 sp: 80002047b2b1 ret_pc: 00794540 RPC: <ehci_irq+0xc/0x388> l0: 800fe085d980 l1: c001 l2: 000b l3: 008e7058 l4: 00bd19a8 l5: 00bd6a88 l6: l7: i0: 800fe085d800 i1: 0016 i2: 800c20c007c3 i3: f0265f78 i4: feff4748 i5: feff2ff8 i6: 80002047b3d1 i7: 0077adf0 I7: <usb_hcd_irq+0x38/0xa0> Call Trace: [0077adf0] usb_hcd_irq+0x38/0xa0 [004d122c] handle_irq_event_percpu+0x8c/0x204 [004d13d8] handle_irq_event+0x34/0x60 [004d3998] handle_fasteoi_irq+0xdc/0x164 [004d1178] generic_handle_irq+0x24/0x38 [008dce68] handler_irq+0xb8/0xec [004208b4] tl0_irq5+0x14/0x20 [0042cfac] cpu_idle+0x9c/0x18c [008d2ad0] after_lock_tlb+0x1b4/0x1cc [] (null) While analyzing it from kdump, I saw it stuck in here below. 
PID: 5274 TASK: 800fe1198680 CPU: 0 COMMAND: "cat" #0 [800fe25f6f81] switch_to_pc at 8d725c #1 [800fe25f70e1] pci_user_read_config_word at 6c4698 #2 [800fe25f71a1] pci_vpd_pci22_wait at 6c4710 #3 [800fe25f7261] pci_vpd_pci22_read at 6c4994 #4 [800fe25f7321] pci_read_vpd at 6c3e90 #5 [800fe25f73d1] read_vpd_attr at 6ccc78 #6 [800fe25f7481] read at 5be478 #7 [800fe25f7531] vfs_read at 54fdb0 #8 [800fe25f75e1] sys_read at 54ff10 #9 [800fe25f76a1] linux_sparc_syscall at 4060f4 TSTATE=0x8082000223 TT=0x16d TPC=0xfc0100295e28 TNPC=0xfc0100295e2c r0=0x r1=0x0003 r2=0x0020aec0 r3=0x0020aec4 r4=0x0b00 r5=0x033f r6=0x0001 r7=0xfc0106f0 r24=0x0003 r25=0x0020e000 r26=0x8000 r27=0x r28=0x r29=0x r30=0x07feffb468d1 r31=0x00105d94 > > This seems like a defect in the megaraid hardware or firmware. If the > VPD ROM contains junk, there's no hope that software can read the data > and figure out how much is safe to read. Yes this looks like problem with megaraid hardware. Other day, Myron stowe(myron.st...@gmail.com) reported similar problem with his setup. $ lspci -vvv -s 02:00.0 02:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 2208 [Thunderbolt] (rev 05) Capabilities: [d0] Vital Product Data Unknown small resource type 00, will not decode more. $ cat /sys/devices/pci:00/:00:02.2/:02:00.0/vpd | od -A x -t x1z -v 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< * 007ff0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< 008000 > > I assume VPD is useful for somebody, and I hate to silently disable > the whole thing. We might want to at least log a note about what > we're doing. Sure. Let me know what you think. > > Bjorn > >> VPD length has been set as 32768 by default. Accessing vpd >> will trigger read/write of 32k. This causes problem as we >> could read data beyond the VPD end tag. Behaviour is un- >> predictable when this happens. 
I see some other adapter doing >> similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk >> for Broadcom 5708S")) >> >> I see there is an attempt to fix this right way. >> https://patchwork.ozlabs.org/patch/534843/ or >> https://lkml.org/lkml/2015/10/23/97 >> >> Tried to fix it this way, but problem is I dont see the proper >> start/end TAGs(at least for this adapter) at all. The data is >> mostly junk or zeros. This patch fixes the issue by setting the >> vpd length to 0x80. >> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> Reviewed-by: Khalid Aziz <khalid.a...@ora
[PATCH] drivers/usb: Skip auto handoff for TI and Renesas USB controllers
I have never seen auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/usb/host/pci-quirks.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index f940056..b7ee895 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -1003,6 +1003,13 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) ext_cap_offset = xhci_find_next_cap_offset(base, ext_cap_offset); } while (1); + /* Auto handoff never worked for these devices. Force it and continue */ + if (pdev->vendor == PCI_VENDOR_ID_TI || + pdev->vendor == PCI_VENDOR_ID_RENESAS) { + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; + writel(val, base + ext_cap_offset); + } + /* If the BIOS owns the HC, signal that the OS wants it, and wait */ if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v4] PCI: Limit VPD length for megaraid_sas adapter
Hi Bjorn, Checking again. How about adding some messages in the logs to let user know that vpd has been disabled on this device in case if there is an attempt to access vpd. What do you think?. Thanks Babu On 12/7/2015 5:07 PM, Babu Moger wrote: > Hi Bjorn, > My old logs were lost. So, I had to recreate the issue again. So it took > sometime. > > On 12/7/2015 11:29 AM, Bjorn Helgaas wrote: >> Hi Babu, >> >> On Thu, Dec 03, 2015 at 12:25:19PM -0800, Babu Moger wrote: >>> Reading or Writing of PCI VPD data causes system panic. >>> We saw this problem by running "lspci -vvv" in the beginning. >>> However this can be easily reproduced by running >>> cat /sys/bus/devices/XX../vpd >> >> What sort of panic is this? > > Actual panic stack showed total different area. It looked like this. > > TSTATE: 80e01601 TPC: 007945c8 TNPC: 007945cc Y: > Not tainted > TPC: <ehci_irq+0x94/0x388> > g0: 4000 g1: 084001604020 g2: 084001604024 g3: > 0acb > g4: 800fe42d0340 g5: 8000291ce000 g6: 800fe42f4000 g7: > 03114000 > o0: 800fe085d99c o1: 800fe42f4008 o2: 4000 o3: > 0001 > o4: o5: 0012 sp: 80002047b2b1 ret_pc: > 00794540 > RPC: <ehci_irq+0xc/0x388> > l0: 800fe085d980 l1: c001 l2: 000b l3: > 008e7058 > l4: 00bd19a8 l5: 00bd6a88 l6: l7: > > i0: 800fe085d800 i1: 0016 i2: 800c20c007c3 i3: > f0265f78 > i4: feff4748 i5: feff2ff8 i6: 80002047b3d1 i7: > 0077adf0 > I7: <usb_hcd_irq+0x38/0xa0> > Call Trace: > [0077adf0] usb_hcd_irq+0x38/0xa0 > [004d122c] handle_irq_event_percpu+0x8c/0x204 > [004d13d8] handle_irq_event+0x34/0x60 > [004d3998] handle_fasteoi_irq+0xdc/0x164 > [004d1178] generic_handle_irq+0x24/0x38 > [008dce68] handler_irq+0xb8/0xec > [004208b4] tl0_irq5+0x14/0x20 > [0042cfac] cpu_idle+0x9c/0x18c > [008d2ad0] after_lock_tlb+0x1b4/0x1cc > [] (null) > > > While analyzing it from kdump, I saw it stuck in here below. 
> > PID: 5274 TASK: 800fe1198680 CPU: 0 COMMAND: "cat" > #0 [800fe25f6f81] switch_to_pc at 8d725c > #1 [800fe25f70e1] pci_user_read_config_word at 6c4698 > #2 [800fe25f71a1] pci_vpd_pci22_wait at 6c4710 > #3 [800fe25f7261] pci_vpd_pci22_read at 6c4994 > #4 [800fe25f7321] pci_read_vpd at 6c3e90 > #5 [800fe25f73d1] read_vpd_attr at 6ccc78 > #6 [800fe25f7481] read at 5be478 > #7 [800fe25f7531] vfs_read at 54fdb0 > #8 [800fe25f75e1] sys_read at 54ff10 > #9 [800fe25f76a1] linux_sparc_syscall at 4060f4 > TSTATE=0x8082000223 TT=0x16d TPC=0xfc0100295e28 TNPC=0xfc0100295e2c > r0=0x r1=0x0003 r2=0x0020aec0 > r3=0x0020aec4 r4=0x0b00 r5=0x033f > r6=0x0001 r7=0xfc0106f0 r24=0x0003 > r25=0x0020e000 r26=0x8000 r27=0x > r28=0x r29=0x r30=0x07feffb468d1 > r31=0x00105d94 > > > >> >> This seems like a defect in the megaraid hardware or firmware. If the >> VPD ROM contains junk, there's no hope that software can read the data >> and figure out how much is safe to read. > > Yes this looks like problem with megaraid hardware. > > Other day, Myron stowe(myron.st...@gmail.com) reported similar problem with > his setup. > > $ lspci -vvv -s 02:00.0 > 02:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 2208 > [Thunderbolt] (rev 05) > Capabilities: [d0] Vital Product Data > Unknown small resource type 00, will not decode more. > > $ cat /sys/devices/pci:00/:00:02.2/:02:00.0/vpd | > od -A x -t x1z -v > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< > * > 007ff0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< > 008000 > > >> >> I assume VPD is useful for somebody, and I hate to silently disable >> the whole thing. We might want to at least log a note about what >> we're doing. > > Sure. Let me know what you think. > > >> >> Bjorn >> >>> VPD length has been set as 32768 by default. Accessing vpd >>> will trigger read/write of 32k. This causes problem as we >>> c
[PATCH v3] PCI: Limit VPD length for megaraid_sas adapter
Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk for Broadcom 5708S")) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0x80. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Khalid Aziz <khalid.a...@oracle.com> Changes since v2 -> v3 Changed the vpd length from 0 to 0x80 which leaves the option open for someone to read first few bytes. Changes since v1 -> v2 Removed the changes in pci_id.h. Kept all the vendor ids in quirks.c --- drivers/pci/quirks.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..b8774e2 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. 
+ * That is why adding the quirk here. + */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0x80; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_megaraid_sas_limit_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ixgbe: Fix minor typo while freeing IRQ
The array subscript increments after the execution of the statement. So there is no issue here. However it helps to read the code better. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 569cb07..6f4fe66 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3016,7 +3016,7 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) free_irq(entry->vector, q_vector); } - free_irq(adapter->msix_entries[vector++].vector, adapter); + free_irq(adapter->msix_entries[vector].vector, adapter); } /** -- 1.7.1
Re: [PATCHv2 4/4] PCI: Blacklist VPD access for buggy devices
On 2/9/2016 3:07 PM, Bjorn Helgaas wrote: > There seem to be several revs of this patch, and it's hard for me to > keep track of what's current. If you want to update any patch in the > series, please repost the entire series with a new version number. Here is the latest of patch 4/4. https://patchwork.kernel.org/patch/8084221/ I will wait for Hannes's response before re-posting it. Hannes, If you want me to re-post all the series let me know. > > On Wed, Jan 13, 2016 at 12:25:35PM +0100, Hannes Reinecke wrote: >> From: Babu Moger <babu.mo...@oracle.com> >> >> Reading or Writing of PCI VPD data causes system panic. >> We saw this problem by running "lspci -vvv" in the beginning. >> However this can be easily reproduced by running >> cat /sys/bus/devices/XX../vpd >> >> As even a simple read on any VPD data triggers a system >> lockup on certain cards this patch implements a PCI quirk >> to disabling VPD acces altogether by setting the vpd length > > s/acces/access/ > s/vpd/VPD/ > >> to '0'. 
>> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> Signed-off-by: Hannes Reinecke <h...@suse.de> >> --- >> drivers/pci/access.c | 5 - >> drivers/pci/quirks.c | 41 + >> 2 files changed, 45 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/pci/access.c b/drivers/pci/access.c >> index 914e023..82f41a8 100644 >> --- a/drivers/pci/access.c >> +++ b/drivers/pci/access.c >> @@ -396,7 +396,7 @@ static ssize_t pci_vpd_pci22_read(struct pci_dev *dev, >> loff_t pos, size_t count, >> if (pos < 0) >> return -EINVAL; >> >> -if (!vpd->valid) { >> +if (!vpd->valid && vpd->base.len > 0) { >> vpd->valid = true; >> vpd->base.len = pci_vpd_pci22_size(dev); >> } >> @@ -459,6 +459,9 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, >> loff_t pos, size_t count >> loff_t end = pos + count; >> int ret = 0; >> >> +if (vpd->base.len == 0) >> +return -EIO; >> + >> if (!vpd->valid) >> return -EAGAIN; >> >> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c >> index 7e32730..af0f8a1 100644 >> --- a/drivers/pci/quirks.c >> +++ b/drivers/pci/quirks.c >> @@ -2123,6 +2123,47 @@ static void >> quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) >> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, >> quirk_via_cx700_pci_parking_caching); >> >> /* >> + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') >> + * will dump 32k of data. The default length is set as 32768. >> + * Reading a full 32k will cause an access beyond the VPD end tag. >> + * The system behaviour at that point is mostly unpredictable. >> + * Apparently, some vendors have not implemented this VPD headers properly. >> + * Adding a generic function disable vpd data for these buggy adapters >> + * Add the DECLARE_PCI_FIXUP_FINAL line below with the specific with >> + * vendor and device of interest to use this quirk. 
>> + */ >> +static void quirk_blacklist_vpd(struct pci_dev *dev) >> +{ >> +if (dev->vpd) { >> +dev->vpd->len = 0; >> +dev_warn(>dev, "PCI vpd access has been disabled due to >> firmware bug\n"); > > "PCI" is superfluous and "VPD" should be capitalized. > >> +} >> +} >> + >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, >> +quirk_blacklist_vpd); >> + >> +/* >> * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the >> * VPD end tag will hang the device. This problem was initially >> * observed when a vpd entry was created in sysfs >> -- >> 1.8.5.6 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-pci" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] pci: Blacklist vpd access for buggy devices
On 1/21/2016 9:47 AM, jordan_hargr...@dell.com wrote: >> From: Babu Moger [babu.mo...@oracle.com] >> Sent: Tuesday, January 19, 2016 2:39 PM >> To: Hargrave, Jordan; bhelg...@google.com >> Cc: linux-...@vger.kernel.org; linux-kernel@vger.kernel.org; >> alexander.du...@gmail.com; h...@suse.de; mkube...@suse.com; >> shane.seym...@hpe.com; myron.st...@gmail.com >> Subject: Re: [PATCH RFC] pci: Blacklist vpd access for buggy devices >> >> Hi Jordan, >> >> On 1/19/2016 9:22 AM, jordan_hargr...@dell.com wrote: >>> From: Babu Moger [babu.mo...@oracle.com] >>> Sent: Monday, January 11, 2016 4:49 PM >>> To: bhelg...@google.com >>> Cc: linux-...@vger.kernel.org; linux-kernel@vger.kernel.org; >>> alexander.du...@gmail.com; h...@suse.de; mkube...@suse.com; >>> shane.seym...@hpe.com; myron.st...@gmail.com; >>> venkatkumar.duvv...@avago.com; Hargrave, Jordan >>> Subject: Re: [PATCH RFC] pci: Blacklist vpd access for buggy devices >>> >>> Sorry. Missed Jordan. >>> >>> On 1/11/2016 3:13 PM, Babu Moger wrote: >>>> Reading or Writing of PCI VPD data causes system panic. >>>> We saw this problem by running "lspci -vvv" in the beginning. >>>> However this can be easily reproduced by running >>>> cat /sys/bus/devices/XX../vpd >>>> >>>> VPD length has been set as 32768 by default. Accessing vpd >>>> will trigger read/write of 32k. This causes problem as we >>>> could read data beyond the VPD end tag. Behaviour is un- >>>> predictable when this happens. I see some other adapter doing >>>> similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk >>>> for Broadcom 5708S")) >>>> >>>> I see there is an attempt to fix this right way. >>>> https://patchwork.ozlabs.org/patch/534843/ or >>>> https://lkml.org/lkml/2015/10/23/97 >>>> >>>> Tried to fix it this way, but problem is I dont see the proper >>>> start/end TAGs(at least for this adapter) at all. The data is >>>> mostly junk or zeros. This patch fixes the issue by setting the >>>> vpd length to 0x80. 
>>>> >>>> Also look at the threds >>>> >>>> https://lkml.org/lkml/2015/11/10/557 >>>> https://lkml.org/lkml/2015/12/29/315 >>>> >>>> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >>>> --- >>>> >>>> NOTE: >>>> Jordan, Are you sure all the devices in PCI_VENDOR_ID_ATHEROS and >>>> PCI_VENDOR_ID_ATTANSIC have this problem. You have used PCI_ANY_ID. >>>> I felt it is too broad. Can you please check. >>>> >>> >>> I don't actually have that hardware, it was a bugfix for biosdevname for >>> RedHat. We were getting >>> 'BUG: soft lockup - CPU#0 stuck for 23s!' when attempting to read the vpd >>> area. >>> >>> Certainly 0x1969:0x1026 experienced this. >> >> Ok. Thanks. I will update the patch 4/4. >> > > Thanks! I also found 1969:2062. Maybe best to just block everything in > drivers/net/ethernet/atheros/ Ok. I will update the patch.. > > atl1c: > static const struct pci_device_id atl1c_pci_tbl[] = { > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L1C)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L2C)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L2C_B)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L2C_B2)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L1D)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L1D_2_0)}, > /* required last entry */ > { 0 } > }; > > atl1e > static const struct pci_device_id atl1e_pci_tbl[] = { > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L1E)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, 0x1066)}, > /* required last entry */ > { 0 } > }; > >>> >>> 09:00.0 Ethernet controller: Atheros Communications AR8121/AR8113/AR8114 >>> Gigabit or Fast Ethernet (rev b0) >>> Subsystem: Atheros Communications AR8121/AR8113/AR8114 Gigabit or >>> Fast Ethernet >>> Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- >>> Stepping- SERR- FastB2B- DisINTx+ >>> Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- >>> SERR- >> Latency: 0, 
Cache Line
[PATCH v4 4/4] pci: Blacklist vpd access for buggy devices
Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd As even a simple read on any VPD data triggers a system lockup on certain cards this patch implements a PCI quirk to disable VPD access altogether by setting the vpd length to '0'. Added all the PCI_VENDOR_ID_ATTANSIC variants. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Hannes Reinecke <h...@suse.de> Signed-off-by: Jordan Hargrave <jordan_hargr...@dell.com> --- drivers/pci/access.c |5 - drivers/pci/quirks.c | 43 +++ 2 files changed, 47 insertions(+), 1 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 914e023..82f41a8 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -396,7 +396,7 @@ static ssize_t pci_vpd_pci22_read(struct pci_dev *dev, loff_t pos, size_t count, if (pos < 0) return -EINVAL; - if (!vpd->valid) { + if (!vpd->valid && vpd->base.len > 0) { vpd->valid = true; vpd->base.len = pci_vpd_pci22_size(dev); } @@ -459,6 +459,9 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count loff_t end = pos + count; int ret = 0; + if (vpd->base.len == 0) + return -EIO; + if (!vpd->valid) return -EAGAIN; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..f0007e9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,49 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Apparently, some vendors have not implemented this VPD headers properly. 
+ * Adding a generic function disable vpd data for these buggy adapters + * Add the DECLARE_PCI_FIXUP_FINAL line below with the specific with + * vendor and device of interest to use this quirk. + */ +static void quirk_blacklist_vpd(struct pci_dev *dev) +{ + if (dev->vpd) { + dev->vpd->len = 0; + dev_warn(>dev, "PCI vpd access has been disabled due to firmware bug\n"); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, + quirk_blacklist_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1
[PATCH RFC] PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. === mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception === Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this I have introduced weak function pci_init_archdata in SR-IOV code. Sparc will copy the archdata from PF to VF. Also verified the fix. Please review. Let me know if there is a better way to fix this. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- arch/sparc/kernel/pci.c |7 +++ drivers/pci/iov.c |5 + include/linux/pci.h |1 + 3 files changed, 13 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index badf095..0fc774a 100644 --- a/a
Re: [PATCH v2] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
It's been a while since I submitted this patch. Ping 1. On 1/8/2016 9:39 AM, Babu Moger wrote: > I have never seen auto handoff working on TI and RENESAS cards. > Eventually, we force handoff. This code forces the handoff > unconditionally. It saves 5 seconds boot time for each card. > > Signed-off-by: Babu Moger <babu.mo...@oracle.com> > --- > v2: > Made changes per comments from Greg KH. > Extra space removal in assignment > Added both vendor and device id checks. > > drivers/usb/host/pci-quirks.c |8 > 1 files changed, 8 insertions(+), 0 deletions(-) > > diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c > index f940056..0915f44 100644 > --- a/drivers/usb/host/pci-quirks.c > +++ b/drivers/usb/host/pci-quirks.c > @@ -1003,6 +1003,14 @@ static void quirk_usb_handoff_xhci(struct pci_dev > *pdev) > ext_cap_offset = xhci_find_next_cap_offset(base, > ext_cap_offset); > } while (1); > > + /* Auto handoff never worked for these devices. Force it and continue */ > + if ((pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) || > + (pdev->vendor == PCI_VENDOR_ID_RENESAS > + && pdev->device == 0x0014)) { > + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; > + writel(val, base + ext_cap_offset); > + } > + > /* If the BIOS owns the HC, signal that the OS wants it, and wait */ > if (val & XHCI_HC_BIOS_OWNED) { > writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); >
[PATCH v2] sparc/PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> --- v2: Removed RFC. Made changes per comments from Ethan Zhao. Now the changes are only in Sparc specific code. Removed the changes from driver/pci. Implemented already defined weak function pcibios_add_device in arch/sparc/kernel/pci.c to initialize sriov archdata. arch/sparc/kernel/pci.c | 15 +++ 1 f
[PATCH v3] sparc/PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> --- v2: Removed RFC. Made changes per comments from Ethan Zhao. Now the changes are only in Sparc specific code. Removed the changes from driver/pci. Implemented already defined weak function pcibios_add_device in arch/sparc/kernel/pci.c to initialize sriov archdata. v3: Fixed the compile error repor
[PATCH v4] sparc/PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> --- v2: Removed RFC. Made changes per comments from Ethan Zhao. Now the changes are only in Sparc specific code. Removed the changes from driver/pci. Implemented already defined weak function pcibios_add_device in arch/sparc/kernel/pci.c to initialize sriov archdata. v3: Fixed the compile error reported in kbuild t
Re: [PATCH v4] sparc/PCI: Fix for panic while enabling SR-IOV
Hi David, On 3/29/2016 7:57 PM, David Miller wrote: > From: Babu Moger <babu.mo...@oracle.com> > Date: Thu, 24 Mar 2016 13:02:22 -0700 > >> We noticed this panic while enabling SR-IOV in sparc. > ... >> SR-IOV code looks for arch specific data while enabling >> VFs. When VF device is added, driver probe function makes set >> of calls to initialize the pci device. Because the VF device is >> added different way than the normal PF device(which happens via >> of_create_pci_dev for sparc), some of the arch specific initialization >> does not happen for VF device. That causes panic when archdata is >> accessed. >> >> To fix this, I have used already defined weak function >> pcibios_setup_device to copy archdata from PF to VF. >> Also verified the fix. >> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> >> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> > > Looks good, applied and queued up for -stable, thanks. Thanks. > > Just a note, I am assuming that the VFs are not instantiated in the > device tree. Because when you just memcpy the arch data over from the > PF, one thing we end up doing is using the device node of the PF. No. VFs are not instantiated in device tree(/proc/device-tree) > > I slightly cringed at the memcpy, because at least one of these > pointers are to objects which are reference counted, the OF device. > > Generally speaking we don't really support hot-plug for OF probed > devices, but if we did all of the device tree pointers have to be > refcounted properly. > > So in the long term that whole sequence where we go: > > struct dev_archdata *sd; > ... > sd = >dev.archdata; > sd->iommu = pbm->iommu; > sd->stc = >stc; > sd->host_controller = pbm; > sd->op = op = of_find_device_by_node(node); > sd->numa_node = pbm->numa_node; > > should be encapsulated into a helper function, and both > of_create_pci_dev() and this new pcibios_setup_device() can > invoke it. > Yes. Agree. 
We need to refactor the whole of_create_pci_dev path to support hot-plug for the long term. I will start looking at it. For now we should be fine with the current patch. Thanks.
[PATCH] ixgbevf: Fix relaxed order settings in VF driver
Current code writes the tx/rx relaxed order without reading it first. This can lead to unintended consequences as we are forcibly writing other bits. We noticed this problem while testing VF driver on sparc. Relaxed order settings for rx queue were all messed up which was causing performance drop with VF interface. Fixed it by reading the registers first and setting the specific bit of interest. With this change we are able to match the bandwidth equivalent to PF interface. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |9 +++-- 1 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0ea14c0..51abff1 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1545,6 +1545,7 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *ring) { + u32 regval; struct ixgbe_hw *hw = >hw; u64 tdba = ring->dma; int wait_loop = 10; @@ -1565,8 +1566,10 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFTDWBAL(reg_idx), 0); /* enable relaxed ordering */ + regval = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(reg_idx)); + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(reg_idx), - (IXGBE_DCA_TXCTRL_DESC_RRO_EN | + (regval | IXGBE_DCA_TXCTRL_DESC_RRO_EN | IXGBE_DCA_TXCTRL_DATA_RRO_EN)); /* reset head and tail pointers */ @@ -1734,6 +1737,7 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *ring) { + u32 regval; struct ixgbe_hw *hw = >hw; u64 rdba = ring->dma; u32 rxdctl; @@ -1749,8 +1753,9 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ring->count * sizeof(union 
ixgbe_adv_rx_desc)); /* enable relaxed ordering */ + regval = IXGBE_READ_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx)); IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx), - IXGBE_DCA_RXCTRL_DESC_RRO_EN); + regval | IXGBE_DCA_RXCTRL_DESC_RRO_EN); /* reset head and tail pointers */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(reg_idx), 0); -- 1.7.1
Re: [PATCH] ixgbevf: Fix relaxed order settings in VF driver
Hi Alex, On 4/21/2016 2:22 PM, Alexander Duyck wrote: > On Thu, Apr 21, 2016 at 11:13 AM, Alexander Duyck > <alexander.du...@gmail.com> wrote: >> On Thu, Apr 21, 2016 at 10:21 AM, Babu Moger <babu.mo...@oracle.com> wrote: >>> Current code writes the tx/rx relaxed order without reading it first. >>> This can lead to unintended consequences as we are forcibly writing >>> other bits. >> >> The consequences were very much intended as there are situations where >> enabling relaxed ordering can lead to data corruption. >> >>> We noticed this problem while testing VF driver on sparc. Relaxed >>> order settings for rx queue were all messed up which was causing >>> performance drop with VF interface. >> >> What additional relaxed ordering bits are you enabling on Sparc? I'm >> assuming it is just the Rx data write back but I want to verify. >> >>> Fixed it by reading the registers first and setting the specific >>> bit of interest. With this change we are able to match the bandwidth >>> equivalent to PF interface. >>> >>> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> >> Fixed is a relative term here since you are only chasing performance >> from what I can tell. We need to make certain that this doesn't break >> the driver on any other architectures by leading to things like data >> corruption. >> >> - Alex > > It occurs to me that what might be easier is instead of altering the > configuration on all architectures you could instead wrap the write so > that on SPARC you include the extra bits you need and on all other > architectures you leave the write as-is similar to how the code in the > ixgbe_start_hw_gen2 only clears the bits if CONFIG_SPARC is not > defined. Here are the default values that I see when testing on Sparc. 
Default tx value 0x2a00 All below 3 set #define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ #define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ #define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ I am not too worried about tx values. I can keep it as it is. It did not seem to cause any problems right now. Default rx value 0xb200 All below 3 set plus one more #define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx rd Desc Relax Order */ #define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ #define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ Is there a reason to disable IXGBE_DCA_RXCTRL_DATA_WRO_EN and IXGBE_DCA_RXCTRL_HEAD_WRO_EN for RX? I would think CONFIG_SPARC should be our last option. What do you think? > > - Alex >
[PATCH v2] ixgbevf: Change the relaxed order settings in VF driver for sparc
We noticed performance issues with VF interface on sparc compared to PF. Setting the RX to IXGBE_DCA_RXCTRL_DATA_WRO_EN brings it on far with PF. Also this matches to the default sparc setting in PF driver. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- v2: Alexander had concerns about this negativily affecting other architectures. Added CONFIG_SPARC check so this should not affect other architectures. drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |6 ++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0ea14c0..3596e0b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1748,9 +1748,15 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(reg_idx), ring->count * sizeof(union ixgbe_adv_rx_desc)); +#ifndef CONFIG_SPARC /* enable relaxed ordering */ IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx), IXGBE_DCA_RXCTRL_DESC_RRO_EN); +#else + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx), + IXGBE_DCA_RXCTRL_DESC_RRO_EN | + IXGBE_DCA_RXCTRL_DATA_WRO_EN); +#endif /* reset head and tail pointers */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(reg_idx), 0); -- 1.7.1
Filesystem slow write performance
xa0 Aug 3 12:18:55 build-t7 kernel: [005586a8] filemap_fdatawait_range+0xc8/0x140 Aug 3 12:18:55 build-t7 kernel: [005587fc] filemap_write_and_wait_range+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [100e4258] ext3_sync_file+0x58/0x2c0 [ext3] Aug 3 12:18:55 build-t7 kernel: [005f54d8] vfs_fsync_range+0x38/0xa0 Aug 3 12:18:55 build-t7 kernel: [005f555c] vfs_fsync+0x1c/0x40 Aug 3 12:18:55 build-t7 kernel: [005f55a8] do_fsync+0x28/0x60 Aug 3 12:18:55 build-t7 kernel: [005f55f0] SyS_fdatasync+0x10/0x40 I am not an expert on this area. Note that I am bit behind on the kernel version(but not a whole lot). Working on to recreate this with latest upstream kernel. Looked at the upstream patches and tried most of the upstream patches which appear to be related and nothing helped. Problem is fairly easy to reproduce. Let me know if you want me to try something. Thanks Babu Moger
Re: Filesystem slow write performance
I wasn't able to repro this with mainline. Sorry for the noise. On 8/6/2016 1:49 PM, Babu Moger wrote: Hi, Seeing some terrible write performance with ext3/4 writes. Reads are fine. I have a created loop device and mounted as ext3(tried ext4 also). Here is iostat output. await time is pretty high most of the time. Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util loop0 0.00 0.000.00 133.00 0.00 1064.00 8.00 124.14 835.61 7.52 100.00 dm-0 0.00 0.000.00 132.00 0.00 1056.00 8.00 1.007.52 7.52 99.20 Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util loop0 0.00 0.000.00 94.00 0.00 752.00 8.00 124.18 901.02 10.64 100.00 dm-0 0.00 0.000.00 92.00 0.00 736.00 8.00 1.02 11.09 10.87 100.00 Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util loop0 0.00 0.000.00 132.00 0.00 1056.00 8.00 124.56 1329.30 7.58 100.00 dm-0 0.00 0.000.00 141.00 0.00 1128.00 8.00 1.087.72 7.06 99.60 Tags output [root@build-t7 0]# cat tags nr_tags=128, reserved_tags=0, bits_per_word=5 nr_free=128, nr_reserved=0 active_queues=0 Here is the output of "echo w > /proc/sysrq-trigger" when the problem happens. 
Aug 3 12:18:55 build-t7 kernel: kworker/u512:0 D 009defd4 0 6 2 0x0600 Aug 3 12:18:55 build-t7 kernel: Workqueue: writeback bdi_writeback_workfn (flush-7:0) Aug 3 12:18:55 build-t7 kernel: Call Trace: Aug 3 12:18:55 build-t7 kernel: [009dc9e4] schedule+0x24/0xa0 Aug 3 12:18:55 build-t7 kernel: [009defd4] schedule_timeout+0x134/0x220 Aug 3 12:18:55 build-t7 kernel: [009dc044] io_schedule_timeout+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [006be64c] bt_get+0x10c/0x1e0 Aug 3 12:18:55 build-t7 kernel: [006be7f4] blk_mq_get_tag+0x74/0xe0 Aug 3 12:18:55 build-t7 kernel: [006ba570] __blk_mq_alloc_request+0x10/0x180 Aug 3 12:18:55 build-t7 kernel: [006bb9f4] blk_mq_map_request+0x1d4/0x260 Aug 3 12:18:55 build-t7 kernel: [006bbd40] blk_sq_make_request+0x60/0x300 Aug 3 12:18:55 build-t7 kernel: [006afa58] generic_make_request+0x78/0xe0 Aug 3 12:18:55 build-t7 kernel: [006afb44] submit_bio+0x84/0x160 Aug 3 12:18:55 build-t7 kernel: [005f7cb4] _submit_bh+0x174/0x200 Aug 3 12:18:55 build-t7 kernel: [005f7d54] submit_bh+0x14/0x40 Aug 3 12:18:55 build-t7 kernel: [005fc248] __block_write_full_page.clone.0+0x2c8/0x500 Aug 3 12:18:55 build-t7 kernel: [005fc620] block_write_full_page+0xa0/0xe0 Aug 3 12:18:55 build-t7 kernel: [100e7d94] ext3_writeback_writepage+0x134/0x200 [ext3] Aug 3 12:18:55 build-t7 kernel: [00562798] __writepage+0x18/0x60 Aug 3 12:18:55 build-t7 kernel: loop0 D 009deff4 0 15632 2 0x01000400 Aug 3 12:18:55 build-t7 kernel: Call Trace: Aug 3 12:18:55 build-t7 kernel: [009dc9e4] schedule+0x24/0xa0 Aug 3 12:18:55 build-t7 kernel: [009deff4] schedule_timeout+0x154/0x220 Aug 3 12:18:55 build-t7 kernel: [009dc044] io_schedule_timeout+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [009dcdbc] bit_wait_io+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [009dd1c4] __wait_on_bit+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [0055719c] wait_on_page_bit+0x7c/0xa0 Aug 3 12:18:55 build-t7 kernel: [005586a8] filemap_fdatawait_range+0xc8/0x140 Aug 3 12:18:55 build-t7 kernel: [005587fc] 
filemap_write_and_wait_range+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [00558a58] __generic_file_write_iter+0xb8/0x140 Aug 3 12:18:55 build-t7 kernel: [00558bac] generic_file_write_iter+0xcc/0x1e0 Aug 3 12:18:55 build-t7 kernel: [007ca000] lo_rw_aio+0x180/0x240 Aug 3 12:18:55 build-t7 kernel: [007ca260] do_req_filebacked+0x1a0/0x1c0 Aug 3 12:18:55 build-t7 kernel: [007ca2b4] loop_queue_work+0x34/0x80 Aug 3 12:18:55 build-t7 kernel: [00491944] kthread_worker_fn+0x44/0x180 Aug 3 12:18:55 build-t7 kernel: [00491c4c] kthread+0xac/0xe0 Aug 3 12:18:55 build-t7 kernel: [00406184] ret_from_fork+0x1c/0x2c Aug 3 12:18:55 build-t7 kernel: livecd-creator D 009deff4 0 15627 2676 0x308000103000400 Aug 3 12:18:55 build-t7 kernel: Call Trace: Aug 3 12:18:55 build-t7 kernel: [009dc9e4] schedule+0x24/0xa0 Aug 3 12:18:55 build-t7 kernel: [009deff4] schedule_timeout+0x154/0x220 Aug 3 12:18:55 build-t7 kernel: [009dc044] io_schedule_timeout+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [009dcdbc] bit_wait_io+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [000
Re: [PATCH v3 2/2] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined
CCing Dave. Dave, Please ack it if it looks good. Thanks. On 9/27/2016 2:33 PM, Babu Moger wrote: Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..c2b8849 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,6 +46,14 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. * @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES 32768UL #define MAX_LOCKDEP_CHAINS_BITS 16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES 524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[];
Re: [PATCH v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
On 9/28/2016 3:39 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 12:33:27PM -0700, Babu Moger wrote: This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> You forgot to Cc Dave, and since you're touching sparc I need an Ack from him before I can queue this. Dave, Can you please take a look at the patch. Please ack it if it looks good. Thanks Dave? --- arch/sparc/Kconfig |1 + lib/Kconfig.debug |3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b0960..8da321c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -44,6 +44,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cab7405..597e589 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1084,6 +1084,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
Re: [PATCH v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
Dave, Gentle reminder to review this patch. Thanks On 9/30/2016 12:19 AM, David Miller wrote: From: Babu Moger <babu.mo...@oracle.com> Date: Thu, 29 Sep 2016 08:53:24 -0500 On 9/28/2016 3:39 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 12:33:27PM -0700, Babu Moger wrote: This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> You forgot to Cc Dave, and since you're touching sparc I need an Ack from him before I can queue this. Dave, Can you please take a look at the patch. Please ack it if it looks good. I am travelling and will look at it when I get a chance.
Re: [PATCH 1/2] watchdog: Introduce update_arch_nmi_watchdog
On 10/6/2016 11:34 PM, Sam Ravnborg wrote: On Thu, Oct 06, 2016 at 03:16:42PM -0700, Babu Moger wrote: Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new function update_arch_nmi_watchdog which can be used to enable/disable architecture specific NMI watchdog handlers. Also exposes watchdog_enabled variable outside so that arch specific nmi watchdogs can use it to implement enalbe/disable behavour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h |1 + kernel/watchdog.c | 16 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eea..01b4830 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,6 +66,7 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); +extern unsigned long watchdog_enabled; The extern is within an #ifdef, but the definition later is valid alway. So extern definition should be outside the #ifdef to match the actual implementation. Ok. Sure. To manipulate / read watchdog_enabled two constants are used: NMI_WATCHDOG_ENABLED, SOFT_WATCHDOG_ENABLED Sure. I will bring these definitions to nmi.h from watchdog.c They should be visible too, so uses do not fall into the trap and uses constants (like in patch 2). Will re-post v2 version with these changes. Thanks for the comments. Sam
[PATCH 0/2] Introduce update_arch_nmi_watchdog for arch specific handlers
During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new function update_arch_nmi_watchdog which can be implemented in arch specific handlers. If you think there is a better way to do this. Please advice. Tested on sparc. Compile tested on x86. Babu Moger (2): watchdog: Introduce update_arch_nmi_watchdog sparc: Implement update_arch_nmi_watchdog arch/sparc/kernel/nmi.c | 26 ++ include/linux/nmi.h |1 + kernel/watchdog.c | 16 +--- 3 files changed, 40 insertions(+), 3 deletions(-)
[PATCH 1/2] watchdog: Introduce update_arch_nmi_watchdog
Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new function update_arch_nmi_watchdog which can be used to enable/disable architecture specific NMI watchdog handlers. Also exposes watchdog_enabled variable outside so that arch specific nmi watchdogs can use it to implement enalbe/disable behavour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h |1 + kernel/watchdog.c | 16 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eea..01b4830 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,6 +66,7 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); +extern unsigned long watchdog_enabled; extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..1ac2814 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,16 +46,21 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif int __read_mostly nmi_watchdog_enabled; int __read_mostly soft_watchdog_enabled; int __read_mostly watchdog_user_enabled; int __read_mostly watchdog_thresh = 10; +/* + * Implemented by arch specific handlers if it defines CONFIG_HAVE_NMI_WATCHDOG + */ +void __weak update_arch_nmi_watchdog(void) {} + #ifdef CONFIG_SMP int __read_mostly 
sysctl_softlockup_all_cpu_backtrace; int __read_mostly sysctl_hardlockup_all_cpu_backtrace; @@ -842,6 +847,11 @@ static int proc_watchdog_update(void) int err = 0; /* +* Enable/Disable arch specific nmi watchdogs if there is one +*/ + update_arch_nmi_watchdog(); + + /* * Watchdog threads won't be started if they are already active. * The 'watchdog_running' variable in watchdog_*_all_cpus() takes * care of this. If those threads are already active, the sample -- 1.7.1
[PATCH 2/2] sparc: Implement update_arch_nmi_watchdog
Implement function update_arch_nmi_watchdog to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specinf nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 26 ++ 1 files changed, 26 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..27c4e18 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,8 @@ error: void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -270,3 +274,25 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +#ifdef CONFIG_LOCKUP_DETECTOR +void update_arch_nmi_watchdog(void) +{ + if (atomic_read(_active) < 0) { + printk(KERN_WARNING + "NMI watchdog cannot be enabled or disabled\n"); + return; + } + + /* +* Check for bit 0. Bit 0 is dedicated for hard lockup detector or +* arch specific nmi and bit 1 for the soft lockup detector. We +* are interested only in bit 0 here. +*/ + if (watchdog_enabled & 1) + on_each_cpu(start_nmi_watchdog, NULL, 1); + else + on_each_cpu(stop_nmi_watchdog, NULL, 1); + +} +#endif -- 1.7.1
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 2:12 AM, Peter Zijlstra wrote: On Thu, Sep 22, 2016 at 11:43:34AM -0700, Babu Moger wrote: These patches adjust the static allocations for lockdep data structures used for debugging locking correctness. The current code reserves about 4MB extra space for these data structures. Most of the configurations do not need these many data structures. While testing, I have not seen it go beyond 20% of already reserved entries. $grep "lock-classes" /proc/lockdep_stats lock-classes: 1560 [max: 8191] Reserving even more space seems unreasonable. So, keeping the default entries small as before the Commit 1413c0389333 ("lockdep: Increase static allocations"). Added new CONFIG_PROVE_LOCKING_PLUS in case someone needs more entries to debug their large configuration. Why make this more complicated? There's absolutely no upside to this change as far as I can see. Peter, What do you mean? Revert the commit 1413c038933? Right now, I cannot boot my setup after enabling lockdep. How do you think we can handle this?
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 9:34 AM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 09:04:42AM -0500, Babu Moger wrote: On 9/23/2016 2:12 AM, Peter Zijlstra wrote: On Thu, Sep 22, 2016 at 11:43:34AM -0700, Babu Moger wrote: These patches adjust the static allocations for lockdep data structures used for debugging locking correctness. The current code reserves about 4MB extra space for these data structures. Most of the configurations do not need these many data structures. While testing, I have not seen it go beyond 20% of already reserved entries. $grep "lock-classes" /proc/lockdep_stats lock-classes: 1560 [max: 8191] Reserving even more space seems unreasonable. So, keeping the default entries small as before the Commit 1413c0389333 ("lockdep: Increase static allocations"). Added new CONFIG_PROVE_LOCKING_PLUS in case someone needs more entries to debug their large configuration. Why make this more complicated? There's absolutely no upside to this change as far as I can see. Peter, What do you mean? I mean I see no point to the patches you send. Revert the commit 1413c038933? Nah, why would I? Right now, I cannot boot my setup after enabling lockdep. How do you think we can handle this? Why can't you boot? You have that little memories? 4MB doesn't seem like a worthwhile amount of memory. Also, you didn't say. This seems a somewhat crucial point. Correct, We can't boot with lockdep. Sorry I did not make that clear. We have a limit on static size of the kernel. In any case, maybe invert this, add make it depend on CONFIG_BASE_SMALL, since this really only matters for really dinky systems. Sure. Will use CONFIG_BASE_SMALL and re-post the patches. Thanks
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 10:04 AM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 09:50:52AM -0500, Babu Moger wrote: Why can't you boot? You have that little memories? 4MB doesn't seem like a worthwhile amount of memory. Also, you didn't say. This seems a somewhat crucial point. Correct, We can't boot with lockdep. Sorry I did not make that clear. We have a limit on static size of the kernel. This stuff should be in .bss not .data. It should not affect the static size at all. Or am I misunderstanding things? Here it is. $ ./scripts/bloat-o-meter vmlinux.lockdep.small vmlinux.lockdep.big add/remove: 0/0 grow/shrink: 5/0 up/down: 4653056/0 (4653056) function old new delta stack_trace 2097152 4194304 +2097152 lock_chains 1048576 2097152 +1048576 list_entries 1048576 2097152 +1048576 chain_hlocks 327680 655360 +327680 chainhash_table 131072 262144 +131072 Total: Before=21046200, After=25699256, chg 22.00%
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 10:40 AM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 10:15:46AM -0500, Babu Moger wrote: Correct, We can't boot with lockdep. Sorry I did not make that clear. We have a limit on static size of the kernel. This stuff should be in .bss not .data. It should not affect the static size at all. Or am I misunderstanding things? Here it is. $ ./scripts/bloat-o-meter vmlinux.lockdep.small vmlinux.lockdep.big What does bloat-o-meter have to do with things? The static image size is not dependent on .bss, right? Peter, We checked again. Yes, It goes in .bss section. But in sparc we have to fit .text, .data, .bss in 7 permanent TLBs(that is totally 28MB). It was fine so far. But the commit 1413c0389333 ("lockdep: Increase static allocations") added extra 4MB which makes it go beyond 28MB. That is causing system boot up problems in sparc. Yes. We know it. This is a limitation. Changing this limit in our hardware is a much bigger change which we cannot address right away. So, we are trying to come up with a solution which can work for all. I will re-post the patches with CONFIG_BASE_SMALL option if there is no objections. CCing David Miller and Rob Gardner. They might be able to explain more if you have any more questions. Here is the discussion thread if you guys want to look at history. https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1237642.html
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 3:17 PM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 02:57:39PM -0500, Babu Moger wrote: We checked again. Yes, It goes in .bss section. But in sparc we have to fit .text, .data, .bss in 7 permanent TLBs(that is totally 28MB). It was fine so far. But the commit 1413c0389333 ("lockdep: Increase static allocations") added extra 4MB which makes it go beyond 28MB. That is causing system boot up problems in sparc. *sigh*, why didn't you start with that :/ Yes. We know it. This is a limitation. Changing this limit in our hardware is a much bigger change which we cannot address right away. So, we are trying to come up with a solution which can work for all. I will re-post the patches with CONFIG_BASE_SMALL option if there is no objections. OK, so double check BASE_SMALL doesn't imply other things you cannot live with, Sparc64 isn't a dinky system. If BASE_SMALL works for you then good, otherwise do a PROVE_LOCKING_SMALL symbol that is not user selectable and have SPARC select that. Use the invisible Help for that symbol to explain all this again. Thanks. Will work on it. CCing David Miller and Rob Gardner. They might be able to explain more if you have any more questions. Nah, I think I remember enough of how the Sparc MMU works to see reason.
[PATCH 0/2] Adjust lockdep static allocations
These patches adjust the static allocations for lockdep data structures used for debugging locking correctness. The current code reserves about 4MB extra space for these data structures. Most of the configurations do not need these many data structures. While testing, I have not seen it go beyond 20% of already reserved entries. $grep "lock-classes" /proc/lockdep_stats lock-classes: 1560 [max: 8191] Reserving even more space seems unreasonable. So, keeping the default entries small as before the Commit 1413c0389333 ("lockdep: Increase static allocations"). Added new CONFIG_PROVE_LOCKING_PLUS in case someone needs more entries to debug their large configuration. Patch 1 : Adjusts the sizes based on the new config parameter patch 2 : Adds new config parameter Babu Moger (2): lockdep: Keep the default static allocations small config: Add new CONFIG_PROVE_LOCKING_PLUS kernel/locking/lockdep_internals.h | 14 +++--- lib/Kconfig.debug | 10 ++ 2 files changed, 21 insertions(+), 3 deletions(-)
[PATCH 1/2] lockdep: Keep the default static allocations small
The Commit 1413c0389333 ("lockdep: Increase static allocations") doubled the static allocation for lockdep. The size is unusually high and not required for majority of the configurations. This could cause problems to some environments with limited memory configurations. We are already seeing issues on our sparc configuration where kernel fails to boot when lockdep feature is enabled. This patch keeps the default size to same as before Commit 1413c0389333 ("lockdep: Increase static allocations"). Adding the new config parameter CONFIG_PROVE_LOCKING_PLUS in case someone needs to enable more static space for lockdep entries, lock chains and stack traces to debug large configurations. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 14 +++--- 1 files changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..47336a6 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -54,18 +54,26 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#ifdef CONFIG_PROVE_LOCKING_PLUS #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#else +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) + extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH 2/2] config: Add new CONFIG_PROVE_LOCKING_PLUS
Adding the new config parameter CONFIG_PROVE_LOCKING_PLUS in case someone needs to enable more static space for lockdep entries, lock chains and stack traces to debug large configurations. The default size is kept small to cover majority of the configs. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- lib/Kconfig.debug | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b9cfdbf..d5d995e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1070,6 +1070,16 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_PLUS + bool "Reserve extra space for prove locking correctness" + depends on PROVE_LOCKING + default n + help +This feature reserves more space for lockdep entries, lock chains +and stack traces to debug large configurations. This could add +about additional 4MB static memory to kernel size. This is not +suitable for embedded or other limited memory configurations. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
Re: [PATCH v2 1/2] config: Add new CONFIG_PROVE_LOCKING_SMALL
On 9/27/2016 10:43 AM, Sam Ravnborg wrote: On Tue, Sep 27, 2016 at 09:51:40AM -0500, Babu Moger wrote: On 9/27/2016 6:40 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 06:46:25AM +0200, Sam Ravnborg wrote: Since this is only relevant for sparc, and for sparc this is "select"ed, then there is limited/no gain having this as a visible menu config option. How about adding just a simple non-visible config symbol: config PROVE_LOCKING_SMALL bool The nice help text can be added to the H file, and the select can be move to the sparc/Kconfig file where it really belongs. Yes, this should not be user selectable. I don't mind the help being here though. How about this? Moved everything to arch/sparc/Kconfig.debug. It may be not useful to have help in config file as it is not visible. Lets have some explanation in .h file. I will send v3 version if you all agree. = diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 6db35fb..67e58a1 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -21,4 +21,9 @@ config FRAME_POINTER depends on MCOUNT default y +config PROVE_LOCKING_SMALL + bool + depends on PROVE_LOCKING && SPARC + default y + endmenu The idea is to have the SPAC specific stuff in arch/sparc/Kconfig, and not scattered in Kconfig files all over the tree. Therefore drop the "depends". In sparc/Kconfig you then just do: config SPARC select PROVE_LOCKING_SMALL if PROVE_LOCKING The if part is likely not needed as PROVE_LOCKING_SMALL will be ignored unless PROVE_LOCKING is enabled. Sure. thanks. Here it is below. I will re-post v3. 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index cde1a62..353731f 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select ODD_RT_SIGACTION select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba2b0c8..3ba1665 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1008,6 +1008,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT Sam
Re: [PATCH v2 1/2] config: Add new CONFIG_PROVE_LOCKING_SMALL
On 9/27/2016 6:40 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 06:46:25AM +0200, Sam Ravnborg wrote: Since this is only relevant for sparc, and for sparc this is "select"ed, then there is limited/no gain having this as a visible menu config option. How about adding just a simple non-visible config symbol: config PROVE_LOCKING_SMALL bool The nice help text can be added to the H file, and the select can be move to the sparc/Kconfig file where it really belongs. Yes, this should not be user selectable. I don't mind the help being here though. How about this? Moved everything to arch/sparc/Kconfig.debug. It may be not useful to have help in config file as it is not visible. Lets have some explanation in .h file. I will send v3 version if you all agree. = diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 6db35fb..67e58a1 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -21,4 +21,9 @@ config FRAME_POINTER depends on MCOUNT default y +config PROVE_LOCKING_SMALL + bool + depends on PROVE_LOCKING && SPARC + default y + endmenu
[PATCH v3 0/2] Adjust lockdep static allocations for sparc
These patches limit the static allocations for lockdep data structures used for debugging locking correctness. For sparc, all the kernel's code, data, and bss, must have locked translations in the TLB so that we don't get TLB misses on kernel code and data. Current sparc chips have 8 TLB entries available that may be locked down, and with a 4mb page size, this gives a maximum of 32MB. With PROVE_LOCKING we could go over this limit and cause system boot-up problems. These patches limit the static allocations so that everything fits in current required size limit. patch 1 : Adds new config parameter CONFIG_PROVE_LOCKING_SMALL Patch 2 : Adjusts the sizes based on the new config parameter v2-> v3: Some more comments from Sam Ravnborg and Peter Zijlstra. Defined PROVE_LOCKING_SMALL as invisible and moved the selection to arch/sparc/Kconfig. v1-> v2: As suggested by Peter Zijlstra, keeping the default as is. Introduced new config variable CONFIG_PROVE_LOCKING_SMALL to handle sparc specific case. v0: Initial revision. Babu Moger (2): config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined arch/sparc/Kconfig |1 + kernel/locking/lockdep_internals.h | 20 +--- lib/Kconfig.debug |3 +++ 3 files changed, 21 insertions(+), 3 deletions(-)
[PATCH v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/Kconfig |1 + lib/Kconfig.debug |3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b0960..8da321c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -44,6 +44,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cab7405..597e589 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1084,6 +1084,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
[PATCH v3 2/2] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined
Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL is defined. This is used only for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..c2b8849 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,6 +46,14 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. * @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH v2 2/2] lockdep: Keep the static allocations small for PROVE_LOCKING_SMALL
Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL is defined. This is used for sparc. This config variable is disabled by default. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 13 ++--- 1 files changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..7d364a6 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -54,18 +54,25 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH v2 0/2] Adjust lockdep static allocations for sparc
These patches limit the static allocations for lockdep data structures used for debugging locking correctness. This is required for sparc as it requires .text, .data and .bss to fit in required 32MB limit for the kernel. Right now, with PROVE_LOCKING enabled we could go over this limit and cause system boot-up problems. These patches limit the static allocations so that everything fits in current required space limit. This is only visible for sparc. patch 1 : Adds new config parameter CONFIG_PROVE_LOCKING_SMALL Patch 2 : Adjusts the sizes based on the new config parameter v1-> v2: As suggested by Peter Zijlstra, keeping the default as is. Introduced new config variable CONFIG_PROVE_LOCKING_SMALL to handle sparc specific case. v0: Initial revision. Babu Moger (2): config: Add new CONFIG_PROVE_LOCKING_SMALL lockdep: Keep the static allocations small for PROVE_LOCKING_SMALL kernel/locking/lockdep_internals.h | 13 ++--- lib/Kconfig.debug | 17 + 2 files changed, 27 insertions(+), 3 deletions(-)
[PATCH v2 1/2] config: Add new CONFIG_PROVE_LOCKING_SMALL
Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc. This feature limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systms have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system bootup issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc. so that kernel fits in required size limit. This is only visible for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- lib/Kconfig.debug | 17 + 1 files changed, 17 insertions(+), 0 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b9cfdbf..c79de25 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1035,6 +1035,7 @@ config PROVE_LOCKING select DEBUG_MUTEXES select DEBUG_LOCK_ALLOC select TRACE_IRQFLAGS + select PROVE_LOCKING_SMALL if SPARC default n help This feature enables the kernel to prove that all locking @@ -1070,6 +1071,22 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool "Limit the space for prove locking correctness" + depends on PROVE_LOCKING && SPARC + help +This feature limits the space used for "Lock debugging: prove +locking correctness" by about 4MB. In sparc system, all the +kernel's code, data, and bss, must have locked translations in +the TLB so that it does not hit TLB misses. The current sparc +chips have 8 TLB entries available that may be locked down, and +with a 4mb page size, this gives a maximum of 32mb of memory for +the kernel size. With PROVE_LOCKING feature, the kernel size could +grow beyond this limit and causing system bootup issues. With +this option, kernel limits the size of the entries of lock_chains, +stack_trace etc. to debug PROVE_LOCKING so that kernel size fits +in 32MB. This is only visible for SPARC. 
+ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
[RFC PATCH 2/4] watchdog: Move shared definitions to nmi.h
Move shared macros and definitions to nmi.h so that watchdog.c, watchdog_hld.c or any other architecture specific handler can use those definitions. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h | 19 +++ kernel/watchdog.c | 25 - 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a78c35c..0ea0a38 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,23 @@ #include #include +/* + * The run state of the lockup detectors is controlled by the content of the + * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - + * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. + * + * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' + * are variables that are only used as an 'interface' between the parameters + * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The + * 'watchdog_thresh' variable is handled differently because its value is not + * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' + * is equal zero. + */ +#define NMI_WATCHDOG_ENABLED_BIT 0 +#define SOFT_WATCHDOG_ENABLED_BIT 1 +#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) +#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) + /** * touch_nmi_watchdog - restart NMI watchdog timeout. 
* @@ -91,6 +108,8 @@ static inline bool trigger_single_cpu_backtrace(int cpu) extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; +extern unsigned long watchdog_enabled; +extern DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); extern unsigned long *watchdog_cpumask_bits; extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a88e179..4ea7752 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -26,29 +26,12 @@ #include #include -/* - * The run state of the lockup detectors is controlled by the content of the - * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. - * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. 
- */ -#define NMI_WATCHDOG_ENABLED_BIT 0 -#define SOFT_WATCHDOG_ENABLED_BIT 1 -#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) -#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) - static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif int __read_mostly nmi_watchdog_enabled; int __read_mostly soft_watchdog_enabled; @@ -96,7 +79,7 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static unsigned long soft_lockup_nmi_warn; -- 1.7.1
[RFC PATCH 1/4] watchdog: Remove hardlockup handler references
Separate hardlockup code from watchdog.c. It is mostly straight forward. Remove everything inside CONFIG_HARDLOCKUP_DETECTORS. This code will go to file watchdog_hld.c. We also define weak handlers watchdog_nmi_enable and watchdog_nmi_disable. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 251 ++--- 1 files changed, 7 insertions(+), 244 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..a88e179 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,7 +24,6 @@ #include #include -#include #include /* @@ -100,50 +99,8 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); -static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -#endif static unsigned long soft_lockup_nmi_warn; -/* boot commands */ -/* - * Should we panic when a soft-lockup or hard-lockup occurs: - */ -#ifdef CONFIG_HARDLOCKUP_DETECTOR -unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; -static unsigned long hardlockup_allcpu_dumped; -/* - * We may not want to enable hard lockup detection by default in all cases, - * for example when running the kernel as a guest on a hypervisor. In these - * cases this function can be called to disable hard lockup detection. This - * function should only be executed once by the boot processor before the - * kernel command line parameters are parsed, because otherwise it is not - * possible to override this in hardlockup_panic_setup(). 
- */ -void hardlockup_detector_disable(void) -{ - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; -} - -static int __init hardlockup_panic_setup(char *str) -{ - if (!strncmp(str, "panic", 5)) - hardlockup_panic = 1; - else if (!strncmp(str, "nopanic", 7)) - hardlockup_panic = 0; - else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; - else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - return 1; -} -__setup("nmi_watchdog=", hardlockup_panic_setup); -#endif - unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -264,43 +221,12 @@ void touch_all_softlockup_watchdogs(void) wq_watchdog_touch(-1); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR -void touch_nmi_watchdog(void) -{ - /* -* Using __raw here because some code paths have -* preemption enabled. If preemption is enabled -* then interrupts should be enabled too, in which -* case we shouldn't have to worry about the watchdog -* going off. -*/ - raw_cpu_write(watchdog_nmi_touch, true); - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -#endif - void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); __this_cpu_write(watchdog_touch_ts, 0); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR -/* watchdog detector functions */ -static bool is_hardlockup(void) -{ - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) - return true; - - __this_cpu_write(hrtimer_interrupts_saved, hrint); - return false; -} -#endif - static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); @@ -313,78 +239,18 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR - -static struct perf_event_attr wd_hw_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, -}; - -/* Callback function for perf 
event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, -struct perf_sample_data *data, -struct pt_regs *regs) -{ - /* Ensure the watchdog never gets throttled */ - event->hw.interrupts = 0; - - if (__this_cpu_read(watchdog_nmi_touch) == true) { - __this_cpu_write(watchdog_nmi_touch, false); - return; - } - - /* check for a hardlockup -* This is done by making sure our timer interrupt -* is incrementing. The timer interrupt should have -* fired multiple times before we overflow'd. If it hasn't -* then this is a goo
[RFC PATCH 3/4] watchdog: Move hardlockup detector to separate file
Move hardlockup detector code to watchdog_hld.c. Also update the makefile accordigly. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/Makefile |1 + kernel/watchdog_hld.c | 238 + 2 files changed, 239 insertions(+), 0 deletions(-) create mode 100644 kernel/watchdog_hld.c diff --git a/kernel/Makefile b/kernel/Makefile index eb26e12..314e7d6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c new file mode 100644 index 000..cd690fb --- /dev/null +++ b/kernel/watchdog_hld.c @@ -0,0 +1,238 @@ +/* + * Detect hard and soft lockups on a system + * + * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + * Note: Most of this code is borrowed heavily from the original softlockup + * detector, so thanks to Ingo for the initial implementation. + * Some chunks also taken from the old x86-specific nmi watchdog code, thanks + * to those contributors as well. + */ + +#include +#include +#include +#include + +static DEFINE_PER_CPU(bool, hard_watchdog_warn); +static DEFINE_PER_CPU(bool, watchdog_nmi_touch); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); + +/* boot commands */ +/* + * Should we panic when a soft-lockup or hard-lockup occurs: + */ +unsigned int __read_mostly hardlockup_panic = + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +static unsigned long hardlockup_allcpu_dumped; +/* + * We may not want to enable hard lockup detection by default in all cases, + * for example when running the kernel as a guest on a hypervisor. 
In these + * cases this function can be called to disable hard lockup detection. This + * function should only be executed once by the boot processor before the + * kernel command line parameters are parsed, because otherwise it is not + * possible to override this in hardlockup_panic_setup(). + */ +void hardlockup_detector_disable(void) +{ + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; +} + +static int __init hardlockup_panic_setup(char *str) +{ + if (!strncmp(str, "panic", 5)) + hardlockup_panic = 1; + else if (!strncmp(str, "nopanic", 7)) + hardlockup_panic = 0; + else if (!strncmp(str, "0", 1)) + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + else if (!strncmp(str, "1", 1)) + watchdog_enabled |= NMI_WATCHDOG_ENABLED; + return 1; +} +__setup("nmi_watchdog=", hardlockup_panic_setup); + +void touch_nmi_watchdog(void) +{ + /* +* Using __raw here because some code paths have +* preemption enabled. If preemption is enabled +* then interrupts should be enabled too, in which +* case we shouldn't have to worry about the watchdog +* going off. 
+*/ + raw_cpu_write(watchdog_nmi_touch, true); + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +/* watchdog detector functions */ +static bool is_hardlockup(void) +{ + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); + + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + return true; + + __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; +} + +static struct perf_event_attr wd_hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +/* Callback function for perf event subsystem */ +static void watchdog_overflow_callback(struct perf_event *event, +struct perf_sample_data *data, +struct pt_regs *regs) +{ + /* Ensure the watchdog never gets throttled */ + event->hw.interrupts = 0; + + if (__this_cpu_read(watchdog_nmi_touch) == true) { + __this_cpu_write(watchdog_nmi_touch, false); + return; + } + + /* check for a hardlockup +* This is done by making sure our timer interrupt +* is incrementing. The timer interrupt should have +* fired multiple times before we overflow'd. If it hasn't +* then this is a good indication the cpu is stuck +*/ + if (is_hardlockup()) { + int this_cpu = smp_processor_id(); + struct pt_regs *regs = get_irq_regs(); + + /* only print hardlockups once */ + if (__this_cpu_read(hard_watchdog_warn)
[RFC PATCH 4/4] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
Implement functions watchdog_nmi_enable and watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 44 +++- 1 files changed, 43 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..95e73c6 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ static int __init check_nmi_watchdog(void) void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_warn("NMI watchdog cannot be enabled or disabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void watchdog_nmi_disable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) + pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); + else + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[RFC PATCH 0/4] Clean up watchdog handlers
This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Don, On 10/17/2016 12:31 PM, Don Zickus wrote: On Thu, Oct 13, 2016 at 01:38:01PM -0700, Babu Moger wrote: Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be used to enable/disable architecture specific NMI watchdog handlers. These functions are defined as weak as architectures can override their definitions to enable/disable nmi watchdog behaviour. Hi Babu, This patch tested fine on my x86 box and I am ok with the changes. I do have one small cosmetic request below for a failure path. Other than that I will give my ack. Yes. I am testing these changes. If everything goes as expected, I will post v3 version tomorrow. Thanks Babu Cheers, Don Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 65 +++- 1 files changed, 44 insertions(+), 21 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..d1e84e6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +static int arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? 
*/ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. -*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); In the arch_watchdog_nmi_enable code is a pr_info on failure pr_info("Shutting down hard lockup detector on all cpus\n"); that should be moved to below.. @@ -658,7 +642,7 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static void arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -676,8 +660,13 @@ static void watchdog_nmi_disable(unsigned int cpu) } #else -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. 
+ */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { @@ -781,6 +770,40 @@ void lockup_detector_resume(void) put_online_cpus(); } +void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + +int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. +*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); moved to here: pr_info("Shutting down hard lockup det
[PATCH v3 2/2] sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Implement functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 44 +++- 1 files changed, 43 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..b55d518 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ static int panic_on_timeout; */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ error: void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int arch_watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_warn("NMI watchdog cannot be enabled or disabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void arch_watchdog_nmi_disable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) + pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); + else + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[PATCH v3 0/2] Introduce arch specific nmi enable, disable handlers
During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be implemented in arch specific handlers. If you think there is a better way to do this, please advise. Tested on sparc. Compile tested on x86. v3: Made one more change per Don Zickus comments. Moved failure path messages into generic code inside watchdog_nmi_enable. Also added matching prints in sparc to warn about the failure. v2: a) Sam Ravnborg's comments about making the definitions visible. With the new approach we don't need those definitions (NMI_WATCHDOG_ENABLED, SOFT_WATCHDOG_ENABLED, etc.) outside watchdog.c. So no action. b) Made changes per Don Zickus comments. Don, I could not use your patches as is. Reason is sparc does not define CONFIG_HARDLOCKUP_DETECTOR. So, defining default __weak function did not work for me. However, I have used your idea to define __weak functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable when CONFIG_HARDLOCKUP_DETECTOR is not defined. I feel this should have very little impact on the races you are concerned about. Please take a look. Feel free to suggest. Patch2 changes: I had to introduce a new variable nmi_init_done to synchronize watchdog thread and kernel init thread. v1: Initial version. 
Discussion thread here http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1245427.html Babu Moger (2): watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 +- kernel/watchdog.c | 69 +++--- 2 files changed, 89 insertions(+), 24 deletions(-)
[PATCH v3 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be used to enable/disable architecture specific NMI watchdog handlers. These functions are defined as weak as architectures can override their definitions to enable/disable nmi watchdog behaviour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 69 +++- 1 files changed, 46 insertions(+), 23 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..2d0765b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +static int arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. 
-*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); @@ -645,8 +629,6 @@ static int watchdog_nmi_enable(unsigned int cpu) pr_err("disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); - pr_info("Shutting down hard lockup detector on all cpus\n"); - return PTR_ERR(event); /* success path */ @@ -658,7 +640,7 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static void arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -676,8 +658,13 @@ static void watchdog_nmi_disable(unsigned int cpu) } #else -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { @@ -781,6 +768,42 @@ void lockup_detector_resume(void) put_online_cpus(); } +void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + +int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. 
+*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); + + pr_info("Shutting down hard lockup detector on all cpus\n"); + + return err; + } + + return 0; +} + static int update_watchdog_all_cpus(void) { int ret; -- 1.7.1
Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
On 10/24/2016 10:19 AM, Don Zickus wrote: On Fri, Oct 21, 2016 at 04:50:21PM -0500, Babu Moger wrote: Don, On 10/21/2016 2:19 PM, Andrew Morton wrote: On Fri, 21 Oct 2016 11:11:14 -0400 Don Zickus <dzic...@redhat.com> wrote: On Thu, Oct 20, 2016 at 08:25:27PM -0700, Andrew Morton wrote: On Thu, 20 Oct 2016 12:14:14 -0400 Don Zickus <dzic...@redhat.com> wrote: -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ This is a strange way of using __weak. Take a look at (one of many examples) kernel/module.c:module_alloc(). We simply provide a default implementation and some other compilation unit can override (actually replace) that at link time. No strange ifdeffing needed. Yeah, this is mostly because of how we enable the hardlockup detector. Some arches use the perf hw and enable CONFIG_HARDLOCKUP_DETECTOR. Other arches just use their own variant of nmi and set CONFIG_HAVE_NMI_WATCHDOG and the rest of the arches do not use this. So the thought was if CONFIG_HARDLOCKUP_DETECTOR use that implementation, everyone else use the __weak version. Then the arches like sparc can override the weak version with their own nmi enablement. I don't know how to represent those 3 states correctly and the above is what we end up with. Is there a suitable site where we could capture these considerations in a code comment? Hi Andrew, I am not sure I understand your question. When you say 'site', are you referring to the kernel/watchdog.c file? Yes, somewhere in there I guess. 
The problem with this sort of thing is that the implementation is splattered over multiple places in one file or in several files so there's no clear place to document what's happening. But I think this situation *should* be documented somewhere. Or maybe that just isn't worthwhile - feel free to disagree! The other approach that might help de-clutter this file, is to pull out the HARDLOCKUP_DETECTOR changes (as they are arch specific) and move it to say kernel/watchdog_hw_ld.c. Then all the nmi hooks in kernel/watchdog.c can be __weak and overridden by the kernel_watchdog_hw_ld.c file or the sparc files. This would leave kernel/watchdog.c with just a framework and the arch-agnostic softlockup detector. Probably easier to read and digest. Don, Yes. I am fine with your idea. Let me know if you need any help here. If you want I can start working this cleanup myself. I might take sometime as I need to spend sometime understanding the whole watchdog stuff first. If you have already started working on this then I will let you continue. Hi Babu, Feel free to start looking at it. I am trying to wrap up a couple of things here and will only be able to little poke at it the next couple of days. But for the most part you might be able to rip out anything with CONFIG_HARDLOCKUP_DETECTOR and put it into another file. Then just clean up the pieces. Don. Sure. I have started on this. Will send RFC version sometime this week. Cheers, Don Well, it depends how the code ends up looking. It's best to separate functional changes from cleanups. Generally I think it's best to do "cleanup comes first", because it's then simpler to revert the functional change if it has problems. Plus people are more *interested* in the functional change so it's best to have that at top-of-tree.
Re: [PATCH v2 RESEND] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
On 10/25/2016 1:51 AM, Mathias Nyman wrote: On 24.10.2016 17:52, Babu Moger wrote: On 10/24/2016 5:54 AM, Yoshihiro Shimoda wrote: Hi, From: Mathias Nyman Sent: Monday, October 24, 2016 6:58 PM On 22.10.2016 01:25, Babu Moger wrote: Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Do the Renesas and TI controllers still advertise the extended capability for the handoff? (XHCI_EXT_CAPS_LEGACY) I don't see this capability. Here is lspci output. It's not a PCI capability, it's an xHCI Extended Capability. If the capability is supported, and handoff fails, then quirk_usb_handoff_xhci() will print "xHCI BIOS handoff failed (BIOS bug ?)" Yes. I see these messages. After this patch I don't see these messages. Further on in the same function we wait for the "controller not ready" bit in the status register to clear. If that times out, it prints out: "xHCI HW not ready after 5 sec (HC bug?) status" No. I didn't see these messages. Do you see any of these two messages in the log? -Mathias
[PATCH v2 RESEND] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- v2: Made changes per comments from Greg KH. Extra space removal in assignment Added both vendor and device id checks. Resending the patch. Original discussion here. https://marc.info/?t=14522116207=1=4 drivers/usb/host/pci-quirks.c |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 35af362..31c9502 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -996,6 +996,14 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) } val = readl(base + ext_cap_offset); + /* Auto handoff never worked for these devices. Force it and continue */ + if ((pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) || + (pdev->vendor == PCI_VENDOR_ID_RENESAS +&& pdev->device == 0x0014)) { + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; + writel(val, base + ext_cap_offset); + } + /* If the BIOS owns the HC, signal that the OS wants it, and wait */ if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); -- 1.7.1
Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Don, On 10/21/2016 2:19 PM, Andrew Morton wrote: On Fri, 21 Oct 2016 11:11:14 -0400 Don Zickuswrote: On Thu, Oct 20, 2016 at 08:25:27PM -0700, Andrew Morton wrote: On Thu, 20 Oct 2016 12:14:14 -0400 Don Zickus wrote: -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ This is a strange way of using __weak. Take a look at (one of many examples) kernel/module.c:module_alloc(). We simply provide a default implementation and some other compilation unit can override (actually replace) that at link time. No strange ifdeffing needed. Yeah, this is mostly because of how we enable the hardlockup detector. Some arches use the perf hw and enable CONFIG_HARDLOCKUP_DETECTOR. Other arches just use their own variant of nmi and set CONFIG_HAVE_NMI_WATCHDOG and the rest of the arches do not use this. So the thought was if CONFIG_HARDLOCKUP_DETECTOR use that implementation, everyone else use the __weak version. Then the arches like sparc can override the weak version with their own nmi enablement. I don't know how to represent those 3 states correctly and the above is what we end up with. Is there a suitable site where we could capture these considerations in a code comment? Hi Andrew, I am not sure I understand your question. When you say 'site', are you referring to the kernel/watchdog.c file? Yes, somewhere in there I guess. The problem with this sort of thing is that the implementation is splattered over multiple places in one file or in several files so there's no clear place to document what's happening. But I think this situation *should* be documented somewhere. Or maybe that just isn't worthwhile - feel free to disagree! 
The other approach that might help de-clutter this file, is to pull out the HARDLOCKUP_DETECTOR changes (as they are arch specific) and move it to say kernel/watchdog_hw_ld.c. Then all the nmi hooks in kernel/watchdog.c can be __weak and overridden by the kernel_watchdog_hw_ld.c file or the sparc files. This would leave kernel/watchdog.c with just a framework and the arch-agnostic softlockup detector. Probably easier to read and digest. Don, Yes. I am fine with your idea. Let me know if you need any help here. If you want I can start working this cleanup myself. I might take sometime as I need to spend sometime understanding the whole watchdog stuff first. If you have already started working on this then I will let you continue. Well, it depends how the code ends up looking. It's best to separate functional changes from cleanups. Generally I think it's best to do "cleanup comes first", because it's then simpler to revert the functional change if it has problems. Plus people are more *interested* in the functional change so it's best to have that at top-of-tree.
Re: [PATCH v2 RESEND] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
On 10/24/2016 5:54 AM, Yoshihiro Shimoda wrote: Hi, From: Mathias Nyman Sent: Monday, October 24, 2016 6:58 PM On 22.10.2016 01:25, Babu Moger wrote: Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Do the Renesas and TI controllers still advertise the extended capability for the handoff? (XHCI_EXT_CAPS_LEGACY) I don't see this capability. Here is lspci output. # lspci -s 0009:01:00.0 -vvv 0009:01:00.0 USB controller: Texas Instruments TUSB73x0 SuperSpeed USB 3.0 xHCI Host Controller (rev 02) (prog-if 30 [XHCI]) Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+ Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Latency: 0, Cache Line Size: 64 bytes Interrupt: pin A routed to IRQ 000e Region 0: Memory at 1 (64-bit, non-prefetchable) [size=64K] Region 2: Memory at 10001 (64-bit, non-prefetchable) [size=8K] Region 4: [virtual] Memory at fffdfdc0 (32-bit, non-prefetchable) Region 5: [virtual] Memory at fffdfdc0 (32-bit, non-prefetchable) [virtual] Expansion ROM at fffdfdc0 [disabled] Capabilities: [40] Power Management version 3 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-) Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME- Capabilities: [48] MSI: Enable- Count=1/8 Maskable- 64bit+ Address: Data: Capabilities: [70] Express (v2) Endpoint, MSI 00 DevCap: MaxPayload 1024 bytes, PhantFunc 0, Latency L0s unlimited, L1 unlimited ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- DevCtl: Report errors: Correctable- Non-Fatal- Fatal- Unsupported- RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- MaxPayload 256 bytes, MaxReadReq 512 bytes DevSta: CorrErr+ UncorrErr- FatalErr- UnsuppReq+ AuxPwr- TransPend- LnkCap: Port #0, Speed 5GT/s, Width x1, ASPM L0s L1, Latency L0 <2us, L1 <64us ClockPM+ Surprise- 
LLActRep- BwNot- LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- Retrain- CommClk- ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt- LnkSta: Speed 5GT/s, Width x1, TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt- DevCap2: Completion Timeout: Not Supported, TimeoutDis+, LTR-, OBFF Not Supported DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR-, OBFF Disabled LnkCtl2: Target Link Speed: 5GT/s, EnterCompliance- SpeedDis- Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS- Compliance De-emphasis: -6dB LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete-, EqualizationPhase1- EqualizationPhase2-, EqualizationPhase3-, LinkEqualizationRequest- Capabilities: [c0] MSI-X: Enable+ Count=8 Masked- Vector table: BAR=2 offset= PBA: BAR=2 offset=1000 Capabilities: [100 v2] Advanced Error Reporting UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol- UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol- UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol- CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+ CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+ AERCap: First Error Pointer: 00, GenCap+ CGenEn- ChkCap+ ChkEn- Capabilities: [150 v1] Device Serial Number 08-00-28-00-00-20-00-00 Kernel driver in use: xhci_hcd Is this some known issue with these vendors controllers? Is there some documentation about this, errata or anything? Adding Yoshihiro Shimoda, he might know about the Renesas controller. Thank you for adding me on this email. However, unfortunately I don't know the detail about Renesas PCIe xHCI controllers. (I know the xHCI controller of R-Car SoCs for now.) Best regards, Yoshihiro Shimoda
Re: [RFC PATCH 0/4] Clean up watchdog handlers
On 10/31/2016 4:00 PM, Don Zickus wrote: On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, I finally got some cycles to poke at this today. Good work. A couple of suggestions. For bisectability, I am thinking patch2 should be first and patch1 and patch3 should be combined. Also watchdog_hld.c is going to need up top: #define pr_fmt(fmt) "NMI watchdog: " fmt otherwise the error messages miss the header. Though I don't think watchdog.c and watchdog_hld.c should have the same header. A good solution isn't coming to me right now. I will try to run some tests on this tomorrow. Don, Thanks for the feedback. Let me know if you run into issues with your tests. I will start working on the review comments. Cheers, Don Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [RFC PATCH 0/4] Clean up watchdog handlers
On 10/31/2016 4:00 PM, Don Zickus wrote: On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, I finally got some cycles to poke at this today. Good work. A couple of suggestions. For bisectability, I am thinking patch2 should be first and patch1 and patch3 should be combined. Also watchdog_hld.c is going to need up top: #define pr_fmt(fmt) "NMI watchdog: " fmt otherwise the error messages miss the header. Though I don't think watchdog.c and watchdog_hld.c should have the same header. A good solution isn't coming to me right now. I will try to run some tests on this tomorrow. Don, Thanks for the feedback. Let me know if you run into problems with your tests. I will start working on the comments. Thanks Babu Cheers, Don Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [RFC PATCH 0/4] Clean up watchdog handlers
On 11/1/2016 8:20 AM, Don Zickus wrote: On Mon, Oct 31, 2016 at 04:30:59PM -0500, Babu Moger wrote: On 10/31/2016 4:00 PM, Don Zickus wrote: On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, I finally got some cycles to poke at this today. Good work. A couple of suggestions. For bisectability, I am thinking patch2 should be first and patch1 and patch3 should be combined. Also watchdog_hld.c is going to need up top: #define pr_fmt(fmt) "NMI watchdog: " fmt otherwise the error messages miss the header. Though I don't think watchdog.c and watchdog_hld.c should have the same header. A good solution isn't coming to me right now. I will try to run some tests on this tomorrow. Don, Thanks for the feedback. Let me know if you run into problems with your tests. Hi Babu, My tests passed. I just have to tweak the expected output lines as they constantly change. :-( I am going to play with different config options to see if things break from a compile perspective. Don, Great. Thanks for the update. I had couple of compilation issues with different config options. 1. 
drivers/edac/edac_device.o:(.discard+0x0): multiple definition of `__pcpu_unique_hrtimer_interrupts' drivers/edac/edac_mc.o:(.discard+0x0): first defined here This was a problem with uni processor config. Thinking of moving the definition of hrtimer_interrupts and is_hardlockup into watchdog.c as softlockup code does most of the work here. 2. kernel/built-in.o: In function `watchdog_overflow_callback': >> watchdog_hld.c:(.text+0x56940): undefined reference to `sysctl_hardlockup_all_cpu_backtrace' Moved this definition to nmi.h. Will post the v2 version soon with all the comments included. Thanks Babu I will start working on the comments. Great. Cheers, Don Thanks Babu Cheers, Don Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [PATCH v2 0/3] Clean up watchdog handlers
On 11/4/2016 11:25 AM, Don Zickus wrote: On Tue, Nov 01, 2016 at 02:13:43PM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here are our previous discussions http://www.spinics.net/lists/sparclinux/msg16543.html http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, Thanks for the patches. It passes my panic/reboot testing. The patches look good for now. Though this change has me thinking about other cleanup changes I can make on top of this. But I am going to hold off for now until we are sure nothing really broke. As this should be a straight forward split. The only odd thing for me is I am having trouble disabling CONFIG_HARDLOCKUP_DETECTOR. For some reason def_bool y, is forcing the option on despite my repeated attempts to disable it. I had to rename the option to do some test compiling and verify it doesn't regress when disabled. Probably my environment.. Don, You are welcome. Thanks for your feedback to resolve this. Thanks for the work Babu! Acked-by: Don Zickus <dzic...@redhat.com> v2: Addressed few comments from Don Zickus. 1. Took care of bisectability issue. Previous patch2 is patch1 now. Combined patch 1 and 3. Patch 4 is now patch 3. 2. Added pr_fmt back in watchdog_hld.c 3. Tweaked the file headers for watchdog.c and watchdog_hld.c. 4. Took care of couple of config compile issues. 
drivers/edac/edac_device.o:(.discard+0x0): multiple definition of `__pcpu_unique_hrtimer_interrupts' drivers/edac/edac_mc.o:(.discard+0x0): first defined here This was a problem with uniprocessor config. Moved the definition of hrtimer_interrupts and is_hardlockup into watchdog.c as softlockup code does most of the work here. is_hardlockup is kind of generic for the most part. kernel/built-in.o: In function `watchdog_overflow_callback': watchdog_hld.c:(.text+0x56940): undefined reference to `sysctl_hardlockup_all_cpu_backtrace' Moved this definition to nmi.h. v1: Initial version Babu Moger (3): watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector to separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 24 kernel/Makefile | 1 + kernel/watchdog.c | 270 +++ kernel/watchdog_hld.c | 227 +++ 5 files changed, 310 insertions(+), 256 deletions(-) create mode 100644 kernel/watchdog_hld.c
[PATCH v2 0/3] Clean up watchdog handlers
This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here are our previous discussions http://www.spinics.net/lists/sparclinux/msg16543.html http://www.spinics.net/lists/sparclinux/msg16441.html v2: Addressed few comments from Don Zickus. 1. Took care of bisectability issue. Previous patch2 is patch1 now. Combined patch 1 and 3. Patch 4 is now patch 3. 2. Added pr_fmt back in watchdog_hld.c 3. Tweaked the file headers for watchdog.c and watchdog_hld.c. 4. Took care of couple of config compile issues. drivers/edac/edac_device.o:(.discard+0x0): multiple definition of `__pcpu_unique_hrtimer_interrupts' drivers/edac/edac_mc.o:(.discard+0x0): first defined here This was a problem with uni processor config. Moved the definition of hrtimer_interrupts and is_hardlockup into watchdog.c as softlockup code does most of the work here. is_hardlockup kind of generic most part. kernel/built-in.o: In function `watchdog_overflow_callback': watchdog_hld.c:(.text+0x56940): undefined reference to `sysctl_hardlockup_all_cpu_backtrace' Moved this definition to nmi.h. 
v1: Initial version Babu Moger (3): watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector to separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 24 kernel/Makefile | 1 + kernel/watchdog.c | 270 +++ kernel/watchdog_hld.c | 227 +++ 5 files changed, 310 insertions(+), 256 deletions(-) create mode 100644 kernel/watchdog_hld.c
[PATCH v2 1/3] watchdog: Move shared definitions to nmi.h
Move shared macros and definitions to nmi.h so that watchdog.c, new file watchdog_hld.c or any other architecture specific handler can use those definitions. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h | 24 kernel/watchdog.c | 28 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a78c35c..aacca82 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,23 @@ #include #include +/* + * The run state of the lockup detectors is controlled by the content of the + * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - + * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. + * + * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' + * are variables that are only used as an 'interface' between the parameters + * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The + * 'watchdog_thresh' variable is handled differently because its value is not + * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' + * is equal zero. + */ +#define NMI_WATCHDOG_ENABLED_BIT 0 +#define SOFT_WATCHDOG_ENABLED_BIT 1 +#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) +#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) + /** * touch_nmi_watchdog - restart NMI watchdog timeout. 
* @@ -91,9 +108,16 @@ static inline bool trigger_single_cpu_backtrace(int cpu) extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; +extern unsigned long watchdog_enabled; extern unsigned long *watchdog_cpumask_bits; +#ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; +#else +#define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 +#endif +extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..0424301 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -27,29 +27,12 @@ #include #include -/* - * The run state of the lockup detectors is controlled by the content of the - * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. - * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. 
- */ -#define NMI_WATCHDOG_ENABLED_BIT 0 -#define SOFT_WATCHDOG_ENABLED_BIT 1 -#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) -#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) - static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif int __read_mostly nmi_watchdog_enabled; int __read_mostly soft_watchdog_enabled; @@ -59,9 +42,6 @@ #ifdef CONFIG_SMP int __read_mostly sysctl_softlockup_all_cpu_backtrace; int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#else -#define sysctl_softlockup_all_cpu_backtrace 0 -#define sysctl_hardlockup_all_cpu_backtrace 0 #endif static struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(_cpumask); @@ -289,7 +269,7 @@ void touch_softlockup_watchdog_sync(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR /* watchdog detector functions */ -static bool is_hardlockup(void) +bool is_hardlockup(void) { unsigned long hrint = __this_cpu_read(hrtimer_interrupts); -- 1.7.1
[PATCH v2 3/3] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
Implement functions watchdog_nmi_enable and watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 44 +++- 1 files changed, 43 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..95e73c6 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ static int __init check_nmi_watchdog(void) void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_warn("NMI watchdog cannot be enabled or disabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void watchdog_nmi_disable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) + pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); + else + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[PATCH RESEND v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Acked-by: Sam Ravnborg <s...@ravnborg.org> --- arch/sparc/Kconfig |1 + lib/Kconfig.debug |3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b23c76b..a85e51d 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b01e547..a6c8db1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1085,6 +1085,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
[PATCH RESEND v3 2/2] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined
Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Acked-by: Sam Ravnborg <s...@ravnborg.org> --- kernel/locking/lockdep_internals.h | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..c2b8849 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,6 +46,14 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. * @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH RESEND v3 0/2] Adjust lockdep static allocations for sparc
Looks like these patches are lost in the mix. Resending with following note. Dave, This requires your Ack as it touches sparc. Peter is waiting for your Ack to queue it. Here is our previous discussion. http://marc.info/?t=14750048631=1=2 These patches limit the static allocations for lockdep data structures used for debugging locking correctness. For sparc, all the kernel's code, data, and bss, must have locked translations in the TLB so that we don't get TLB misses on kernel code and data. Current sparc chips have 8 TLB entries available that may be locked down, and with a 4mb page size, this gives a maximum of 32MB. With PROVE_LOCKING we could go over this limit and cause system boot-up problems. These patches limit the static allocations so that everything fits in current required size limit. patch 1 : Adds new config parameter CONFIG_PROVE_LOCKING_SMALL Patch 2 : Adjusts the sizes based on the new config parameter v2-> v3: Some more comments from Sam Ravnborg and Peter Zijlstra. Defined PROVE_LOCKING_SMALL as invisible and moved the selection to arch/sparc/Kconfig. v1-> v2: As suggested by Peter Zijlstra, keeping the default as is. Introduced new config variable CONFIG_PROVE_LOCKING_SMALL to handle sparc specific case. v0: Initial revision. Babu Moger (2): config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined arch/sparc/Kconfig |1 + kernel/locking/lockdep_internals.h | 20 +--- lib/Kconfig.debug |3 +++ 3 files changed, 21 insertions(+), 3 deletions(-)
[PATCH v2 2/3] watchdog: Move hardlockup detector to separate file
Separate hardlockup code from watchdog.c and move it to watchdog_hld.c. It is mostly straight forward. Remove everything inside CONFIG_HARDLOCKUP_DETECTORS. This code will go to file watchdog_hld.c. Also update the makefile accordigly. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/Makefile |1 + kernel/watchdog.c | 242 ++-- kernel/watchdog_hld.c | 227 ++ 3 files changed, 239 insertions(+), 231 deletions(-) create mode 100644 kernel/watchdog_hld.c diff --git a/kernel/Makefile b/kernel/Makefile index eb26e12..314e7d6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 0424301..d4b0fa0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,7 +24,6 @@ #include #include -#include #include static DEFINE_MUTEX(watchdog_proc_mutex); @@ -80,50 +79,9 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); -static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -#endif static unsigned long soft_lockup_nmi_warn; -/* boot commands */ -/* - * Should we panic when a soft-lockup or hard-lockup occurs: - */ -#ifdef CONFIG_HARDLOCKUP_DETECTOR -unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; -static unsigned long hardlockup_allcpu_dumped; -/* - * We may not want to enable hard lockup detection by default in 
all cases, - * for example when running the kernel as a guest on a hypervisor. In these - * cases this function can be called to disable hard lockup detection. This - * function should only be executed once by the boot processor before the - * kernel command line parameters are parsed, because otherwise it is not - * possible to override this in hardlockup_panic_setup(). - */ -void hardlockup_detector_disable(void) -{ - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; -} - -static int __init hardlockup_panic_setup(char *str) -{ - if (!strncmp(str, "panic", 5)) - hardlockup_panic = 1; - else if (!strncmp(str, "nopanic", 7)) - hardlockup_panic = 0; - else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; - else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - return 1; -} -__setup("nmi_watchdog=", hardlockup_panic_setup); -#endif - unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -244,30 +202,12 @@ void touch_all_softlockup_watchdogs(void) wq_watchdog_touch(-1); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR -void touch_nmi_watchdog(void) -{ - /* -* Using __raw here because some code paths have -* preemption enabled. If preemption is enabled -* then interrupts should be enabled too, in which -* case we shouldn't have to worry about the watchdog -* going off. 
-*/ - raw_cpu_write(watchdog_nmi_touch, true); - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -#endif - void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); __this_cpu_write(watchdog_touch_ts, 0); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR /* watchdog detector functions */ bool is_hardlockup(void) { @@ -279,7 +219,6 @@ bool is_hardlockup(void) __this_cpu_write(hrtimer_interrupts_saved, hrint); return false; } -#endif static int is_softlockup(unsigned long touch_ts) { @@ -293,78 +232,22 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR - -static struct perf_event_attr wd_hw_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, -}; - -/* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, -struct perf_sample_data *data, -struct pt_regs *regs) -{ - /* Ensure the watchdog never gets throttled */
[PATCH v2 2/2] sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Implement functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 41 - 1 files changed, 40 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..d7e2c01 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ static int panic_on_timeout; */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ error: void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,35 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int arch_watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_info_once("NMI watchdog cannot be enabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void arch_watchdog_nmi_disable(unsigned int cpu) +{ + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[PATCH v2 0/2] Introduce arch specific nmi enable, disable handlers
During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be implemented in arch specific handlers. If you think there is a better way to do this. Please advice. Tested on sparc. Compile tested on x86. v2: a)Sam Ravnborg's comments about making the definitions visible. With the new approach we dont need those definitions((NMI_WATCHDOG_ENABLED, SOFT_WATCHDOG_ENABLED etc..) outside watchdog.c. So no action. b) Made changes per Don Zickus comments. Don, I could not use your patches as is. Reason is sparc does not define CONFIG_HARDLOCKUP_DETECTOR. So, defining default __weak function did not work for me. However, I have used your idea to define __weak functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable when CONFIG_HARDLOCKUP_DETECTOR is not defined. I feel this should have very less impact on the races you are concerned about. Please take a look. Feel free to suggest. Patch2 changes: I had to introduce new variable nmi_init_done to synchronize watchdog thread and kernel init thread. v1: Initial version. Discussion thread here http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1245427.html Babu Moger (2): watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable arch/sparc/kernel/nmi.c | 41 +- kernel/watchdog.c | 65 +++--- 2 files changed, 84 insertions(+), 22 deletions(-)
Re: [PATCH 0/2] Introduce update_arch_nmi_watchdog for arch specific handlers
On 10/7/2016 10:51 AM, Don Zickus wrote: On Thu, Oct 06, 2016 at 03:16:41PM -0700, Babu Moger wrote: During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new function update_arch_nmi_watchdog which can be implemented in arch specific handlers. If you think there is a better way to do this. Please advice. Tested on sparc. Compile tested on x86. Hi Babu, Thanks for the patch. Yeah, I don't test sparc at all (lack of hardware). Sorry about that. We did spend quite a bit of time trying to get various soft/hard lockup logic going for the /proc stuff and I am wondering if your patches are to simple and expose some of the races we tried to fix. Therefore I am wondering if we could re-use some of our logic for your case. The perf stuff is really the x86 equivalent of arch_watchdog_enable. I am wondering if we break that out as a __weak default function and then have sparc override it with its own enable/disable functions. Something along the lines below (compiled on x86 but untested)? Hi Don, Sorry for the late response. I ran into issues with the setups and new approach. I could not use your patches as is. Reason is sparc does not define CONFIG_HARDLOCKUP_DETECTOR. So, defining default __weak function did not work for me. However, I have used your idea to define __weak functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable when CONFIG_HARDLOCKUP_DETECTOR is not defined. Sending v2 version now. Please take a look. Thanks for your inputs. 
Cheers, Don diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..55cd2d3 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. -*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); @@ -658,7 +642,36 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. 
+*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); + + return err; + } + + return 0; +} + +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -675,6 +688,11 @@ static void watchdog_nmi_disable(unsigned int cpu) } } +static void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; }
[PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be used to enable/disable architecture specific NMI watchdog handlers. These functions are defined as weak as architectures can override their definitions to enable/disable nmi watchdog behaviour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 65 +++- 1 files changed, 44 insertions(+), 21 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..d1e84e6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +static int arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. 
-*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); @@ -658,7 +642,7 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static void arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -676,8 +660,13 @@ static void watchdog_nmi_disable(unsigned int cpu) } #else -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { @@ -781,6 +770,40 @@ void lockup_detector_resume(void) put_online_cpus(); } +void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + +int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. +*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); + + return err; + } + + return 0; +} + static int update_watchdog_all_cpus(void) { int ret; -- 1.7.1
[PATCH v2] arch/sparc: Avoid DCTI Couples
Avoid unintended DCTI Couples. Use of DCTI couples is deprecated. Also address the "Programming Note" for optimal performance. Here is the complete text from Oracle SPARC Architecture Specs. 6.3.4.7 DCTI Couples "A delayed control transfer instruction (DCTI) in the delay slot of another DCTI is referred to as a “DCTI couple”. The use of DCTI couples is deprecated in the Oracle SPARC Architecture; no new software should place a DCTI in the delay slot of another DCTI, because on future Oracle SPARC Architecture implementations DCTI couples may execute either slowly or differently than the programmer assumes it will. SPARC V8 and SPARC V9 Compatibility Note The SPARC V8 architecture left behavior undefined for a DCTI couple. The SPARC V9 architecture defined behavior in that case, but as of UltraSPARC Architecture 2005, use of DCTI couples was deprecated. Software should not expect high performance from DCTI couples, and performance of DCTI couples should be expected to decline further in future processors. Programming Note As noted in TABLE 6-5 on page 115, an annulled branch-always (branch-always with a = 1) instruction is not architecturally a DCTI. However, since not all implementations make that distinction, for optimal performance, a DCTI should not be placed in the instruction word immediately following an annulled branch-always instruction (BA,A or BPA,A)." 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Rob Gardner <rob.gard...@oracle.com> --- arch/sparc/kernel/head_64.S|4 arch/sparc/kernel/misctrap.S |1 + arch/sparc/kernel/rtrap_64.S |1 + arch/sparc/kernel/spiterrs.S |1 + arch/sparc/kernel/sun4v_tlb_miss.S |1 + arch/sparc/kernel/urtt_fill.S |1 + arch/sparc/kernel/winfixup.S |2 ++ arch/sparc/lib/NG2memcpy.S |4 arch/sparc/lib/NG4memcpy.S |1 + arch/sparc/lib/NG4memset.S |1 + arch/sparc/lib/NGmemcpy.S |1 + 11 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 6aa3da1..4410119 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -96,6 +96,7 @@ sparc64_boot: andn%g1, PSTATE_AM, %g1 wrpr%g1, 0x0, %pstate ba,a,pt %xcc, 1f +nop .globl prom_finddev_name, prom_chosen_path, prom_root_node .globl prom_getprop_name, prom_mmu_name, prom_peer_name @@ -613,6 +614,7 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +nop niagara4_patch: callniagara4_patch_copyops nop @@ -622,6 +624,7 @@ niagara4_patch: nop ba,a,pt %xcc, 80f +nop niagara2_patch: callniagara2_patch_copyops @@ -632,6 +635,7 @@ niagara2_patch: nop ba,a,pt %xcc, 80f +nop niagara_patch: callniagara_patch_copyops diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 34b4933..9276d2f 100644 --- a/arch/sparc/kernel/misctrap.S +++ b/arch/sparc/kernel/misctrap.S @@ -82,6 +82,7 @@ do_stdfmna: callhandle_stdfmna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 216948c..709a82e 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -237,6 +237,7 @@ rt_continue:ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 bne,pt %xcc, user_rtt_fill_32bit wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit +nop user_rtt_fill_fixup_dax: ba,pt %xcc, user_rtt_fill_fixup_common diff --git a/arch/sparc/kernel/spiterrs.S 
b/arch/sparc/kernel/spiterrs.S index 4a73009..d7e5408 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: rd %pc, %g7 ba,a,pt %xcc, 2f +nop 1: ba,pt %xcc, etrap_irq rd %pc, %g7 diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19..c19f352 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -352,6 +352,7 @@ sun4v_mna: callsun4v_do_mna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop /* Privileged Action. */ sun4v_privact: diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 5604a2b..364af32 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S @@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: cal
[PATCH] arch/sparc: Avoid DCTI Couples
Avoid un-intended DCTI Couples. Use of DCTI couples is deprecated. Refer UltraSPARC Architecture 2005(Section 6.3.4.7 - DCTI Couples). http://www.oracle.com/technetwork/systems/opensparc/1537734 "A delayed control transfer instruction (DCTI) in the delay slot of another DCTI is referred to as a DCTI couple. The use of DCTI couples is deprecated in the Oracle SPARC Architecture; no new software should place a DCTI in the delay slot of another DCTI, because on future Oracle SPARC Architecture implementations DCTI couples may execute either slowly or differently than the programmer assumes it will." Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Rob Gardner <rob.gard...@oracle.com> --- arch/sparc/kernel/head_64.S|4 arch/sparc/kernel/misctrap.S |1 + arch/sparc/kernel/rtrap_64.S |1 + arch/sparc/kernel/spiterrs.S |1 + arch/sparc/kernel/sun4v_tlb_miss.S |1 + arch/sparc/kernel/urtt_fill.S |1 + arch/sparc/kernel/winfixup.S |2 ++ arch/sparc/lib/NG2memcpy.S |4 arch/sparc/lib/NG4memcpy.S |1 + arch/sparc/lib/NG4memset.S |1 + arch/sparc/lib/NGmemcpy.S |1 + 11 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 6aa3da1..4410119 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -96,6 +96,7 @@ sparc64_boot: andn%g1, PSTATE_AM, %g1 wrpr%g1, 0x0, %pstate ba,a,pt %xcc, 1f +nop .globl prom_finddev_name, prom_chosen_path, prom_root_node .globl prom_getprop_name, prom_mmu_name, prom_peer_name @@ -613,6 +614,7 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +nop niagara4_patch: callniagara4_patch_copyops nop @@ -622,6 +624,7 @@ niagara4_patch: nop ba,a,pt %xcc, 80f +nop niagara2_patch: callniagara2_patch_copyops @@ -632,6 +635,7 @@ niagara2_patch: nop ba,a,pt %xcc, 80f +nop niagara_patch: callniagara_patch_copyops diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 34b4933..9276d2f 100644 --- a/arch/sparc/kernel/misctrap.S +++ 
b/arch/sparc/kernel/misctrap.S @@ -82,6 +82,7 @@ do_stdfmna: callhandle_stdfmna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 216948c..709a82e 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -237,6 +237,7 @@ rt_continue:ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 bne,pt %xcc, user_rtt_fill_32bit wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit +nop user_rtt_fill_fixup_dax: ba,pt %xcc, user_rtt_fill_fixup_common diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S index 4a73009..d7e5408 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: rd %pc, %g7 ba,a,pt %xcc, 2f +nop 1: ba,pt %xcc, etrap_irq rd %pc, %g7 diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19..c19f352 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -352,6 +352,7 @@ sun4v_mna: callsun4v_do_mna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop /* Privileged Action. 
*/ sun4v_privact: diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 5604a2b..364af32 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S @@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: callsun4v_data_access_exception nop ba,a,pt %xcc, rtrap +nop 1: callspitfire_data_access_exception nop diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S index 855019a..1ee173c 100644 --- a/arch/sparc/kernel/winfixup.S +++ b/arch/sparc/kernel/winfixup.S @@ -152,6 +152,8 @@ fill_fixup_dax: callsun4v_data_access_exception nop ba,a,pt %xcc, rtrap +nop 1: callspitfire_data_access_exception nop ba,a,pt %xcc, rtrap +nop diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index c629dbd..64dcd6c 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -326,11 +326,13 @@ FUNC_NAME:/*
Re: [PATCH RFC 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7
David, Thanks for the comments. I am working on addressing your feedback. Comments inline below. On 7/29/2017 4:36 PM, David Miller wrote: From: Babu Moger <babu.mo...@oracle.com> Date: Thu, 27 Jul 2017 15:57:29 -0600 @@ -600,7 +600,7 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch This part will need to be respun now that the M8 patches are in as there will be a slight conflict in this hunk. Actually, these patches have been tested both on M7 and M8. I wanted to add M8 also. But M8 patches were not in the kernel yet. Now that these M8 patches(from Allen) are in the kernel, I can add it now. Will update it in the second version. +.register %g2,#scratch + + .section".text" + .global FUNC_NAME + .type FUNC_NAME, #function + .align 16 +FUNC_NAME: + srlx%o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, %g1! save %o0 + brz,pn %o2, .Lsmallx + + cmp%o2, 3 +ble,pn %icc, .Ltiny_cp + cmp%o2, 19 +ble,pn %icc, .Lsmall_cp + or %o0, %o1, %g2 +cmp %o2, SMALL_MAX +bl,pn %icc, .Lmedium_cp + nop What in world is going on with this indentation? I can't comprehend how, if anyone actually put their eyes on this code and the patch itself, wouldn't notice this. DO NOT mix all-spaced and TAB+space indentation. Always, consistently, use as many TABs as you can and then when needed add trailing spaces. Sure. Will address these problems. In general will address all the format issues. thanks +.Lsrc_dst_aligned_on_8: + ! check if we are copying MED_MAX or more bytes +set MED_MAX, %o3 +cmp %o2, %o3 ! limit to store buffer size + bgu,pn %ncc, .Llarge_align8_copy +nop Again, same problem here. +/* + * Handle all cases where src and dest are aligned on word + * boundaries. Use unrolled loops for better performance. 
+ * This option wins over standard large data move when + * source and destination is in cache for.Lmedium + * to short data moves. + */ +set MED_WMAX, %o3 +cmp %o2, %o3 ! limit to store buffer size + bge,pt %ncc, .Lunalignrejoin ! otherwise rejoin main loop +nop More weird indentation. +.dbalign: +andcc %o5, 7, %o3 ! is sp1 aligned on a 8 byte bound? +bz,pt %ncc, .blkalign ! already long word aligned + sub %o3, 8, %o3 ! -(bytes till long word aligned) + +add %o2, %o3, %o2 ! update o2 with new count +! Set -(%o3) bytes till sp1 long word aligned +1: stb %o1, [%o5] ! there is at least 1 byte to set + inccc %o3 ! byte clearing loop +bl,pt %ncc, 1b +inc %o5 More weird indentation. +! Now sp1 is block aligned +.blkwr: +andn%o2, 63, %o4! calculate size of blocks in bytes +brz,pn %o1, .wrzero! special case if c == 0 + and %o2, 63, %o3! %o3 = bytes left after blk stores. + +set MIN_LOOP, %g1 +cmp %o4, %g1! check there are enough bytes to set + blu,pn %ncc, .short_set! to justify cost of membar +! must be > pre-cleared lines + nop Likewise. + +! initial cache-clearing stores +! get store pipeline moving + rd %asi, %g3 ! save %asi to be restored later +wr %g0, ASI_STBIMRU_P, %asi Likewise. +.wrzero_small: +stxa%o1, [%o5]ASI_STBI_P +subcc %o4, 64, %o4 +bgu,pt %ncc, .wrzero_small + add %o5, 64, %o5 + ba,a.bsi_done Likewise. +.asi_done: + wr %g3, 0x0, %asi ! restored saved %asi +.bsi_done: +membar #StoreStore ! required by use of Block Store Init Likewise. + .size M7memset,.-M7memset It's usually a lot better to use ENTRY() and ENDPROC() instead of expanding these kinds of directives out. Ok. Sure. Will address it. + .globl m7_patch_copyops + .type m7_patch_copyops,#function +m7_patch_copyops: ENTRY() Sure. + .size m7_patch_copyops,.-m7_patch_copyops ENDPROC() Sure + .globl m7_patch_bzero + .type m7_patch_bzero,#function +m7_patch_bzero: Likewise. Ok + .size m7_patch_bzero,.-m7_patch_bzero Likewise. Ok + .globl m7_patch_pageops + .type m7_patch_pageops,#function +m7_patch_page
Re: [PATCH v2 0/4] Update memcpy, memset etc. for M7/M8 architectures
David, Thanks for applying. On 8/10/2017 4:38 PM, David Miller wrote: From: Babu Moger <babu.mo...@oracle.com> Date: Mon, 7 Aug 2017 17:52:48 -0600 This series of patches updates the memcpy, memset, copy_to_user, copy_from_user etc for SPARC M7/M8 architecture. This doesn't build, you cannot assume the existence of "%ncc", it is a recent addition. Furthermore there is no need to ever use %ncc in v9 targetted code anyways. I'll fix that up, but this was a really disappointing build failure to hit. Thank you.. Meanwhile, two questions: 1) Is this also faster on T4 as well? If it is, we can just get rid of the T4 routines and use this on those chips as well. At the time of this work, our focus was mostly on T7 and T8. We did not test this code on T4. For T4 and other older configs we used NG4 versions. I would think it would require some changes to make it work on T4. 2) There has been a lot of discussion and consideration put into how a memcpy/memset routine might be really great for the local cpu but overall pessimize performance for other cpus either locally on the same core (contention for physical resources such as ports to the store buffer and/or L3 cache) or on other cores. Has any such study been done into these issues wrt. this new code? No, we have not done this kind of study.
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/4/2017 4:09 AM, Geert Uytterhoeven wrote: Hi Greg, On Tue, Jul 4, 2017 at 10:04 AM, Greg KHwrote: On Tue, Jul 04, 2017 at 09:15:55AM +0200, Geert Uytterhoeven wrote: On Mon, Jul 3, 2017 at 4:58 PM, Greg KH wrote: USB/PHY patches for 4.13-rc1 Heikki Krogerus (3): usb: typec: Add support for UCSI interface Commit c1b0bc2dabfa884d ("usb: typec: Add support for UCSI interface"): --- /dev/null +++ b/drivers/usb/typec/ucsi/Kconfig @@ -0,0 +1,23 @@ +config TYPEC_UCSI + tristate "USB Type-C Connector System Software Interface driver" + depends on !CPU_BIG_ENDIAN To work as expected, and prevent this driver from being enabled on big endian systems, this depends on "[PATCH v3 0/3] Define CPU_BIG_ENDIAN or warn for inconsistencies". https://lkml.org/lkml/2017/6/12/1068 Is this a problem? I have no idea what happens if you enable the driver on big endian. I thought that series was slated to be merged soon, is that not going to happen? Me too. But it's not in next-20170704. Babu, what's the plan? Yes. I think these series are safe to be merged. Max, Do you have any concerns about xtensa? Thanks! Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/6/2017 3:24 AM, Max Filippov wrote: Hi Babu, On Tue, Jul 4, 2017 at 10:19 AM, Babu Moger <babu.mo...@oracle.com> wrote: Max, Do you have any concerns about xtensa? no, not ATM. I still haven't got a chance to look closer at moving endianness macros back to Kconfig for xtensa. Thanks. Greg, Can you please stage this series for the next merge?
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/6/2017 9:33 AM, Greg KH wrote: On Thu, Jul 06, 2017 at 09:28:06AM -0500, Babu Moger wrote: On 7/6/2017 3:24 AM, Max Filippov wrote: Hi Babu, On Tue, Jul 4, 2017 at 10:19 AM, Babu Moger <babu.mo...@oracle.com> wrote: Max, Do you have any concerns about xtensa? no, not ATM. I still haven't got a chance to look closer at moving endianness macros back to Kconfig for xtensa. Thanks. Greg, Can you please stage this series for the next merge. What series? What exactly are you referring to here? totally confused... Greg, Sorry for not making it clear. I am referring to the following series. https://patchwork.kernel.org/patch/9782851/ https://patchwork.kernel.org/patch/9782843/ https://patchwork.kernel.org/patch/9782847/ greg k-h
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/6/2017 10:51 AM, Greg KH wrote: On Thu, Jul 06, 2017 at 10:28:03AM -0500, Babu Moger wrote: On 7/6/2017 9:33 AM, Greg KH wrote: On Thu, Jul 06, 2017 at 09:28:06AM -0500, Babu Moger wrote: On 7/6/2017 3:24 AM, Max Filippov wrote: Hi Babu, On Tue, Jul 4, 2017 at 10:19 AM, Babu Moger <babu.mo...@oracle.com> wrote: Max, Do you have any concerns about xtensa? no, not ATM. I still haven't got a chance to look closer at moving endianness macros back to Kconfig for xtensa. Thanks. Greg, Can you please stage this series for the next merge. What series? What exactly are you referring to here? totally confused... Greg, Sorry for not making it clear. I am referring to this following series. https://patchwork.kernel.org/patch/9782851/ https://patchwork.kernel.org/patch/9782843/ https://patchwork.kernel.org/patch/9782847/ Can you resend them please so I can take them from email? Sure. Will send it in a bit. thanks thanks, greg k-h
[PATCH v3 1/3] arch: Define CPU_BIG_ENDIAN for all fixed big endian archs
While working on enabling queued rwlock on SPARC, found this following code in include/asm-generic/qrwlock.h which uses CONFIG_CPU_BIG_ENDIAN to clear a byte. static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } Problem is many of the fixed big endian architectures don't define CPU_BIG_ENDIAN and clears the wrong byte. Define CPU_BIG_ENDIAN for all the fixed big endian architecture to fix it. Also found few more references of this config parameter in drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Be aware that this may cause regressions if someone has worked-around problems in the above code already. Remove the work-around. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Signed-off-by: Babu Moger <babu.mo...@oracle.com> Suggested-by: Arnd Bergmann <a...@arndb.de> Acked-by: Geert Uytterhoeven <ge...@linux-m68k.org> Acked-by: David S. Miller <da...@davemloft.net> Acked-by: Stafford Horne <sho...@gmail.com> --- arch/frv/Kconfig |3 +++ arch/h8300/Kconfig|3 +++ arch/m68k/Kconfig |3 +++ arch/openrisc/Kconfig |3 +++ arch/parisc/Kconfig |3 +++ arch/sparc/Kconfig|3 +++ 6 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index eefd9a4..1cce824 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -17,6 +17,9 @@ config FRV select HAVE_DEBUG_STACKOVERFLOW select ARCH_NO_COHERENT_DMA_MMAP +config CPU_BIG_ENDIAN + def_bool y + config ZONE_DMA bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3ae8525..5380ac8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -23,6 +23,9 @@ config H8300 select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d140206..029a58b 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,6 +23,9 @@ 
config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 1e95920..a0f2e4a 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -29,6 +29,9 @@ config OPENRISC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select NO_BOOTMEM +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9e..dda1f55 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series). The PA-RISC Linux project home page is at <http://www.parisc-linux.org/>. +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 908f019..0d9dc49 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config CPU_BIG_ENDIAN + def_bool y + config ARCH_ATU bool default y if SPARC64 -- 1.7.1
[PATCH v3 3/3] include: warn for inconsistent endian config definition
We have seen some generic code use config parameter CONFIG_CPU_BIG_ENDIAN to decide the endianness. Here are the few examples. include/asm-generic/qrwlock.h drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Display warning if CPU_BIG_ENDIAN is not defined on big endian architecture and also warn if it defined on little endian architectures. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Signed-off-by: Babu Moger <babu.mo...@oracle.com> Suggested-by: Arnd Bergmann <a...@arndb.de> Acked-by: Geert Uytterhoeven <ge...@linux-m68k.org> --- include/linux/byteorder/big_endian.h|4 include/linux/byteorder/little_endian.h |4 2 files changed, 8 insertions(+), 0 deletions(-) diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 3920414..ffd2159 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -3,5 +3,9 @@ #include +#ifndef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN +#endif + #include #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 0805737..ba910bb 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -3,5 +3,9 @@ #include +#ifdef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, CONFIG_CPU_BIG_ENDIAN is set +#endif + #include #endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */ -- 1.7.1
[PATCH v3 0/3] Define CPU_BIG_ENDIAN or warn for inconsistencies
Resending the series per Greg KH's request. Found this problem while enabling queued rwlock on SPARC. The parameter CONFIG_CPU_BIG_ENDIAN is used to clear the specific byte in qrwlock structure. Without this parameter, we clear the wrong byte. Here is the code in include/asm-generic/qrwlock.h static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } Also found few more references of this parameter in drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Here is our previous discussion. https://lkml.org/lkml/2017/5/24/620 Based on the discussion, it was decided to add CONFIG_CPU_BIG_ENDIAN for all the fixed big endian architecture(frv, h8300, m68k, openrisc, parisc and sparc). And warn if there are inconsistencies in this definition. v2 -> v3: Added the choice statement for endianness selection for microblaze. Updated the Makefile for microblaze(Suggested by Arnd Bergmann) to properly compile for the correct format. Updated acks. v1 -> v2: Updated the commit messages and acks. Babu Moger (3): arch: Define CPU_BIG_ENDIAN for all fixed big endian archs arch/microblaze: Add choice for endianness and update Makefile include: warn for inconsistent endian config definition arch/frv/Kconfig|3 +++ arch/h8300/Kconfig |3 +++ arch/m68k/Kconfig |3 +++ arch/microblaze/Kconfig | 16 arch/microblaze/Makefile|2 ++ arch/openrisc/Kconfig |3 +++ arch/parisc/Kconfig |3 +++ arch/sparc/Kconfig |3 +++ include/linux/byteorder/big_endian.h|4 include/linux/byteorder/little_endian.h |4 10 files changed, 44 insertions(+), 0 deletions(-)
[PATCH v3 2/3] arch/microblaze: Add choice for endianness and update Makefile
microblaze architectures can be configured for either little or big endian formats. Add a choice option for the user to select the correct endian format(default to big endian). Also update the Makefile so toolchain can compile for the format it is configured for. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Arnd Bergmann <a...@arndb.de> --- arch/microblaze/Kconfig | 16 arch/microblaze/Makefile |2 ++ 2 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 85885a5..74aa5de 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -35,6 +35,22 @@ config MICROBLAZE select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS +# Endianness selection +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + microblaze architectures can be configured for either little or + big endian formats. Be sure to select the appropriate mode. + +config CPU_BIG_ENDIAN + bool "Big endian" + +config CPU_LITTLE_ENDIAN + bool "Little endian" + +endchoice + config SWAP def_bool n diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 740f2b8..1f6c486 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -35,6 +35,8 @@ endif CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare +CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian +CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) -- 1.7.1
[PATCH v2 1/4] arch/sparc: Separate the exception handlers from NG4memcpy
Separate the exception handlers from NG4memcpy so that it can be used with new memcpy routines. Make a separate file for all these handlers. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Makefile |2 + arch/sparc/lib/Memcpy_utils.S | 163 + arch/sparc/lib/NG4memcpy.S| 149 - 3 files changed, 165 insertions(+), 149 deletions(-) create mode 100644 arch/sparc/lib/Memcpy_utils.S diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..37930c0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -36,6 +36,8 @@ lib-$(CONFIG_SPARC64) += NG2patch.o lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o +lib-$(CONFIG_SPARC64) += Memcpy_utils.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S new file mode 100644 index 000..f7a26e0 --- /dev/null +++ b/arch/sparc/lib/Memcpy_utils.S @@ -0,0 +1,163 @@ +#ifndef __ASM_MEMCPY_UTILS +#define __ASM_MEMCPY_UTILS + +#include +#include +#include + +ENTRY(__restore_asi_fp) + VISExitHalf + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi_fp) + +ENTRY(__restore_asi) + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi) + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi +mov%o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi +add%o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi +add%o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi +add%o2, 
%o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi 
+add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt
[PATCH v2 0/4] Update memcpy, memset etc. for M7/M8 architectures
This series of patches updates the memcpy, memset, copy_to_user, copy_from_user etc for SPARC M7/M8 architecture. New algorithm here takes advantage of the M7/M8 block init store ASIs, with much more optimized way to improve the performance. More detail are in code comments. Tested and compared the latency measured in ticks(NG4memcpy vs new M7memcpy). 1. Memset numbers(Aligned memset) No.of bytes NG4memsetM7memset Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 77 25 -67.53 7 43 33 -23.25 3272 68 -5.55 128 164 44 -73.17 256 335 68 -79.70 512 511 220 -56.94 1024 1552627 -59.60 2048 35151322-62.38 4096 63032472-60.78 8192 13118 4867-62.89 16384 26206 10371 -60.42 32768 52501 18569 -64.63 65536 100219 35899 -64.17 2. Memcpy numbers(Aligned memcpy) No.of bytes NG4memcpyM7memcpy Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 20 19 -5 7 29 27 -6.89 3230 28 -6.66 128 89 69 -22.47 256 142 143 0.70 512 341 283 -17.00 1024 1588655 -58.75 2048 35531357-61.80 4096 72182590-64.11 8192 13701 5231-61.82 16384 28304 10716 -62.13 32768 56516 22995 -59.31 65536 115443 50840 -55.96 3. Memset numbers(un-aligned memset) No.of bytes NG4memsetM7memset Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 40 31 -22.5 7 52 29 -44.2307692308 3289 86 -3.3707865169 128 201 74 -63.184079602 256 340 154 -54.7058823529 512 961 335 -65.1404786681 1024 1799686 -61.8677042802 2048 35751260-64.7552447552 4096 65602627-59.9542682927 8192 13161 6018-54.273991338 16384 26465 10439 -60.5554505951 32768 52119 18649 -64.2184232238 65536 101593 35724 -64.8361599717 4. 
Memcpy numbers(un-aligned memcpy) No.of bytes NG4memcpyM7memcpy Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 26 19 -26.9230769231 7 48 45 -6.25 3252 49 -5.7692307692 128 284 334 17.6056338028 256 430 482 12.0930232558 512 646 690 6.8111455108 1024 10511016-3.3301617507 2048 178718181.7347509793 4096 330933762.0247809006 8192 81517444-8.673782358 16384 34222 34556 0.9759803635 32768 87851 95044 8.1877269468 65536 158331 159572 0.7838010244 There is not much difference in numbers with Un-aligned copies between NG4memcpy and M7memcpy because they both mostly use the same algorithems. v2: 1. Fixed indentation issues found by David Miller 2. Used ENTRY and ENDPROC for the labels in M7patch.S as suggested by David Miller 3. Now M8 also will use M7memcpy. Also tested on M8 config. 4. These patches are created on top of below M8 patches https://patchwork.ozlabs.org/patch/792661/ https://patchwork.ozlabs.org/patch/792662/ However, I did not see these patches in sparc-next tree. It may be in queue now. It is possible these patches might cause some build problems. It will resolve once all M8 patches are in sparc-next tree. v0: Initial version Babu Moger (4): arch/sparc: Separate the exception handlers from NG4memcpy arch/sparc: Rename exception handlers arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8 arch/sparc: Add accurate exception reporting
[PATCH v2 2/4] arch/sparc: Rename exception handlers
Rename exception handlers to memcpy_xxx as these are going to be used by new memcpy routines and these handlers are not exclusive to NG4memcpy anymore. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Memcpy_utils.S | 120 +++--- arch/sparc/lib/NG4memcpy.S| 128 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S index f7a26e0..bcc5d77 100644 --- a/arch/sparc/lib/Memcpy_utils.S +++ b/arch/sparc/lib/Memcpy_utils.S @@ -16,148 +16,148 @@ ENTRY(__restore_asi) wr %g0, ASI_AIUS, %asi ENDPROC(__restore_asi) -ENTRY(NG4_retl_o2) +ENTRY(memcpy_retl_o2) ba,pt %xcc, __restore_asi mov%o2, %o0 -ENDPROC(NG4_retl_o2) -ENTRY(NG4_retl_o2_plus_1) +ENDPROC(memcpy_retl_o2) +ENTRY(memcpy_retl_o2_plus_1) ba,pt %xcc, __restore_asi add%o2, 1, %o0 -ENDPROC(NG4_retl_o2_plus_1) -ENTRY(NG4_retl_o2_plus_4) +ENDPROC(memcpy_retl_o2_plus_1) +ENTRY(memcpy_retl_o2_plus_4) ba,pt %xcc, __restore_asi add%o2, 4, %o0 -ENDPROC(NG4_retl_o2_plus_4) -ENTRY(NG4_retl_o2_plus_o5) +ENDPROC(memcpy_retl_o2_plus_4) +ENTRY(memcpy_retl_o2_plus_o5) ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5) -ENTRY(NG4_retl_o2_plus_o5_plus_4) +ENDPROC(memcpy_retl_o2_plus_o5) +ENTRY(memcpy_retl_o2_plus_o5_plus_4) add %o5, 4, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_4) -ENTRY(NG4_retl_o2_plus_o5_plus_8) +ENDPROC(memcpy_retl_o2_plus_o5_plus_4) +ENTRY(memcpy_retl_o2_plus_o5_plus_8) add %o5, 8, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_8) -ENTRY(NG4_retl_o2_plus_o5_plus_16) +ENDPROC(memcpy_retl_o2_plus_o5_plus_8) +ENTRY(memcpy_retl_o2_plus_o5_plus_16) add %o5, 16, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_16) -ENTRY(NG4_retl_o2_plus_o5_plus_24) +ENDPROC(memcpy_retl_o2_plus_o5_plus_16) +ENTRY(memcpy_retl_o2_plus_o5_plus_24) add %o5, 24, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 
-ENDPROC(NG4_retl_o2_plus_o5_plus_24) -ENTRY(NG4_retl_o2_plus_o5_plus_32) +ENDPROC(memcpy_retl_o2_plus_o5_plus_24) +ENTRY(memcpy_retl_o2_plus_o5_plus_32) add %o5, 32, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_32) -ENTRY(NG4_retl_o2_plus_g1) +ENDPROC(memcpy_retl_o2_plus_o5_plus_32) +ENTRY(memcpy_retl_o2_plus_g1) ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1) -ENTRY(NG4_retl_o2_plus_g1_plus_1) +ENDPROC(memcpy_retl_o2_plus_g1) +ENTRY(memcpy_retl_o2_plus_g1_plus_1) add %g1, 1, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_1) -ENTRY(NG4_retl_o2_plus_g1_plus_8) +ENDPROC(memcpy_retl_o2_plus_g1_plus_1) +ENTRY(memcpy_retl_o2_plus_g1_plus_8) add %g1, 8, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_8) -ENTRY(NG4_retl_o2_plus_o4) +ENDPROC(memcpy_retl_o2_plus_g1_plus_8) +ENTRY(memcpy_retl_o2_plus_o4) ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4) -ENTRY(NG4_retl_o2_plus_o4_plus_8) +ENDPROC(memcpy_retl_o2_plus_o4) +ENTRY(memcpy_retl_o2_plus_o4_plus_8) add %o4, 8, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_8) -ENTRY(NG4_retl_o2_plus_o4_plus_16) +ENDPROC(memcpy_retl_o2_plus_o4_plus_8) +ENTRY(memcpy_retl_o2_plus_o4_plus_16) add %o4, 16, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_16) -ENTRY(NG4_retl_o2_plus_o4_plus_24) +ENDPROC(memcpy_retl_o2_plus_o4_plus_16) +ENTRY(memcpy_retl_o2_plus_o4_plus_24) add %o4, 24, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_24) -ENTRY(NG4_retl_o2_plus_o4_plus_32) +ENDPROC(memcpy_retl_o2_plus_o4_plus_24) +ENTRY(memcpy_retl_o2_plus_o4_plus_32) add %o4, 32, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_32) -ENTRY(NG4_retl_o2_plus_o4_plus_40) +ENDPROC(memcpy_retl_o2_plus_o4_plus_32) +ENTRY(memcpy_retl_o2_plus_o4_plus_40) add %o4, 40, %o4 ba,pt %xcc, 
__restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_40) -ENTRY(NG4_retl_o2_plus_o4_plus_48) +ENDPROC(memcpy_retl_o2_plus_o4_plus_40) +ENTRY(memcpy_retl_o2_plus_o4_plus_48) add %o4, 48, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_48) -ENTRY(NG4_retl_o2_plus_o4_plus_56) +E
[PATCH v2 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8
New algorithm that takes advantage of the M7/M8 block init store ASI, ie, overlapping pipelines and miss buffer filling. Full details in code comments. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/head_64.S | 16 +- arch/sparc/lib/M7copy_from_user.S | 41 ++ arch/sparc/lib/M7copy_to_user.S | 51 ++ arch/sparc/lib/M7memcpy.S | 923 + arch/sparc/lib/M7memset.S | 352 ++ arch/sparc/lib/M7patch.S | 51 ++ arch/sparc/lib/Makefile |3 + 7 files changed, 1435 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/lib/M7copy_from_user.S create mode 100644 arch/sparc/lib/M7copy_to_user.S create mode 100644 arch/sparc/lib/M7memcpy.S create mode 100644 arch/sparc/lib/M7memset.S create mode 100644 arch/sparc/lib/M7patch.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..bf9a5ac 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -603,10 +603,10 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_M8 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch @@ -621,6 +621,18 @@ niagara_tlb_fixup: ba,a,pt %xcc, 80f nop + +sparc_m7_patch: + callm7_patch_copyops +nop + callm7_patch_bzero +nop + callm7_patch_pageops +nop + + ba,a,pt %xcc, 80f +nop + niagara4_patch: callniagara4_patch_copyops nop diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000..d0689d7 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,41 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ */ + + +#define EX_LD(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_LD_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000..d3be132 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_ST_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#defineSTORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. +* Reading %asi to check for KERNEL_DS is comparatively +* cheap. +*/ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include &q
[PATCH v2 4/4] arch/sparc: Add accurate exception reporting in M7memcpy
Add accurate exception reporting in M7memcpy Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/M7copy_from_user.S | 11 +- arch/sparc/lib/M7copy_to_user.S | 10 +- arch/sparc/lib/M7memcpy.S | 396 ++-- arch/sparc/lib/Memcpy_utils.S | 182 + 4 files changed, 390 insertions(+), 209 deletions(-) diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S index d0689d7..66464b3 100644 --- a/arch/sparc/lib/M7copy_from_user.S +++ b/arch/sparc/lib/M7copy_from_user.S @@ -5,23 +5,22 @@ */ -#define EX_LD(x) \ +#define EX_LD(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x)\ +#define EX_LD_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; - #ifndef ASI_AIUS #define ASI_AIUS 0x11 #endif @@ -35,7 +34,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S index d3be132..a60ac46 100644 --- a/arch/sparc/lib/M7copy_to_user.S +++ b/arch/sparc/lib/M7copy_to_user.S @@ -5,19 +5,19 @@ */ -#define EX_ST(x) \ +#define EX_ST(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x)\ +#define EX_ST_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -45,7 +45,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S index 0a0421d..d016fc2 100644 --- a/arch/sparc/lib/M7memcpy.S +++ b/arch/sparc/lib/M7memcpy.S @@ -96,17 +96,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x)x +#define EX_LD_FP(x,y) x 
#endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x)x +#define EX_ST_FP(x,y) x #endif #ifndef EX_RETVAL @@ -206,9 +206,9 @@ FUNC_NAME: sub %o1, %o0, %o1 ! %o1 gets the difference 7: ! dst aligning loop add %o1, %o0, %o4 - EX_LD(LOAD(ldub, %o4, %o4)) ! load one byte + EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte subcc %o5, 1, %o5 - EX_ST(STORE(stb, %o4, %o0)) + EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1) bgu,pt %ncc, 7b add%o0, 1, %o0 ! advance dst add %o1, %o0, %o1 ! restore %o1 @@ -233,64 +233,64 @@ FUNC_NAME: ble,pn %ncc, .Lmedl63 ! skip big loop if less than 64 bytes nop .Lmedl64: - EX_LD(LOAD(ldx, %o1, %o4)) ! load + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63) ! load subcc %o2, 64, %o2! decrement length count - EX_ST(STORE(stx, %o4, %o0)) ! and store - EX_LD(LOAD(ldx, %o1+8, %o3))! a block of 64 bytes - EX_ST(STORE(stx, %o3, %o0+8)) - EX_LD(LOAD(ldx, %o1+16, %o4)) - EX_ST(STORE(stx, %o4, %o0+16)) - EX_LD(LOAD(ldx, %o1+24, %o3)) - EX_ST(STORE(stx, %o3, %o0+24)) - EX_LD(LOAD(ldx, %o1+32, %o4)) ! load - EX_ST(STORE(stx, %o4, %o0+32)) ! and store - EX_LD(LOAD(ldx, %o1+40, %o3)) ! a block of 64 bytes + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64) ! and store + EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64 + EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56) + EX_LD(LOAD
[PATCH RFC 2/4] arch/sparc: Rename exception handlers
Rename exception handlers to memcpy_xxx as these are going to be used by new memcpy routines and these handlers are not exclusive to NG4memcpy anymore. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Memcpy_utils.S | 120 +++--- arch/sparc/lib/NG4memcpy.S| 128 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S index f7a26e0..bcc5d77 100644 --- a/arch/sparc/lib/Memcpy_utils.S +++ b/arch/sparc/lib/Memcpy_utils.S @@ -16,148 +16,148 @@ ENTRY(__restore_asi) wr %g0, ASI_AIUS, %asi ENDPROC(__restore_asi) -ENTRY(NG4_retl_o2) +ENTRY(memcpy_retl_o2) ba,pt %xcc, __restore_asi mov%o2, %o0 -ENDPROC(NG4_retl_o2) -ENTRY(NG4_retl_o2_plus_1) +ENDPROC(memcpy_retl_o2) +ENTRY(memcpy_retl_o2_plus_1) ba,pt %xcc, __restore_asi add%o2, 1, %o0 -ENDPROC(NG4_retl_o2_plus_1) -ENTRY(NG4_retl_o2_plus_4) +ENDPROC(memcpy_retl_o2_plus_1) +ENTRY(memcpy_retl_o2_plus_4) ba,pt %xcc, __restore_asi add%o2, 4, %o0 -ENDPROC(NG4_retl_o2_plus_4) -ENTRY(NG4_retl_o2_plus_o5) +ENDPROC(memcpy_retl_o2_plus_4) +ENTRY(memcpy_retl_o2_plus_o5) ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5) -ENTRY(NG4_retl_o2_plus_o5_plus_4) +ENDPROC(memcpy_retl_o2_plus_o5) +ENTRY(memcpy_retl_o2_plus_o5_plus_4) add %o5, 4, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_4) -ENTRY(NG4_retl_o2_plus_o5_plus_8) +ENDPROC(memcpy_retl_o2_plus_o5_plus_4) +ENTRY(memcpy_retl_o2_plus_o5_plus_8) add %o5, 8, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_8) -ENTRY(NG4_retl_o2_plus_o5_plus_16) +ENDPROC(memcpy_retl_o2_plus_o5_plus_8) +ENTRY(memcpy_retl_o2_plus_o5_plus_16) add %o5, 16, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_16) -ENTRY(NG4_retl_o2_plus_o5_plus_24) +ENDPROC(memcpy_retl_o2_plus_o5_plus_16) +ENTRY(memcpy_retl_o2_plus_o5_plus_24) add %o5, 24, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 
-ENDPROC(NG4_retl_o2_plus_o5_plus_24) -ENTRY(NG4_retl_o2_plus_o5_plus_32) +ENDPROC(memcpy_retl_o2_plus_o5_plus_24) +ENTRY(memcpy_retl_o2_plus_o5_plus_32) add %o5, 32, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_32) -ENTRY(NG4_retl_o2_plus_g1) +ENDPROC(memcpy_retl_o2_plus_o5_plus_32) +ENTRY(memcpy_retl_o2_plus_g1) ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1) -ENTRY(NG4_retl_o2_plus_g1_plus_1) +ENDPROC(memcpy_retl_o2_plus_g1) +ENTRY(memcpy_retl_o2_plus_g1_plus_1) add %g1, 1, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_1) -ENTRY(NG4_retl_o2_plus_g1_plus_8) +ENDPROC(memcpy_retl_o2_plus_g1_plus_1) +ENTRY(memcpy_retl_o2_plus_g1_plus_8) add %g1, 8, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_8) -ENTRY(NG4_retl_o2_plus_o4) +ENDPROC(memcpy_retl_o2_plus_g1_plus_8) +ENTRY(memcpy_retl_o2_plus_o4) ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4) -ENTRY(NG4_retl_o2_plus_o4_plus_8) +ENDPROC(memcpy_retl_o2_plus_o4) +ENTRY(memcpy_retl_o2_plus_o4_plus_8) add %o4, 8, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_8) -ENTRY(NG4_retl_o2_plus_o4_plus_16) +ENDPROC(memcpy_retl_o2_plus_o4_plus_8) +ENTRY(memcpy_retl_o2_plus_o4_plus_16) add %o4, 16, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_16) -ENTRY(NG4_retl_o2_plus_o4_plus_24) +ENDPROC(memcpy_retl_o2_plus_o4_plus_16) +ENTRY(memcpy_retl_o2_plus_o4_plus_24) add %o4, 24, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_24) -ENTRY(NG4_retl_o2_plus_o4_plus_32) +ENDPROC(memcpy_retl_o2_plus_o4_plus_24) +ENTRY(memcpy_retl_o2_plus_o4_plus_32) add %o4, 32, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_32) -ENTRY(NG4_retl_o2_plus_o4_plus_40) +ENDPROC(memcpy_retl_o2_plus_o4_plus_32) +ENTRY(memcpy_retl_o2_plus_o4_plus_40) add %o4, 40, %o4 ba,pt %xcc, 
__restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_40) -ENTRY(NG4_retl_o2_plus_o4_plus_48) +ENDPROC(memcpy_retl_o2_plus_o4_plus_40) +ENTRY(memcpy_retl_o2_plus_o4_plus_48) add %o4, 48, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_48) -ENTRY(NG4_retl_o2_plus_o4_plus_56) +E
[PATCH RFC 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7
New algorithm that takes advantage of the M7 block init store ASI, ie, overlapping pipelines and miss buffer filling. Full details in code comments. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/head_64.S | 12 +- arch/sparc/lib/M7copy_from_user.S | 41 ++ arch/sparc/lib/M7copy_to_user.S | 51 ++ arch/sparc/lib/M7memcpy.S | 924 + arch/sparc/lib/M7memset.S | 354 ++ arch/sparc/lib/M7patch.S | 55 +++ arch/sparc/lib/Makefile |3 + 7 files changed, 1439 insertions(+), 1 deletions(-) create mode 100644 arch/sparc/lib/M7copy_from_user.S create mode 100644 arch/sparc/lib/M7copy_to_user.S create mode 100644 arch/sparc/lib/M7memcpy.S create mode 100644 arch/sparc/lib/M7memset.S create mode 100644 arch/sparc/lib/M7patch.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 41a4073..a7de798 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -600,7 +600,7 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch @@ -615,6 +615,16 @@ niagara_tlb_fixup: ba,a,pt %xcc, 80f nop +sparc_m7_patch: + callm7_patch_copyops +nop + callm7_patch_bzero +nop + callm7_patch_pageops +nop + + ba,a,pt %xcc, 80f +nop niagara4_patch: callniagara4_patch_copyops nop diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000..d0689d7 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,41 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ */ + + +#define EX_LD(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_LD_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000..d3be132 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_ST_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#defineSTORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. +* Reading %asi to check for KERNEL_DS is comparatively +* cheap. +*/ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S new file mode 100644 index 000..d4
[PATCH RFC 4/4] arch/sparc: Add accurate exception reporting in M7memcpy
Add accurate exception reporting in M7memcpy Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/M7copy_from_user.S | 11 +- arch/sparc/lib/M7copy_to_user.S | 10 +- arch/sparc/lib/M7memcpy.S | 396 ++-- arch/sparc/lib/Memcpy_utils.S | 182 + 4 files changed, 390 insertions(+), 209 deletions(-) diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S index d0689d7..66464b3 100644 --- a/arch/sparc/lib/M7copy_from_user.S +++ b/arch/sparc/lib/M7copy_from_user.S @@ -5,23 +5,22 @@ */ -#define EX_LD(x) \ +#define EX_LD(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x)\ +#define EX_LD_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; - #ifndef ASI_AIUS #define ASI_AIUS 0x11 #endif @@ -35,7 +34,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S index d3be132..a60ac46 100644 --- a/arch/sparc/lib/M7copy_to_user.S +++ b/arch/sparc/lib/M7copy_to_user.S @@ -5,19 +5,19 @@ */ -#define EX_ST(x) \ +#define EX_ST(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x)\ +#define EX_ST_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -45,7 +45,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S index d49f702..5cb3dae 100644 --- a/arch/sparc/lib/M7memcpy.S +++ b/arch/sparc/lib/M7memcpy.S @@ -96,17 +96,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x)x +#define EX_LD_FP(x,y) x 
#endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x)x +#define EX_ST_FP(x,y) x #endif #ifndef EX_RETVAL @@ -207,9 +207,9 @@ FUNC_NAME: sub %o1, %o0, %o1 ! %o1 gets the difference 7: ! dst aligning loop add %o1, %o0, %o4 - EX_LD(LOAD(ldub, %o4, %o4)) ! load one byte + EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte subcc %o5, 1, %o5 - EX_ST(STORE(stb, %o4, %o0)) + EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1) bgu,pt %ncc, 7b add%o0, 1, %o0 ! advance dst add %o1, %o0, %o1 ! restore %o1 @@ -234,64 +234,64 @@ FUNC_NAME: ble,pn %ncc, .Lmedl63 ! skip big loop if less than 64 bytes nop .Lmedl64: - EX_LD(LOAD(ldx, %o1, %o4)) ! load + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63) ! load subcc %o2, 64, %o2! decrement length count - EX_ST(STORE(stx, %o4, %o0)) ! and store - EX_LD(LOAD(ldx, %o1+8, %o3))! a block of 64 bytes - EX_ST(STORE(stx, %o3, %o0+8)) - EX_LD(LOAD(ldx, %o1+16, %o4)) - EX_ST(STORE(stx, %o4, %o0+16)) - EX_LD(LOAD(ldx, %o1+24, %o3)) - EX_ST(STORE(stx, %o3, %o0+24)) - EX_LD(LOAD(ldx, %o1+32, %o4)) ! load - EX_ST(STORE(stx, %o4, %o0+32)) ! and store - EX_LD(LOAD(ldx, %o1+40, %o3)) ! a block of 64 bytes + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64) ! and store + EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64 + EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56) + EX_LD(LOAD
[PATCH RFC 1/4] arch/sparc: Separate the exception handlers from NG4memcpy
Separate the exception handlers from NG4memcpy so that it can be used with new memcpy routines. Make a separate file for all these handlers. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Makefile |2 + arch/sparc/lib/Memcpy_utils.S | 163 + arch/sparc/lib/NG4memcpy.S| 149 - 3 files changed, 165 insertions(+), 149 deletions(-) create mode 100644 arch/sparc/lib/Memcpy_utils.S diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..37930c0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -36,6 +36,8 @@ lib-$(CONFIG_SPARC64) += NG2patch.o lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o +lib-$(CONFIG_SPARC64) += Memcpy_utils.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S new file mode 100644 index 000..f7a26e0 --- /dev/null +++ b/arch/sparc/lib/Memcpy_utils.S @@ -0,0 +1,163 @@ +#ifndef __ASM_MEMCPY_UTILS +#define __ASM_MEMCPY_UTILS + +#include +#include +#include + +ENTRY(__restore_asi_fp) + VISExitHalf + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi_fp) + +ENTRY(__restore_asi) + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi) + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi +mov%o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi +add%o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi +add%o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi +add%o2, 
%o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi 
+add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt
Re: [PATCH 2/2] include: warn for inconsistent endian config definition
On 6/12/2017 3:51 PM, Arnd Bergmann wrote: On Mon, Jun 12, 2017 at 10:30 PM, Babu Moger <babu.mo...@oracle.com> wrote: Looks like microblaze can be configured for either little- or big-endian format. How about adding a choice statement to address this? Here is my proposed patch. Hi Babu, This part looks fine, but I think we also need this one: diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 740f2b82a182..1f6c486826a0 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -35,6 +35,8 @@ endif CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare +CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian +CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) That way, we don't have to guess what the toolchain does, but rather tell it to do whatever is configured, like we do for most other architectures. Ok. Thanks. Arnd. Will update and resend the series. Unfortunately we can't do the same thing on xtensa, as that no longer supports the -mbig-endian/-mlittle-endian flags in any recent gcc version (a long time ago it had them, but they were removed along with many other options). Arnd
Re: [PATCH 2/2] include: warn for inconsistent endian config definition
On 6/12/2017 3:58 PM, Max Filippov wrote: On Mon, Jun 12, 2017 at 1:51 PM, Arnd Bergmann wrote: That way, we don't have to guess what the toolchain does, but rather tell it to do whatever is configured, like we do for most other architectures. Unfortunately we can't do the same thing on xtensa, as that no longer supports the -mbig-endian/-mlittle-endian flags in any recent gcc version (a long time ago it had them, but they were removed along with many other options). For xtensa we probably need to generate a Kconfig fragment that would go in with the variant subdirectory. That will solve this, and clean up other options that we currently have for manual selection for xtensa, but there's actually no choice, i.e. the option has to be selected correctly, there's only one correct choice and otherwise the kernel either won't build or won't work. I'll look into it. Max. Thanks. Please update us when you are done.
Re: [PATCH 2/2] include: warn for inconsistent endian config definition
Hi All, On 6/10/2017 9:06 AM, kbuild test robot wrote: Hi Babu, [auto build test WARNING on linus/master] [also build test WARNING on v4.12-rc4 next-20170609] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Babu-Moger/Define-CPU_BIG_ENDIAN-or-warn-for-inconsistencies/20170610-200424 config: microblaze-mmu_defconfig (attached as .config) compiler: microblaze-linux-gcc (GCC) 6.2.0 reproduce: wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=microblaze All warnings (new ones prefixed by >>): In file included from arch/microblaze/include/uapi/asm/byteorder.h:7:0, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops.h:34, from arch/microblaze/include/asm/bitops.h:1, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/asm-generic/bug.h:15, from arch/microblaze/include/asm/bug.h:1, from include/linux/bug.h:4, from include/linux/page-flags.h:9, from kernel/bounds.c:9: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN ^~~ -- In file included from arch/microblaze/include/uapi/asm/byteorder.h:7:0, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops.h:34, from arch/microblaze/include/asm/bitops.h:1, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/asm-generic/bug.h:15, from arch/microblaze/include/asm/bug.h:1, from include/linux/bug.h:4, from include/linux/page-flags.h:9, from kernel/bounds.c:9: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN ^~~ In file included from 
arch/microblaze/include/uapi/asm/byteorder.h:7:0, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops.h:34, from arch/microblaze/include/asm/bitops.h:1, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/linux/list.h:8, from include/linux/rculist.h:9, from include/linux/pid.h:4, from include/linux/sched.h:13, from arch/microblaze/kernel/asm-offsets.c:13: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN ^~~ :1326:2: warning: #warning syscall statx not implemented [-Wcpp] vim +7 include/linux/byteorder/big_endian.h 1 #ifndef _LINUX_BYTEORDER_BIG_ENDIAN_H 2 #define _LINUX_BYTEORDER_BIG_ENDIAN_H 3 4 #include 5 6 #ifndef CONFIG_CPU_BIG_ENDIAN > 7 #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN 8 #endif 9 10 #include 11 #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation Looks like microblaze can be configured to either little or big endian formats. How about adding a choice statement to address this. Here is my proposed patch. === diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 85885a5..74aa5de 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -35,6 +35,22 @@ config MICROBLAZE select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS +# Endianness selection +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + microblaze architectures can be configured for either little or + big endian formats. Be sure to select the appropriate mode. + +config CPU_BIG_ENDIAN + bool "Big endian" + +config CPU_LITTLE_ENDIAN + bool "Little endian" + +endchoice + config SWAP def_bool n
[PATCH v3 1/3] arch: Define CPU_BIG_ENDIAN for all fixed big endian archs
While working on enabling queued rwlock on SPARC, I found the following code in include/asm-generic/qrwlock.h which uses CONFIG_CPU_BIG_ENDIAN to clear a byte. static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } The problem is that many of the fixed big endian architectures don't define CPU_BIG_ENDIAN, so the wrong byte is cleared. Define CPU_BIG_ENDIAN for all the fixed big endian architectures to fix it. Also found a few more references to this config parameter in drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Be aware that this may cause regressions if someone has worked around problems in the above code already. Remove the work-around. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Signed-off-by: Babu Moger <babu.mo...@oracle.com> Suggested-by: Arnd Bergmann <a...@arndb.de> Acked-by: Geert Uytterhoeven <ge...@linux-m68k.org> Acked-by: David S. Miller <da...@davemloft.net> Acked-by: Stafford Horne <sho...@gmail.com> --- arch/frv/Kconfig |3 +++ arch/h8300/Kconfig|3 +++ arch/m68k/Kconfig |3 +++ arch/openrisc/Kconfig |3 +++ arch/parisc/Kconfig |3 +++ arch/sparc/Kconfig|3 +++ 6 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index eefd9a4..1cce824 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -17,6 +17,9 @@ config FRV select HAVE_DEBUG_STACKOVERFLOW select ARCH_NO_COHERENT_DMA_MMAP +config CPU_BIG_ENDIAN + def_bool y + config ZONE_DMA bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3ae8525..5380ac8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -23,6 +23,9 @@ config H8300 select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d140206..029a58b 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,6 +23,9 @@ 
config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 1e95920..a0f2e4a 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -29,6 +29,9 @@ config OPENRISC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select NO_BOOTMEM +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9e..dda1f55 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series). The PA-RISC Linux project home page is at <http://www.parisc-linux.org/>. +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 908f019..0d9dc49 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config CPU_BIG_ENDIAN + def_bool y + config ARCH_ATU bool default y if SPARC64 -- 1.7.1