[PATCH 1/4] ARM: EXYNOS: remove soc_is_exynos4/5 from exynos.c
This patch adds support for checking soc compatibility based on compatibility match. It will help us in removing soc_is_exynos4 and soc_is_exynos5 function usage and definition. CC: Russell King CC: Heiko Stuebner CC: Thomas Abraham Signed-off-by: Pankaj Dubey --- arch/arm/mach-exynos/exynos.c| 30 +++--- arch/arm/plat-samsung/include/plat/cpu.h |3 --- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 59eb1f1..93ae076 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -212,6 +212,30 @@ static const struct of_device_id exynos_dt_pmu_match[] = { {}, }; +static const struct of_device_id exynos5_device_ids[] = { + { .compatible = "samsung,exynos5250", }, + { .compatible = "samsung,exynos5420", }, + {}, +}; + +static const struct of_device_id exynos4_device_ids[] = { + { .compatible = "samsung,exynos4210", }, + { .compatible = "samsung,exynos4212", }, + { .compatible = "samsung,exynos4412", }, + {}, +}; + +static inline bool soc_is_compatible(const struct of_device_id *device_ids) +{ + unsigned long root = of_get_flat_dt_root(); + const struct of_device_id *matches = device_ids; + for (; matches->compatible[0]; matches++) { + if (of_flat_dt_is_compatible(root, matches->compatible)) + return true; + } + return false; +} + /* * exynos_map_io * @@ -219,10 +243,10 @@ static const struct of_device_id exynos_dt_pmu_match[] = { */ static void __init exynos_map_io(void) { - if (soc_is_exynos4()) + if (soc_is_compatible(exynos4_device_ids)) iotable_init(exynos4_iodesc, ARRAY_SIZE(exynos4_iodesc)); - if (soc_is_exynos5()) + if (soc_is_compatible(exynos5_device_ids)) iotable_init(exynos5_iodesc, ARRAY_SIZE(exynos5_iodesc)); } @@ -306,7 +330,7 @@ static void __init exynos_dt_machine_init(void) * are available then re-configure the interrupts via the * system register. 
*/ - if (soc_is_exynos5()) { + if (soc_is_compatible(exynos5_device_ids)) { for_each_compatible_node(i2c_np, NULL, i2c_compat) { if (of_device_is_available(i2c_np)) { id = of_alias_get_id(i2c_np, "i2c"); diff --git a/arch/arm/plat-samsung/include/plat/cpu.h b/arch/arm/plat-samsung/include/plat/cpu.h index 5992b8d..18a9a00 100644 --- a/arch/arm/plat-samsung/include/plat/cpu.h +++ b/arch/arm/plat-samsung/include/plat/cpu.h @@ -166,9 +166,6 @@ IS_SAMSUNG_CPU(exynos5440, EXYNOS5440_SOC_ID, EXYNOS5_SOC_MASK) # define soc_is_exynos5440() 0 #endif -#define soc_is_exynos4() (soc_is_exynos4210() || soc_is_exynos4212() || \ - soc_is_exynos4412()) -#define soc_is_exynos5() (soc_is_exynos5250() || soc_is_exynos5420()) #define IODESC_ENT(x) { (unsigned long)S3C24XX_VA_##x, __phys_to_pfn(S3C24XX_PA_##x), S3C24XX_SZ_##x, MT_DEVICE } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/4] Introducing Exynos ChipId driver
This patch series attempts to get rid of soc_is_exynos macros and eventually with the help of this series we can probably get rid of CONFIG_SOC_EXYNOS in near future. Each Exynos SoC has ChipID block which can give information about SoC's product Id and revision number. Currently we have single DT binding information for this as "samsung,exynos4210-chipid". But Exynos4 and Exynos5 SoC series have one small difference in chip Id, with respect to product id bit-masks. So it means we should have separate compatible string for these different series of SoCs. So I have created new binding information for handling this difference. Also currently I can think of putting this driver code under "drivers/misc/" but suggestions are welcome. Also current form of driver is missing platform driver and needs init function to be called from machine file (either exynos.c or platsmp.c). I hope lot of suggestions and comments to improve this further. This patch series is based on Kukjin Kim's for-next (3.14_rc1 tag) and prepared on top of following patch series and its dependent patch series. [1]: Map SYSRAM through generic SRAM bindings. http://www.spinics.net/lists/arm-kernel/msg327677.html [2]: Exynos PMU cleanup and refactoring. 
https://lkml.org/lkml/2014/4/30/44 Pankaj Dubey (4): ARM: EXYNOS: remove soc_is_exynos4/5 from exynos.c ARM: EXYNOS: remove unused header inclusion from hotplug.c misc: exynos-chipid: Add Exynos Chipid driver support ARM: EXYNOS: Refactoring to remove soc_is_exynos macros from exynos .../bindings/arm/samsung/exynos-chipid.txt | 15 arch/arm/Kconfig |1 + arch/arm/boot/dts/exynos4.dtsi |2 +- arch/arm/boot/dts/exynos5.dtsi |2 +- arch/arm/mach-exynos/exynos.c | 66 arch/arm/mach-exynos/hotplug.c |2 - arch/arm/mach-exynos/platsmp.c | 10 ++- arch/arm/mach-exynos/pm.c | 28 +++ arch/arm/plat-samsung/include/plat/cpu.h | 60 -- drivers/clk/samsung/clk-exynos4.c |2 +- drivers/cpufreq/exynos-cpufreq.c |9 +-- drivers/cpufreq/exynos-cpufreq.h |1 - drivers/cpufreq/exynos4x12-cpufreq.c |5 +- drivers/misc/Kconfig |7 ++ drivers/misc/Makefile |1 + drivers/misc/exynos-chipid.c | 83 include/linux/exynos-soc.h | 46 +++ 17 files changed, 215 insertions(+), 125 deletions(-) create mode 100644 Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt create mode 100644 drivers/misc/exynos-chipid.c create mode 100644 include/linux/exynos-soc.h -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/4] misc: exynos-chipid: Add Exynos Chipid driver support
Exynos SoCs have Chipid IP, for identification of product IDs and SoC revisions. Till now we are using static macros such as soc_is_exynos and #ifdefs for run time identification of SoCs and their revisions. This is leading to add new Kconfig, soc_is_exynos definitions each time new SoC support is getting added. So this driver intends to provide initialization code for all these functionalities and thus helping in removing macros. CC: Arnd Bergmann CC: Greg Kroah-Hartman Signed-off-by: Pankaj Dubey --- drivers/misc/Kconfig |7 drivers/misc/Makefile|1 + drivers/misc/exynos-chipid.c | 83 ++ include/linux/exynos-soc.h | 46 +++ 4 files changed, 137 insertions(+) create mode 100644 drivers/misc/exynos-chipid.c create mode 100644 include/linux/exynos-soc.h diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 1cb7408..f313bd3 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -515,6 +515,13 @@ config SRAM the genalloc API. It is supposed to be used for small on-chip SRAM areas found on many SoCs. +config EXYNOS_CHIPID + tristate "Support Exynos CHIPID" + default y + depends on ARCH_EXYNOS || ARM64 + help + If you say Y here you get support for the Exynos CHIP id. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7eb4b69..48c8fb5 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -55,3 +55,4 @@ obj-$(CONFIG_SRAM)+= sram.o obj-y += mic/ obj-$(CONFIG_GENWQE) += genwqe/ obj-$(CONFIG_ECHO) += echo/ +obj-$(CONFIG_EXYNOS_CHIPID)+= exynos-chipid.o diff --git a/drivers/misc/exynos-chipid.c b/drivers/misc/exynos-chipid.c new file mode 100644 index 000..eb23339 --- /dev/null +++ b/drivers/misc/exynos-chipid.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2014-2015 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com/ + * + * EXYNOS - CHIP ID support + * Author: Pankaj Dubey + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define EXYNOS4_SOC_MASK 0xFFFE0 +#define EXYNOS5_SOC_MASK 0xF + +#define PROD_ID_SHIFT (12) + +static void __iomem*exynos_chipid_base; +unsigned int exynos_soc_id = EXYNOS_SOC_UNKNOWN; +unsigned int exynos_soc_rev; + +struct exynos_chipid_data { + unsigned int product_id_mask; + unsigned int product_id_shift; +}; + +static struct exynos_chipid_data exynos4_chipid_data = { + .product_id_mask= EXYNOS4_SOC_MASK, + .product_id_shift = PROD_ID_SHIFT, +}; + +static struct exynos_chipid_data exynos5_chipid_data = { + .product_id_mask= EXYNOS5_SOC_MASK, + .product_id_shift = PROD_ID_SHIFT, +}; + +static struct of_device_id of_exynos_chipid_ids[] = { + { + .compatible = "samsung,exynos4-chipid", + .data = (void *)&exynos4_chipid_data, + }, + { + .compatible = "samsung,exynos5-chipid", + .data = (void *)&exynos5_chipid_data, + }, + {}, +}; + +/** + * early_exynos_chipid_init - Early chipid initialization + */ +void __init early_exynos_chipid_init(void) +{ + struct device_node *np = NULL; + const struct of_device_id *match; + struct exynos_chipid_data *chipid_data; + int pro_id; + + if (!exynos_chipid_base) { + np = of_find_matching_node_and_match(NULL, + of_exynos_chipid_ids, &match); + if (!np) + panic("%s, failed to find chipid node\n", __func__); + + chipid_data = (struct exynos_chipid_data *) match->data; + exynos_chipid_base = of_iomap(np, 0); + + if (!exynos_chipid_base) + panic("%s: failed to map registers\n", __func__); + + pro_id = __raw_readl(exynos_chipid_base); + exynos_soc_id = (pro_id >> chipid_data->product_id_shift) + & chipid_data->product_id_mask; + exynos_soc_rev = pro_id & 0xFF; + pr_info("Exynos: CPUID[0x%x] CPU_REV[0x%x] Detected\n", + exynos_soc_id, 
exynos_soc_rev); + } +} diff --git a/include/linux/exynos-soc.h b/include/linux/exynos-soc.h new file mode 100644 index 000..cb3ae06 --- /dev/null +++ b/include/linux/exynos-soc.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Header for EXYNOS SoC Chipid support + * + * This program is free software; you can
[PATCH 2/4] ARM: EXYNOS: remove unused header inclusion from hotplug.c
This patch removed "plat/cpu.h" inclusion from hotplug.c as it is not required. Signed-off-by: Pankaj Dubey --- arch/arm/mach-exynos/hotplug.c |2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c index 0243ef3..5e19601 100644 --- a/arch/arm/mach-exynos/hotplug.c +++ b/arch/arm/mach-exynos/hotplug.c @@ -19,8 +19,6 @@ #include #include -#include - #include "common.h" #include "regs-pmu.h" -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] EFI urgent fix
Folks, please pull the following patch from Dave that fixes some bugs in the EFI earlyprintk code when using ",keep". The following changes since commit 47514c996fac5e6f13ef3a4c5e23f1c5cffabb7b: efi: Pass correct file handle to efi_file_{read,close} (2014-04-10 21:20:03 +0100) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git tags/efi-urgent for you to fetch changes up to 5f35eb0e29ca26da82febe49d7698dbeb8882ea0: x86/efi: earlyprintk=efi,keep fix (2014-05-03 06:39:06 +0100) * Fix earlyprintk=efi,keep support by switching to an ioremap() mapping of the framebuffer when early_ioremap() is no longer available and dropping __init from functions that may be invoked after free_initmem() - Dave Young Dave Young (1): x86/efi: earlyprintk=efi,keep fix arch/x86/platform/efi/early_printk.c | 83 +++- 1 file changed, 64 insertions(+), 19 deletions(-) -- Matt Fleming, Intel Open Source Technology Center -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 03/13] PCI: quirk dma_func_alias for Ricoh devices
On Sat, 2014-05-03 at 10:29 +0800, Andrew Cooks wrote: > Hi Alex > > On Fri, May 2, 2014 at 12:27 AM, Alex Williamson > wrote: > > The existing quirk for these devices doesn't really solve the problem, > > re-implement it using the DMA alias iterator. We'll come back later > > and remove the existing quirk and dma_source interface. > > > > Signed-off-by: Alex Williamson > > --- > > drivers/pci/quirks.c | 16 > > 1 file changed, 16 insertions(+) > > > > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c > > index e729206..a458c6b 100644 > > --- a/drivers/pci/quirks.c > > +++ b/drivers/pci/quirks.c > > @@ -,6 +,22 @@ int pci_dev_specific_reset(struct pci_dev *dev, int > > probe) > > return -ENOTTY; > > } > > > > +static void quirk_dma_func0_alias(struct pci_dev *dev) > > +{ > > + if (PCI_SLOT(dev->devfn) != 0) > > + dev->dma_func_alias |= (1 << 0); > > +} > > + > > +/* > > + * https://bugzilla.redhat.com/show_bug.cgi?id=605888 > > + * > > + * Some Ricoh devices use function 0 as the PCIe requester ID for DMA. > > + */ > > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe822, > > quirk_dma_func0_alias); > > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe230, > > quirk_dma_func0_alias); > > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe832, > > quirk_dma_func0_alias); > > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe832, > > quirk_dma_func0_alias); > > 0xe832 is listed twice. oops, copy-paste error > Previously only 0xe832 needed the dma alias on my thinkpad T410, which > has all three devices. > > > + > > static struct pci_dev *pci_func_0_dma_source(struct pci_dev *dev) > > { > > if (!PCI_FUNC(dev->devfn)) > > > > Unfortunately, this quirk doesn't work for me. I tried it without > modification, as well as with each alias individually. 
I get: > > Set context mapping for 0d:00.3 > firewire_ohci :0d:00.3: added OHCI v1.10 device as card 0, 4 IR + > 4 IT contexts, quirks 0x11 > dmar: DRHD: handling fault status reg 2 > dmar: DMAR:[DMA Read] Request device [0d:00.0] fault addr f000 > DMAR:[fault reason 02] Present bit in context entry is clear > > I think I need to see > Set context mapping for 0d:00.0 > before > Set context mapping for 0d:00.3 It would actually be the reverse, we always set the device, then the alias for the device. > in the log, but it's not there. I'd love to look into this and > understand it properly, but I don't have time for the next four weeks. > > The devices are attached as follows: > BDF, device ID > 0d:00.0, e822 > 0d:00.1, e230 > 0d:00.3, e832 > > The kernel log is attached. Hmm, there are only a few reasons why you'd never see 0d:00.3 followed by 0d:00.0... 1) dma_func_alias bit 0 isn't getting set on 0d:00.3; we are building with CONFIG_PCI_QUIRKS=y, right? 2) domain_context_mapping_one called from domain_context_mapping_cb returns !0; there's only one possible non-zero return for a non-vm, non-si domain 3) something is broken in the first loop of pci_for_each_dma_alias; I'm not seeing anything obvious Anyway, appreciate any debugging you're able to fit in, my only ricoh device has only function 0. Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] drivers/rtc/rtc-puv3.c: remove ">" for typo issue MIME-Version: 1.0
It is only a typo issue, the related commit: "1fbc4c4 drivers/rtc/rtc-puv3.c: use dev_dbg() instead of pr_debug()" The related error (for unicore32 with allmodconfig): CC [M] drivers/rtc/rtc-puv3.o drivers/rtc/rtc-puv3.c: In function ‘puv3_rtc_setalarm’: drivers/rtc/rtc-puv3.c:143: error: ‘struct device’ has no member named ‘dev’ Signed-off-by: Chen Gang --- drivers/rtc/rtc-puv3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c index c56310e..1cff2a2 100644 --- a/drivers/rtc/rtc-puv3.c +++ b/drivers/rtc/rtc-puv3.c @@ -140,7 +140,7 @@ static int puv3_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) rtc_tm_to_time(tm, &rtcalarm_count); writel(rtcalarm_count, RTC_RTAR); - puv3_rtc_setaie(&dev->dev, alrm->enabled); + puv3_rtc_setaie(dev, alrm->enabled); if (alrm->enabled) enable_irq_wake(puv3_rtc_alarmno); -- 1.9.2.459.g68773ac -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] drivers/rtc/rtc-puv3.c: use dev_dbg() instead of dev_debug() for typo issue
It is only a typo issue, the related commit: "1fbc4c4 drivers/rtc/rtc-puv3.c: use dev_dbg() instead of pr_debug()" The related error (unicore32 with allmodconfig): CC [M] drivers/rtc/rtc-puv3.o drivers/rtc/rtc-puv3.c: In function ‘puv3_rtc_setpie’: drivers/rtc/rtc-puv3.c:74: error: implicit declaration of function ‘dev_debug’ Signed-off-by: Chen Gang --- drivers/rtc/rtc-puv3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c index 1ecfe3b..c56310e 100644 --- a/drivers/rtc/rtc-puv3.c +++ b/drivers/rtc/rtc-puv3.c @@ -71,7 +71,7 @@ static int puv3_rtc_setpie(struct device *dev, int enabled) { unsigned int tmp; - dev_debug(dev, "%s: pie=%d\n", __func__, enabled); + dev_dbg(dev, "%s: pie=%d\n", __func__, enabled); spin_lock_irq(_rtc_pie_lock); tmp = readl(RTC_RTSR) & ~RTC_RTSR_HZE; -- 1.9.2.459.g68773ac -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/2] drivers/rtc/rtc-puv3.c: fix 2 typo issues.
Fix 2 typo issues for commit "1fbc4c4 drivers/rtc/rtc-puv3.c: use dev_dbg() instead of pr_debug()". Signed-off-by: Chen Gang --- drivers/rtc/rtc-puv3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] x86: Return to kernel without IRET
On Fri, May 2, 2014 at 4:53 PM, Andy Lutomirski wrote: > On my box, this saves about 100ns on each interrupt and trap that > happens while running in kernel space. This speeds up my kernel_pf > microbenchmark by about 17%. Btw, would you mind _trying_ to do a similar trick for the "return to user space" case? At least as a proof-of-concept, having a code sequence in user mode trampoline that does popq %rsi popq %r11 retq $128 and building up a stack in user space at '%rsp-128' that has the values of rsi/r11/rip should allow us to use 'sysret'. Hmm? Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Hello dear,
Hello dear, My name is Miss Adena, I'm very glad when i come across your profile today while searching for a partnership which interest me to write to you, I will like to build a good relationship with you because i see you as a one with a good heart .I want you to understand that a Good friend is like a tissue that wipe away tears, please if you don't mind i will like you to contact me through this my email address (adenapatric...@hotmail.com) for us to express ourselves and make exchange of our pictures and share a vital information's about my ourselves, Thanks, -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH 1/5] watchdog: Add API to trigger reboots
On Fri, May 02, 2014 at 06:22:43PM -0700, Maxime Ripard wrote: > Hi Guenter, > > On Thu, May 01, 2014 at 08:41:29AM -0700, Guenter Roeck wrote: > > Some hardware implements reboot through its watchdog hardware, > > for example by triggering a watchdog timeout. Platform specific > > code starts to spread into watchdog drivers, typically by setting > > pointers to a callback functions which is then called from the > > platform reset handler. > > > > To simplify code and provide a unified API to trigger reboots by > > watchdog drivers, provide a single API to trigger such reboots > > through the watchdog subsystem. > > > > Signed-off-by: Guenter Roeck > > --- > > drivers/watchdog/watchdog_core.c | 17 + > > include/linux/watchdog.h | 11 +++ > > 2 files changed, 28 insertions(+) > > > > diff --git a/drivers/watchdog/watchdog_core.c > > b/drivers/watchdog/watchdog_core.c > > index cec9b55..4ec6e2f 100644 > > --- a/drivers/watchdog/watchdog_core.c > > +++ b/drivers/watchdog/watchdog_core.c > > @@ -43,6 +43,17 @@ > > static DEFINE_IDA(watchdog_ida); > > static struct class *watchdog_class; > > > > +static struct watchdog_device *wdd_reboot_dev; > > + > > +void watchdog_do_reboot(enum reboot_mode mode, const char *cmd) > > +{ > > + if (wdd_reboot_dev) { > > + if (wdd_reboot_dev->ops->reboot) > > + wdd_reboot_dev->ops->reboot(wdd_reboot_dev, mode, cmd); > > + } > > +} > > +EXPORT_SYMBOL(watchdog_do_reboot); > > + > > static void watchdog_check_min_max_timeout(struct watchdog_device *wdd) > > { > > /* > > @@ -162,6 +173,9 @@ int watchdog_register_device(struct watchdog_device > > *wdd) > > return ret; > > } > > > > + if (wdd->ops->reboot) > > + wdd_reboot_dev = wdd; > > + > > Overall, it looks really great, but I guess we can make it a > list. 
Otherwise, we might end up in a situation where we could not > reboot anymore, like this one for example: > - a first watchdog is probed, registers a reboot function > - a second watchdog is probed, registers a reboot function that > overwrites the first one. > - then, the second watchdog disappears for some reason, and the > reboot is set to NULL > I thought about that, but how likely (or unlikely) is that to ever happen ? So I figured it is not worth the effort, and would just add complexity without real gain. We could always add the list later if we ever encounter a situation where two watchdogs in the same system provide a reboot callback. > Or maybe we can just use the start callback, with the min timeout already > registered, and prevent the user to kick the watchdog. > Doesn't always work, unfortunately, even now. The moxart driver causes an explicit and immediate reset. Also, some watchdogs don't reset the system directly but get an interrupt, which then calls the reset handler. Which, in our case, would call the start callback again, and you would have an endless loop. Guenter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
On Fri, May 02, 2014 at 11:40:22PM +0100, Al Viro wrote: > On Fri, May 02, 2014 at 02:18:43PM -0700, Linus Torvalds wrote: > > On Fri, May 2, 2014 at 2:08 PM, Miklos Szeredi wrote: > > > There's more of the "delete from shrink list not owned by us" in select > > > parent. > > > Proposed patch appended. > > > > Ahh. Clearly this needs more work before I pull. > > *nod* > > Besides, I want to put Miklos' "don't bother with RCU in shrink_dentry_list()" > in there as soon as select_collect() has been dealt with. I don't think > that the currently posted patch for select_collect() is right, though - > see my reply to parent posting. Basically, I think we should treat "it's > on the shrink list already" as "increment data->found and keep going". IOW, > if (on shrink list) { > data->found++; > } else { > if (on lru list) > d_lru_del > if (refcount is zero) { > d_shrink_add > data->found++; > } > } > if (data->found) > ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; See vfs.git#dentry_kill-3; warning - this is completely untested and I would really like comments on spinning case there (i.e. the one where select_collect() finds some stuff already on some other shrink list and nothing with zero refcount that wouldn't be there). In that case (and it's basically "somebody else is evicting stuff in our subtree and they'd already picked everything we want evicted") I just let the loop in check_submounts_and_drop() repeat (we do have cond_resched() there). Any better suggestions would be welcome... -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] x86: Return to kernel without IRET
On 05/02/2014 04:53 PM, Andy Lutomirski wrote: > On my box, this saves about 100ns on each interrupt and trap that > happens while running in kernel space. This speeds up my kernel_pf > microbenchmark by about 17%. > > Signed-off-by: Andy Lutomirski I'd really like to see a workload which would genuinely benefit before adding more complexity. Now... if we can determine that it doesn't harm anything and would solve the NMI nesting problem cleaner than the current solution, that would justify things, too... -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] USB: ehci-hub: wait for RESUME finished when hub try to clear SUSPEND
We use usb ehci to connect with modem and run stress test on ehci remote wake. Sometimes usb disconnect. We add more debug ftrace (Kernel version: 3.10) and list the key log to show how problem happened. -0 [000] d.h2 26879.385095: ehci_irq: irq status 1008c PPCE FLR PCD -0 [000] d.h2 26879.385099: ehci_irq: rh_state[2] hcd->state[132] pstatus[0][238014c5] suspended_ports[1] reset_done[0] <...>-12873 [000] d..1 26879.393536: ehci_hub_control: GetStatus port:1 status 238014c5 17 ERR POWER sig=k SUSPEND RESUME PE CONNECT <...>-12873 [000] d..1 26879.393549: ehci_hub_control: typeReq [2301] wIndex[1] wValue[2] <...>-12873 [000] d..1 26879.393553: ehci_hub_control: [ehci_hub_control]line[891] port[0] hostpc_reg [44000202]->[44000202] -0 [001] ..s. 26879.403122: ehci_hub_status_data: wgq[ehci_hub_status_data] ignore_oc[0] resuming_ports[1] <...>-12873 [000] d..1 26879.413379: ehci_hub_control: [ehci_hub_control]line[907] port[0] write portsc_reg[238014c5] reset_done[2105769] <...>-12873 [000] d..1 26879.453173: ehci_hub_control: GetStatus port:1 status 23801885 17 ERR POWER sig=j SUSPEND PE CONNECT <...>-12873 [000] 26879.473158: check_port_resume_type: port 1 status .0507 after resume, -19 <...>-12873 [000] 26879.473160: usb_port_resume: status = -19 after check_port_resume_type <...>-12873 [000] 26879.473161: usb_port_resume: can't resume, status -19 <...>-12873 [000] 26879.473162: hub_port_logical_disconnect: logical disconnect on port 1 There is a in-band remote wakeup and controller run in k-state. Then kernel driver(ClearPortFeature/USB_PORT_FEAT_SUSPEND) write RESUME|LS(k-state) bit into controller. It makes controller status weird. It's defined in EHCI controller spec(Revision 1.0), "If it has enabled remote wake-up, a K-state on the bus will turn the transceiver clock and generate an interrupt. The software will then have to wait 20 ms for the resume to complete and the port to go back to an active state." 
In this case Kernel should wait for the wakeup finished, then judge what should do next step. We have some thought and give a patch. This patch is to wait for controller RESUME finished when hub try to clear port SUSPEND feature. Signed-off-by: xiao jin Reviewed-by: David Cohen --- drivers/usb/host/ehci-hub.c |7 +++ include/linux/usb/ehci_def.h |5 + 2 files changed, 12 insertions(+) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 7ae0c4d..09a8b6b 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -935,6 +935,13 @@ static int ehci_hub_control ( break; } #endif + if ((temp & PORT_RESUME) + && ((temp & PORT_LS_MASK) == PORT_K_STATE)) { + ehci_handshake(ehci, status_reg, + PORT_RESUME, 0, 2 /* 20msec */); + temp = ehci_readl(ehci, status_reg); + temp &= ~PORT_RWC_BITS; + } if (!(temp & PORT_SUSPEND)) break; if ((temp & PORT_PE) == 0) diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index daec99a..0f0f919 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -149,6 +149,11 @@ struct ehci_regs { #define PORT_POWER (1<<12) /* true: has power (see PPC) */ #define PORT_USB11(x) (((x)&(3<<10)) == (1<<10)) /* USB 1.1 device */ /* 11:10 for detecting lowspeed devices (reset vs release ownership) */ +#define PORT_LS_MASK (0x3<<10) /* line status */ +#define PORT_SE0_STATE (0<<10) +#define PORT_K_STATE (1<<10) +#define PORT_J_STATE (2<<10) +#define PORT_UNDEFINED_STATE (3<<10) /* 9 reserved */ #define PORT_LPM (1<<9) /* LPM transaction */ #define PORT_RESET (1<<8) /* reset port */ -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re
Noi oferim credite la rata dobânzii de 3% pentru toate scop. Ne e-mail dacă sunteți interesat. We offer loans at 3% interest rate for all purpose. Email us if you are interested. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] tracing: Use rcu_dereference_sched() for trace event triggers
Linus, This is a small fix where the trigger code used the wrong rcu_dereference(). It required rcu_dereference_sched() instead of the normal rcu_dereference(). It produces a nasty RCU lockdep splat due to the incorrect rcu notation. Please pull the latest trace-fixes-v3.15-rc3 tree, which can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git trace-fixes-v3.15-rc3 Tag SHA1: c06df51860503b3a39a8f37ddaaa047fd51f8201 Head SHA1: 561a4fe851ccab9dd0d14989ab566f9392d9f8b5 Steven Rostedt (Red Hat) (1): tracing: Use rcu_dereference_sched() for trace event triggers kernel/trace/trace_events_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- commit 561a4fe851ccab9dd0d14989ab566f9392d9f8b5 Author: Steven Rostedt (Red Hat) Date: Fri May 2 13:30:04 2014 -0400 tracing: Use rcu_dereference_sched() for trace event triggers As trace event triggers are now part of the mainline kernel, I added my trace event trigger tests to my test suite I run on all my kernels. Now these tests get run under different config options, and one of those options is CONFIG_PROVE_RCU, which checks under lockdep that the rcu locking primitives are being used correctly. This triggered the following splat: === [ INFO: suspicious RCU usage. ] 3.15.0-rc2-test+ #11 Not tainted --- kernel/trace/trace_events_trigger.c:80 suspicious rcu_dereference_check() usage! 
other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by swapper/1/0: #0: ((&(_cdbs->work)->timer)){..-...}, at: [] call_timer_fn+0x5/0x1be #1: (&(>lock)->rlock){-.-...}, at: [] __queue_work+0x140/0x283 #2: (>pi_lock){-.-.-.}, at: [] try_to_wake_up+0x2e/0x1e8 #3: (>lock){-.-.-.}, at: [] try_to_wake_up+0x1a0/0x1e8 stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.15.0-rc2-test+ #11 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 0001 88007e083b98 819f53a5 0006 88007b0942c0 88007e083bc8 81081307 88007ad96d20 88007af2d840 88007b2e701c 88007e083c18 Call Trace: [] dump_stack+0x4f/0x7c [] lockdep_rcu_suspicious+0x107/0x110 [] event_triggers_call+0x99/0x108 [] ftrace_event_buffer_commit+0x42/0xa4 [] ftrace_raw_event_sched_wakeup_template+0x71/0x7c [] ttwu_do_wakeup+0x7f/0xff [] ttwu_do_activate.constprop.126+0x5c/0x61 [] try_to_wake_up+0x1ac/0x1e8 [] wake_up_process+0x36/0x3b [] wake_up_worker+0x24/0x26 [] insert_work+0x5c/0x65 [] __queue_work+0x26c/0x283 [] ? __queue_work+0x283/0x283 [] delayed_work_timer_fn+0x1e/0x20 [] call_timer_fn+0xdf/0x1be^M [] ? call_timer_fn+0x5/0x1be [] ? __queue_work+0x283/0x283 [] run_timer_softirq+0x1a4/0x22f^M [] __do_softirq+0x17b/0x31b^M [] irq_exit+0x42/0x97 [] smp_apic_timer_interrupt+0x37/0x44 [] apic_timer_interrupt+0x6f/0x80 [] ? default_idle+0x21/0x32 [] ? default_idle+0x1f/0x32 [] arch_cpu_idle+0xf/0x11 [] cpu_startup_entry+0x1a3/0x213 [] start_secondary+0x212/0x219 The cause is that the triggers are protected by rcu_read_lock_sched() but the data is dereferenced with rcu_dereference() which expects it to be protected with rcu_read_lock(). The proper reference should be rcu_dereference_sched(). 
Cc: Tom Zanussi Cc: sta...@vger.kernel.org # 3.14+ Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537..4747b47 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) data->ops->func(data); continue; } - filter = rcu_dereference(data->filter); + filter = rcu_dereference_sched(data->filter); if (filter && !filter_match_preds(filter, rec)) continue; if (data->cmd_ops->post_trigger) { -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [perf] more perf_fuzzer memory corruption
On Fri, 2 May 2014, Vince Weaver wrote: > I've been fuzzing without your additional patch for 6 hours and all looks > (almost) good. I can add in your patch and let it fuzz overnight. and I applied the additional patch, installed the kernel, hit reboot, and the following happened (this was caused by rebooting while fuzzing was ongoing) :( I'm remote from the system too so the poor machine is going to be sitting there oopsing away until Monday. The system is going down for reboot NOW! INIT: Switching to runlevel: 6 INIT: Sending p [2.795403] [ cut here ] [2.802143] WARNING: CPU: 1 PID: 23062 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0() [2.812613] list_del corruption. prev->next should be 8800c9028010, but was 6b6b6b6b6b6b6b6b [2.908976] CPU: 1 PID: 23062 Comm: perf_fuzzer Tainted: GW 3.15.0-rc1+ #102 [2.919934] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 01/26/2014 [2.930206] 0009 8800c8991ad8 81649c80 8800c8991b20 [2.940667] 8800c8991b10 810646ad 8800c9028000 8801181bf000 [2.951155] 8800c9028010 8800c9028000 0001 8800c8991b70 [2.961677] Call Trace: [2.967131] [] dump_stack+0x45/0x56 [2.975333] [] warn_slowpath_common+0x7d/0xa0 [2.984433] [] warn_slowpath_fmt+0x4c/0x50 [2.993286] [] __list_del_entry+0xa1/0xd0 [24445.002082] [] list_del_event+0xe4/0xf0 [24445.010729] [] perf_remove_from_context+0xb0/0x120 [24445.020315] [] perf_event_release_kernel+0x3f/0x80 [24445.029918] [] put_event+0xd3/0x100 [24445.038205] [] ? put_event+0x30/0x100 [24445.046638] [] perf_release+0x15/0x20 [24445.055082] [] __fput+0xdc/0x1e0 [24445.063108] [] fput+0xe/0x10 [24445.071114] [] task_work_run+0xc4/0xe0 [24445.079679] [] do_exit+0x2cc/0xa50 [24445.087922] [] ? get_signal_to_deliver+0x249/0x650 [24445.097582] [] do_group_exit+0x4c/0xc0 [24445.106200] [] get_signal_to_deliver+0x291/0x650 [24445.115733] [] do_signal+0x48/0x990 [24445.124132] [] ? finish_task_switch+0x7d/0x120 [24445.133520] [] ? _raw_spin_unlock_irq+0x27/0x40 [24445.143007] [] ? 
finish_task_switch+0x7d/0x120 [24445.152398] [] ? finish_task_switch+0x3f/0x120 [24445.161807] [] do_notify_resume+0x70/0xa0 [24445.170791] [] retint_signal+0x48/0x8c [24445.179516] ---[ end trace d44f7960e96a18db ]--- [24445.627788] [ cut here ] [24445.635804] WARNING: CPU: 2 PID: 23062 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0() [24445.646825] list_del corruption. prev->next should be 8800ce89a810, but was 6b6b6b6b6b6b6b6b [info] Will now restart. [24454.007929] general protection fault: [#1] SMP [24454.016867] Dumping ftrace buffer: [24454.023308](ftrace buffer empty) [24454.117735] CPU: 0 PID: 0 Comm: swapper/0 Tainted: GW 3.15.0-rc1+ #102 [24454.127563] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 01/26/2014 [24454.137169] task: 81c184c0 ti: 81c0 task.ti: 81c0 [24454.146980] RIP: 0010:[] [] __perf_remove_from_context+0x22/0xd0 [24454.158482] RSP: 0018:88011ea03f18 EFLAGS: 00010087 [24454.165746] RAX: 01b855441f0f RBX: 81019590 RCX: 000a [24454.174917] RDX: 0001 RSI: RDI: 880118f0b800 [24454.184018] RBP: 88011ea03f40 R08: R09: 0001 [24454.193001] R10: R11: 000225c17d03 R12: 88011ea18310 [24454.202013] R13: 88011ea18310 R14: 0005 R15: 880118f0b800 [24454.211207] FS: () GS:88011ea0() knlGS: [24454.221242] CS: 0010 DS: ES: CR0: 80050033 [24454.228734] CR2: 7f449fcb0d20 CR3: 01c11000 CR4: 001407f0 [24454.237680] DR0: DR1: 02106000 DR2: [24454.246599] DR3: 02106000 DR6: fffe0ff0 DR7: 0600 [24454.255588] Stack: [24454.258976] 880118f0b800 88011ea18310 88011ea18408 0005 [24454.268286] 81c99ab0 88011ea03f78 81135818 811357b5 [24454.277423] 880118f0b800 880036de1c98 163b0f931a27 [24454.286642] Call Trace: [24454.290378] [24454.292457] [] __perf_event_exit_context+0x98/0xf0 [24454.301858] [] ? 
__perf_event_exit_context+0x35/0xf0 [24454.310098] [] generic_smp_call_function_single_interrupt+0x5d/0x100 [24454.319785] [] smp_call_function_single_interrupt+0x27/0x40 [24454.328627] [] call_function_single_interrupt+0x6d/0x80 [24454.337078] [24454.339156] [] ? cpuidle_enter_state+0x52/0xc0 [24454.347970] [] ? cpuidle_enter_state+0x48/0xc0 [24454.355492] [] cpuidle_enter+0x17/0x20 [24454.362218] [] cpu_startup_entry+0x2c0/0x3d0 [24454.369450] [] rest_init+0xb6/0xc0 [24454.375753] [] ? rest_init+0x5/0xc0 [24454.382110] [] start_kernel+0x43d/0x448 [24454.388873] [] ?
Re: [RFC] An Immune System for Linux
On Fri, May 02, 2014 at 06:27:56PM -0700, Sterling Huxley wrote: ^^ Better luck next year, and you might want to start preparing a posting a month earlier than this time around - you are 31 days late. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [perf] more perf_fuzzer memory corruption
On Fri, 2 May 2014, Thomas Gleixner wrote: > > OK the proper patch has been running the quick reproducer for a bit > > without triggering the issue, I'll let it run a bit more and then upgrade > > to full fuzzing. > > If you do that, please add the patch below. I've been fuzzing without your additional patch for 6 hours and all looks (almost) good. I can add in your patch and let it fuzz overnight. I say almost because the following gets triggered, but I think it's an unrelated issue. Vince [17190.202941] [ cut here ] [17190.207906] WARNING: CPU: 2 PID: 4743 at arch/x86/kernel/cpu/perf_event_intel.c:1373 intel_pmu_handle_irq+0x2a4/0x3c0() [17190.219460] perfevents: irq loop stuck! [17190.223579] Modules linked in: fuse x86_pkg_temp_thermal intel_powerclamp coretemp kvm snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_intel snd_hda_controller crct10dif_pclmul snd_hda_codec crc32_pclmul snd_hwdep ghash_clmulni_intel snd_pcm aesni_intel aes_x86_64 lrw snd_seq snd_timer snd_seq_device gf128mul snd i915 glue_helper evdev soundcore drm_kms_helper mei_me iTCO_wdt iTCO_vendor_support lpc_ich battery drm ppdev psmouse serio_raw ablk_helper cryptd wmi parport_pc mei parport tpm_tis i2c_algo_bit button processor video tpm i2c_i801 i2c_core mfd_core pcspkr sd_mod sr_mod crc_t10dif cdrom crct10dif_common ehci_pci ahci xhci_hcd ehci_hcd libahci e1000e libata ptp crc32c_intel usbcore scsi_mod pps_core usb_common thermal fan thermal_sys [17190.298419] CPU: 2 PID: 4743 Comm: perf_fuzzer Not tainted 3.15.0-rc1+ #102 [17190.305926] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 01/26/2014 [17190.313906] 0009 88011ea86cb0 81649c80 88011ea86cf8 [17190.322034] 88011ea86ce8 810646ad 0064 88011ea8cbe0 [17190.330134] 8800cf7a7800 0040 88011ea8cde0 88011ea86d48 [17190.338122] Call Trace: [17190.340775][] dump_stack+0x45/0x56 [17190.347023] [] warn_slowpath_common+0x7d/0xa0 [17190.353472] [] warn_slowpath_fmt+0x4c/0x50 [17190.359677] [] 
intel_pmu_handle_irq+0x2a4/0x3c0 [17190.366315] [] ? native_write_msr_safe+0xd/0x10 [17190.372954] [] perf_event_nmi_handler+0x2b/0x50 [17190.379629] [] nmi_handle.isra.5+0xa8/0x150 [17190.385879] [] ? nmi_handle.isra.5+0x5/0x150 [17190.392287] [] do_nmi+0xd8/0x340 [17190.397572] [] end_repeat_nmi+0x1e/0x2e [17190.403472] [] ? native_write_msr_safe+0xa/0x10 [17190.410098] [] ? native_write_msr_safe+0xa/0x10 [17190.416765] [] ? native_write_msr_safe+0xa/0x10 [17190.423386] <> [] intel_pmu_enable_event+0x21d/0x240 [17190.431048] [] x86_pmu_start+0x7a/0x100 [17190.436992] [] x86_pmu_enable+0x295/0x310 [17190.443104] [] perf_pmu_enable+0x2f/0x40 [17190.449087] [] perf_event_context_sched_in+0x88/0xd0 [17190.456165] [] __perf_event_task_sched_in+0x1dd/0x1f0 [17190.463412] [] finish_task_switch+0xd8/0x120 [17190.469750] [] __schedule+0x2c0/0x740 [17190.475443] [] schedule+0x29/0x70 [17190.480772] [] schedule_hrtimeout_range_clock+0x13c/0x180 [17190.488331] [] ? hrtimer_get_res+0x50/0x50 [17190.494491] [] ? schedule_hrtimeout_range_clock+0xb9/0x180 [17190.502135] [] schedule_hrtimeout_range+0x13/0x20 [17190.508983] [] poll_schedule_timeout+0x49/0x70 [17190.515535] [] do_sys_poll+0x422/0x540 [17190.521354] [] ? poll_select_copy_remaining+0x130/0x130 [17190.528737] [] ? poll_select_copy_remaining+0x130/0x130 [17190.536129] [] ? poll_select_copy_remaining+0x130/0x130 [17190.543552] [] ? poll_select_copy_remaining+0x130/0x130 [17190.550915] [] ? poll_select_copy_remaining+0x130/0x130 [17190.558290] [] ? poll_select_copy_remaining+0x130/0x130 [17190.565698] [] ? poll_select_copy_remaining+0x130/0x130 [17190.573075] [] ? poll_select_copy_remaining+0x130/0x130 [17190.580488] [] ? 
poll_select_copy_remaining+0x130/0x130 [17190.589071] [] SyS_poll+0x65/0x100 [17190.595690] [] system_call_fastpath+0x1a/0x1f [17190.603315] ---[ end trace d44f7960e96a18da ]--- [17190.609412] [17190.612182] CPU#2: ctrl: [17190.618136] CPU#2: status: [17190.624190] CPU#2: overflow: [17190.630144] CPU#2: fixed: 00ba [17190.636123] CPU#2: pebs: [17190.642042] CPU#2: active: 00030001 [17190.648000] CPU#2: gen-PMC0 ctrl: 004000c4 [17190.654531] CPU#2: gen-PMC0 count: 0001 [17190.661059] CPU#2: gen-PMC0 left: [17190.667576] CPU#2: gen-PMC1 ctrl: 00120280 [17190.674101] CPU#2: gen-PMC1 count: 5439 [17190.680623] CPU#2: gen-PMC1 left: af43 [17190.687127] CPU#2: gen-PMC2 ctrl: 00114f2e [17190.693589] CPU#2: gen-PMC2 count: 0001 [17190.700039] CPU#2: gen-PMC2 left: [17190.706455] CPU#2: gen-PMC3 ctrl: 001300c0 [17190.712846] CPU#2: gen-PMC3 count: 0001 [17190.719135] CPU#2:
Re: [PATCH v3 00/12] ARM: Exynos: PMU cleanup and refactoring for using DT
Hi Pankaj, On 03.05.2014 03:52, Pankaj Dubey wrote: Hi, Gentle ping. Tomasz, Kukjin will you please have a look on this series. Sure. I have added this series to my queue and should be able to find some time to review it after the weekend. Best regards, Tomasz -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] i2c: qup: Fix pm_runtime_get_sync usage
This patch corrects the error check on the call to pm_runtime_get_sync. Signed-off-by: Andy Gross --- drivers/i2c/busses/i2c-qup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 1b4cf14..2a5efb5 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -479,7 +479,7 @@ static int qup_i2c_xfer(struct i2c_adapter *adap, int ret, idx; ret = pm_runtime_get_sync(qup->dev); - if (ret) + if (ret < 0) goto out; writel(1, qup->base + QUP_SW_RESET); -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, hosted by The Linux Foundation -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] spi: core: Protect DMA code by #ifdef CONFIG_HAS_DMA
On Fri, May 02, 2014 at 11:09:18PM +0200, Geert Uytterhoeven wrote: > On Fri, May 2, 2014 at 7:14 PM, Mark Brown wrote: > > Ugh. It would seem better to have these functions stubbed out. But > As long as we have include/asm-generic/dma-mapping-broken.h, > it'll be like this... Yeah, I'm not sure that it's an ideal approach. > BTW, I have a few more "depends on HAS_DMA" patches lying around for > drivers... Sure - it's good to fix these things, fire away. signature.asc Description: Digital signature
Re: [PATCH v3 00/12] ARM: Exynos: PMU cleanup and refactoring for using DT
Hi, Gentle ping. Tomasz, Kukjin will you please have a look on this series. Thanks, Pankaj Dubey On 04/30/2014 02:17 PM, Pankaj Dubey wrote: This patch series, does some minor cleanup of exynos machine files. It also modifies Exynos Power Management Unit (PMU) related code for converting it into a platform_driver. This is also preparation for moving PMU related code out of machine folder into a either "drivers/mfd", or "drivers/power" or some other suitable place so that ARM64 based SoC can utilize common piece of code. These patches require change in Exynos SoC dtsi files, which has been posted as separate patch series [2] These patches are created on top of Kukjin Kim's for-next (v3.15-rc1 tag) branch and on top of Daniel Lezcano's Exynos cpuidle refactor patches [3]. These patches depends on following three patch series: [1] mfd: syscon: Support early initialization https://lkml.org/lkml/2014/4/8/239 [2] Add PMU node for Exynos SoCs http://www.mail-archive.com/linux-samsung-soc@vger.kernel.org/msg29329.html [3] http://thread.gmane.org/gmane.linux.kernel.samsung-soc/29085 We have tested these patches on SMDK5250 board for System boot and Arndale (Exynos5250) board for System boot and PMU initialization and S2R. For testing on Arndale (Exynos5250) board: Tested-by: Pankaj Dubey Changes Since v2: - Rebased on top of Daniel Lezcano's Exynos cpuidle refactor patches. - Removed exynos_cpuidle_init and exynos_cpufreq_init code as suggested by Tomasz Figa. - Removed early mapping of PMU base address from exynos.c and removed "get_exynos_pmuaddr" function. Instead of this added code in platsmp.c to get PMU base address using of_iomap as suggested by Tomasz Figa. - Converted PMU implementation into platform_driver by using static platform_device method. Changes Since v1: - Rebased on latest for-next of Kukjin Kim's tree. - Added patch: "Make exynos machine_ops as static". For making more cleanup in "mach-exynos/common.h" as suggested by Tomasz Figa. 
- Addressed comments of Tomasz Figa for cleaning "mach-exynos/common.h". - Updated patch: Remove file path from comment section As suggested by Michel Simek, instead of updating file path let's remove them from each file under "mach-exynos". Even though Kukjin pointed out that there is similar patch pending from Sachin/Tushar but since I could not find I have included this here. If I have missed something please point to any existing such patch. - Updated patch: Add support for mapping PMU base address via DT - Removed __initdata from declaration of "exynos_pmu_base", as it caused kernel crash as pointed out by Vikas Sajjan. - Added support for Syscon initialization and getting PMU regmap handle as suggested by Sylwester. Since current implementation of early initialization [1] has limitation that "early_syscon_init" requires DT to be unflattened and system should be able to allocate memory, we can't use regmap handles for platsmp.c file as "smp_secondary_init" will be called before DT unflattening. So I have kept both methods for accessing PMU base address. platsmp.c will use ioremapped address whereas the other files can use regmap handle. - Added patch: Remove "linux/bug.h" from pmu.c. - Updated patch: Refactored code for PMU register mapping via DT - Modified to use regmap_read/write when using regmap handle. - Added patch: Move "mach/map.h" inclusion from regs-pmu.h to platsmp.c - Added patch: Add device tree based initialization support for PMU. - Convert existing PMU implementation to be a device tree based before moving it to "drivers/mfd" folder. As suggested by Bartlomiej. - Dropped making a platform_driver for PMU, as currently PMU binding has two compatibility strings as "samsung, exynosxxx-pmu", "syscon", once we enable MFD_SYSCON config option, current "syscon" driver probe gets called and PMU probe never gets called.
So modified PMU initialization code to scan DT and match against supported compatiblity string in driver code, and once we get matching node use that for accessing PMU regmap handle using "syscon_early_regmap_lookup_by_phandle". If there is any better solution please suggest. Pankaj Dubey (8): ARM: EXYNOS: Make exynos machine_ops as static ARM: EXYNOS: Move cpufreq and cpuidle device registration to init_machine ARM: EXYNOS: Cleanup "mach-exynos/common.h" file ARM: EXYNOS: Remove file path from comment section ARM: EXYNOS: Remove "linux/bug.h" from pmu.c ARM: EXYNOS: Refactored code for using PMU address via DT ARM: EXYNOS: Move "mach/map.h" inclusion from regs-pmu.h to platsmp.c ARM: EXYNOS: Add platform driver support for Exynos PMU. Young-Gun Jang (4): ARM: EXYNOS: Move SYSREG definition into sys-reg specific file.
[RFC] An Immune System for Linux
An Immune System for Linux An Operating System does not know the origins of a program. If requested, the OS will run any program, every program, all programs. This was good in the old days when the logistics of distributing a program were expensive, time consuming, and labor intensive. Today a user need only tap a few on-screen buttons and the app store downloads a new program to your phone. Just because the OS can run a program, doesn't mean it should. Let's use the idea of an immune system to prevent unauthorized programs from running. An OS immune system should protect a computer from both external and internal malware attacks. External attacks might come in the form of programs on removable media like USB flash drives and SD cards. Internal attacks might come from zero-day memory corruption and buffer overflow bugs. Use public key cryptography to frustrate external attacks. Limit not-self programs' ability to make OS system calls to frustrate internal attacks. 1- Use public key cryptography to frustrate external attacks. We can use public key cryptography to help the OS differentiate between self programs with acceptable provenance and not-self programs with questionable origins. Force all code to prove its origin every time it runs. The cryptography is not about obfuscation. It's obvious what the contents of the encrypted file /bin/ls are. This is about provenance. Whose /bin/ls is it? When a phone is built the manufacturer creates a unique secret key / public key pair. The manufacturer uses the secret key to scramble the programs and libraries which are then loaded onto the phone. The public key is compiled into the OS. The secret key is not put on the phone. The programs on disk are scrambled, random bytes. They don't look like an executable and can't run. When a user runs a program, the OS path goes through exec() and binfmt_elf.c which reads in the program. It's in load_binary() that the scrambled program data is decrypted.
Now the program is cleartext, it loads into ram, and will execute. Malware does not have the secret key and is not scrambled. It's cleartext. When the malware cleartext is decrypted by load_binary(), it turns into ciphertext. Ciphertext doesn't have the internal structure of an executable and won't load into ram. Even if, magically, it loads into ram, when the OS jumps to main(), it is executing random bytes. The malware program can't do what the author intended. When picking the secret key / public key pair use a key size appropriate for the device. There is a pyramid of devices: billions ^ ^| 0 Big Keys |/ \| | / server \ | number MIPS|/desktop \| sold | / laptop \ | 0 |/ phone/tablet \v billions Little Keys -- Phone users won't wait more than a second or so for a program to start up. Use a small key size appropriate to low powered phones and tablets. Phones have a small key and are easier to attack but there are billions and billions of them, each one requiring some effort to break a key or somehow find a means around the decryption in exec(). As the power of the device increases it can have a larger key size. Malware might try to attack a server but the server is using a big key and is harder to attack. Servers might be a profitable target but they are heavily armoured. Phones are lightly armoured and easier to defeat, but the reward may not be worth the effort. 2- Limit not-self programs ability to make OS system calls. 
Consider the following pseudo-code: # assign random numbers to syscall symbolic constants $ for s in fork exit open close read write ; do echo "#define __NR_$s $RANDOM" >> asm/unistd_32.h done $ cat asm/unistd_32.h #define __NR_fork 9848 #define __NR_exit 11041 #define __NR_open 1857// random 32-bit int #define __NR_close 30024 #define __NR_read 27326 #define __NR_write 31273 $ --- // In the kernel source files: struct syscall_struct { syscall_handler_t *func; unsigned int tag; // 0 to 4294967295 }; sys_call_table[]= { { sys_fork, __NR_fork }, { sys_exit, __NR_exit }, { sys_open, __NR_open },// symbolic random number { sys_close,__NR_close }, { sys_read, __NR_read }, { sys_write,__NR_write }, }; --- // somewhere in entry_32.S // find the requested OS call using user supplied syscall number userrequest= %eax; // get syscall # from stack for ( i= 0; i <
Re: [RFC PATCH 1/5] watchdog: Add API to trigger reboots
Hi Guenter, On Thu, May 01, 2014 at 08:41:29AM -0700, Guenter Roeck wrote: > Some hardware implements reboot through its watchdog hardware, > for example by triggering a watchdog timeout. Platform specific > code starts to spread into watchdog drivers, typically by setting > pointers to a callback functions which is then called from the > platform reset handler. > > To simplify code and provide a unified API to trigger reboots by > watchdog drivers, provide a single API to trigger such reboots > through the watchdog subsystem. > > Signed-off-by: Guenter Roeck > --- > drivers/watchdog/watchdog_core.c | 17 + > include/linux/watchdog.h | 11 +++ > 2 files changed, 28 insertions(+) > > diff --git a/drivers/watchdog/watchdog_core.c > b/drivers/watchdog/watchdog_core.c > index cec9b55..4ec6e2f 100644 > --- a/drivers/watchdog/watchdog_core.c > +++ b/drivers/watchdog/watchdog_core.c > @@ -43,6 +43,17 @@ > static DEFINE_IDA(watchdog_ida); > static struct class *watchdog_class; > > +static struct watchdog_device *wdd_reboot_dev; > + > +void watchdog_do_reboot(enum reboot_mode mode, const char *cmd) > +{ > + if (wdd_reboot_dev) { > + if (wdd_reboot_dev->ops->reboot) > + wdd_reboot_dev->ops->reboot(wdd_reboot_dev, mode, cmd); > + } > +} > +EXPORT_SYMBOL(watchdog_do_reboot); > + > static void watchdog_check_min_max_timeout(struct watchdog_device *wdd) > { > /* > @@ -162,6 +173,9 @@ int watchdog_register_device(struct watchdog_device *wdd) > return ret; > } > > + if (wdd->ops->reboot) > + wdd_reboot_dev = wdd; > + Overall, it looks really great, but I guess we can make it a list. Otherwise, we might end up in a situation where we could not reboot anymore, like this one for example: - a first watchdog is probed, registers a reboot function - a second watchdog is probed, registers a reboot function that overwrites the first one. 
- then, the second watchdog disappears for some reason, and the reboot is set to NULL Or maybe we can just use the start callback, with the min timeout already registered, and prevent the user to kick the watchdog. Maxime -- Maxime Ripard, Free Electrons Embedded Linux, Kernel and Android engineering http://free-electrons.com signature.asc Description: Digital signature
[PATCH] drivers/tty/hvc: don't free hvc_console_setup after init
When 'console=hvc0' is specified to the kernel parameter in x86 KVM guest, hvc console is setup within a kthread. However, that will cause SEGV and the boot will fail when the driver is builtin to the kernel, because currently hvc_console_setup() is annotated with '__init'. This patch removes '__init' to boot the guest successfully with 'console=hvc0'. Signed-off-by: Tomoki Sekiyama --- drivers/tty/hvc/hvc_console.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 94f9e3a..0ff7fda 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -190,7 +190,7 @@ static struct tty_driver *hvc_console_device(struct console *c, int *index) return hvc_driver; } -static int __init hvc_console_setup(struct console *co, char *options) +static int hvc_console_setup(struct console *co, char *options) { if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES) return -ENODEV; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arch/unicore32/include/asm/io.h: add readl_relaxed() generic definition
Need a generic definition for readl_relaxed(), as other architectures have. Otherwise, compiling with allmodconfig fails with the following error: CC [M] drivers/message/fusion/mptbase.o drivers/message/fusion/mptbase.c: In function ‘mpt_send_handshake_request’: drivers/message/fusion/mptbase.c:1224: error: implicit declaration of function ‘readl_relaxed’ Signed-off-by: Chen Gang --- arch/unicore32/include/asm/io.h | 4 1 file changed, 4 insertions(+) diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h index ae327e4..cb1d8fd 100644 --- a/arch/unicore32/include/asm/io.h +++ b/arch/unicore32/include/asm/io.h @@ -39,6 +39,10 @@ extern void __uc32_iounmap(volatile void __iomem *addr); #define ioremap_nocache(cookie, size) __uc32_ioremap(cookie, size) #define iounmap(cookie)__uc32_iounmap(cookie) +#define readb_relaxed readb +#define readw_relaxed readw +#define readl_relaxed readl + #define HAVE_ARCH_PIO_SIZE #define PIO_OFFSET (unsigned int)(PCI_IOBASE) #define PIO_MASK (unsigned int)(IO_SPACE_LIMIT) -- 1.9.2.459.g68773ac -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] netdev: pktgen xmit packet through vlan interface
Thank Sergei for reviewing. I think On Sat, May 3, 2014 at 12:18 AM, Sergei Shtylyov wrote: >> + >> + if (pkt_dev->clone_skb && is_vlan_dev(odev)) { >> + nskb = skb_clone(pkt_dev->skb, GFP_ATOMIC); >> + ret = -ENOMEM; >> + if (nskb) >> + ret = (*xmit)(nskb, odev); if (is_vlan_dev(odev) && pkt_dev->clone_skb) { nskb = skb_clone(pkt_dev->skb, GFP_ATOMIC); ret = -ENOMEM; if (nskb) ret = (*xmit)(nskb, odev); } > and case NETDEV_TX_LOCKED: case NETDEV_TX_BUSY: /* Retry it next time */ atomic_dec(&(pkt_dev->skb->users)); if (is_vlan_dev(odev) && pkt_dev->clone_skb && nskb) kfree_skb(nskb); else atomic_dec(&(pkt_dev->skb->users)); is better, because is_vlan_dev(odev) is probably in read most cache line. Zhouyi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
ACCOUNT UPDATE REQUIRED!
http://horde-update.yolasite.com/ Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [PATCH 1/3] PM / OPP: Add support for descending order for cpufreq table
Hi. Viresh Kumar Your reply is so fast like Usain Bolt. > So, create three flags: > OPP_TABLE_ORDER_ASCENDING 0 > OPP_TABLE_ORDER_DESCENDING1 > OPP_TABLE_ORDER_ORIGINAL 2 (And use this for your case.) -> Actually, I want to use OPP_TABLE_ORDER_DESCENDING.(Not OPP_TABLE_ORDER_ORIGINAL.) I think that it is enough to support both descending and ascending ordering only. The meaning of "ORIGINAL" Amit, said, when he(and I) writes a frequency in dts file with ordering(Ascending or Descending). He(and I) want the frequency to be registered according to ordering.(Ascending or Descending). I am concerned that if we use ORIGINAL ordering, opp_find_freq_ceil/floor can be broken. (example, 1GHz - 500MHz - 800MHz - 200MHz - 600MHz) Thanks~ Best Regards > -Original Message- > From: viresh.li...@gmail.com [mailto:viresh.li...@gmail.com] On Behalf Of > Viresh Kumar > Sent: Wednesday, April 30, 2014 5:25 PM > To: Jonghwan Choi; Linux PM list > Cc: open list; Rafael J. Wysocki; Len Brown; Amit Daniel Kachhap > Subject: Re: [PATCH 1/3] PM / OPP: Add support for descending order for > cpufreq table > > Hi, > > This isn't a very big patchset and this patch is very much required to > understand other patches and so please cc all people from other list here > as well.. > > On Wed, Apr 30, 2014 at 11:58 AM, Jonghwan Choi > wrote: > > In the frequency table dts file, the frequencies are arranged in > > Improve your logs a bit. Which dts file are you talking about here ? > How would anybody know that you are talking about exynos here? > > Also, you shouldn't mention that here, just tell the kind of requirement > platforms may have. i.e. people may want to keep the opp list in the same > order in which it came from DT. > > > descending order which maps 1 to 1 with other frequency parameter to > > be calculated and programmed in some registers. > > But the OPP library works by generating the frequencies in ascending > > order which breaks the above logic. 
> > So added OPP_TABLE_ORDER_DESCEND flag to consider descending order. > > So, create three flags: > OPP_TABLE_ORDER_ASCENDING 0 > OPP_TABLE_ORDER_DESCENDING1 > OPP_TABLE_ORDER_ORIGINAL 2 (And use this for your case.) > > > Cc: Amit Daniel Kachhap > > Signed-off-by: Jonghwan Choi > > --- > > drivers/base/power/opp.c | 17 - > > include/linux/pm_opp.h |7 +-- > > 2 files changed, 21 insertions(+), 3 deletions(-) > > You are changing prototype of a function and so all other files which are > using this routine will break after this patch and we can't afford it as > we want git bisect to work properly. > > So, fix all platforms here in this patch only, i.e. part of 2/3 and > complete 3/3 should have been merged into this one. > > > diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index > > 2553867..ec7d553 100644 > > --- a/drivers/base/power/opp.c > > +++ b/drivers/base/power/opp.c > > @@ -18,6 +18,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > #include > > @@ -597,10 +598,21 @@ int dev_pm_opp_disable(struct device *dev, > > unsigned long freq) EXPORT_SYMBOL_GPL(dev_pm_opp_disable); > > > > #ifdef CONFIG_CPU_FREQ > > + > > +static int opp_descend_cmp(void *priv, struct list_head *a, > > +struct list_head *b) { > > +struct dev_pm_opp *ra = list_entry(a, struct dev_pm_opp, node); > > +struct dev_pm_opp *rb = list_entry(b, struct dev_pm_opp, > > +node); > > + > > +return rb->rate - ra->rate; > > +} > > + > > /** > > * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a > device > > * @dev: device for which we do this operation > > * @table: Cpufreq table returned back to caller > > + * @flags: OPP_TABLE_ORDER_DESCEND or zero > > * > > * Generate a cpufreq table for a provided device- this assumes that > the > > * opp list is already initialized and ready for usage. > > @@ -622,7 +634,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable); > > * or in contexts where mutex locking cannot be used. 
> > */ > > int dev_pm_opp_init_cpufreq_table(struct device *dev, > > - struct cpufreq_frequency_table **table) > > + struct cpufreq_frequency_table **table, unsigned char > > + flags) > > You are targeting the wrong routine. Fix of_init_opp_table() instead and > things would work automatically then.. > > And please don't change prototype of dev_pm_opp_add() for now and just > define __dev_pm_opp_add() which will be called from > dev_pm_opp_add() and of_init_opp_table() with 'int order' parameter. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] x86: Return to kernel without IRET
On Fri, May 2, 2014 at 4:51 PM, Andy Lutomirski wrote: > On my box, this saves about 100ns on each interrupt and trap that > happens while running in kernel space. This speeds up my kernel_pf > microbenchmark by about 17%. Acked-by: Linus Torvalds Now it just needs lots of testing to make sure it's all good. But I can't imagine what would go wrong. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Did you still need asynchronous grace-period detection?
Hello, Peter, The following commit seems to have been dropped on the floor. Given that you haven't complained, I feel the need to ask you if you still need it. ;-) If you do need it, I will push it into 3.16. Thanx, Paul rcu: Provide grace-period piggybacking API The following pattern is currently not well supported by RCU: 1. Make data element inaccessible to RCU readers. 2. Do work that probably lasts for more than one grace period. 3. Do something to make sure RCU readers in flight before #1 above have completed. Here are some things that could currently be done: a. Do a synchronize_rcu() unconditionally at either #1 or #3 above. This works, but imposes needless work and latency. b. Post an RCU callback at #1 above that does a wakeup, then wait for the wakeup at #3. This works well, but likely results in an extra unneeded grace period. Open-coding this is also a bit more semi-tricky code than would be good. This commit therefore adds get_state_synchronize_rcu() and cond_synchronize_rcu() APIs. Call get_state_synchronize_rcu() at #1 above and pass its return value to cond_synchronize_rcu() at #3 above. This results in a call to synchronize_rcu() if no grace period has elapsed between #1 and #3, but requires only a load, comparison, and memory barrier if a full grace period did elapse. Requested-by: Peter Zijlstra Signed-off-by: Paul E. 
McKenney Acked-by: Peter Zijlstra diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e8cb6e3b52a7..425c659d54e5 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,6 +27,16 @@ #include <linux/cache.h> +static inline unsigned long get_state_synchronize_rcu(void) +{ + return 0; +} + +static inline void cond_synchronize_rcu(unsigned long oldstate) +{ + might_sleep(); +} + static inline void rcu_barrier_bh(void) { wait_rcu_gp(call_rcu_bh); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e9c63884df0a..a59ca05fd4e3 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -76,6 +76,8 @@ static inline void synchronize_rcu_bh_expedited(void) void rcu_barrier(void); void rcu_barrier_bh(void); void rcu_barrier_sched(void); +unsigned long get_state_synchronize_rcu(void); +void cond_synchronize_rcu(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 351faba48b91..0c47e300210a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1421,13 +1421,14 @@ static int rcu_gp_init(struct rcu_state *rsp) /* Advance to a new grace period and initialize state. */ record_gp_stall_check_time(rsp); - smp_wmb(); /* Record GP times before starting GP. */ - rsp->gpnum++; + /* Record GP times before starting GP, hence smp_store_release(). */ + smp_store_release(&rsp->gpnum, rsp->gpnum + 1); trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ mutex_lock(&rsp->onoff_mutex); + smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */ /* * Set the quiescent-state-needed bits in all the rcu_node @@ -1555,10 +1556,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); - smp_mb__after_unlock_lock(); + smp_mb__after_unlock_lock(); /* Order GP before ->completed update. 
*/ rcu_nocb_gp_set(rnp, nocb); - rsp->completed = rsp->gpnum; /* Declare grace period done. */ + /* Declare grace period done. */ + ACCESS_ONCE(rsp->completed) = rsp->gpnum; trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); @@ -2637,6 +2639,58 @@ void synchronize_rcu_bh(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); +/** + * get_state_synchronize_rcu - Snapshot current RCU state + * + * Returns a cookie that is used by a later call to cond_synchronize_rcu() + * to determine whether or not a full grace period has elapsed in the + * meantime. + */ +unsigned long get_state_synchronize_rcu(void) +{ + /* +* Any prior manipulation of RCU-protected data must happen +* before the load from ->gpnum. +*/ + smp_mb(); /* ^^^ */ + + /* +* Make sure this load happens before the purportedly +* time-consuming work between get_state_synchronize_rcu() +* and cond_synchronize_rcu(). +*/ + return smp_load_acquire(&rcu_state->gpnum); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); + +/** + * cond_synchronize_rcu - Conditionally wait for an RCU grace period + * + *
[PATCH v2] x86: Return to kernel without IRET
On my box, this saves about 100ns on each interrupt and trap that happens while running in kernel space. This speeds up my kernel_pf microbenchmark by about 17%. Signed-off-by: Andy Lutomirski --- Changes from v1: - Comment fix *facepalm* Changes from the RFC: - Much better comments - Rewritten to use popq_cfi directly instead of RESTORE_ARGS - Uses sti to restore IF so we get the interrupt shadow arch/x86/kernel/entry_64.S | 49 +- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1e96c36..504cec5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1033,9 +1033,56 @@ retint_swapgs: /* return to user-space */ retint_restore_args: /* return to kernel space */ DISABLE_INTERRUPTS(CLBR_ANY) /* -* The iretq could re-enable interrupts: +* The sti could re-enable interrupts: */ TRACE_IRQS_IRETQ + + /* +* Fast return to kernel. The stack looks like: +* +* previous frame +* possible 8 byte gap for alignment +* SS RSP EFLAGS CS RIP +* ORIG_RAX RDI ... R11 +* +* We rewrite it to: +* +* previous frame +* RIP (EFLAGS & ~IF) ... +* pointer to the EFLAGS slot +* RDI ... R11 +*/ + movq RSP-ARGOFFSET(%rsp), %rsi + subq $16, %rsi + movq EFLAGS-ARGOFFSET(%rsp), %rdi + movq RIP-ARGOFFSET(%rsp), %rcx + btr $9, %rdi + movq %rdi, (%rsi) + movq %rcx, 8(%rsi) + movq %rsi, ORIG_RAX-ARGOFFSET(%rsp) + popq_cfi %r11 + popq_cfi %r10 + popq_cfi %r9 + popq_cfi %r8 + popq_cfi %rax + popq_cfi %rcx + popq_cfi %rdx + popq_cfi %rsi + popq_cfi %rdi + + popq %rsp + jc 1f + /* Interrupts were not enabled */ + popfq_cfi + retq +1: + CFI_ADJUST_CFA_OFFSET 8 + /* Interrupts were enabled */ + popfq_cfi + sti + /* Interrupts are still off because of the one-insn grace period. 
*/ + retq + restore_args: RESTORE_ARGS 1,8,1 -- 1.9.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] x86: Return to kernel without IRET
On my box, this saves about 100ns on each interrupt and trap that happens while running in kernel space. This speeds up my kernel_pf microbenchmark by about 17%. Signed-off-by: Andy Lutomirski --- Changes from the RFC: - Much better comments - Rewritten to use popq_cfi directly instead of RESTORE_ARGS - Uses sti to restore IF so we get the interrupt shadow arch/x86/kernel/entry_64.S | 51 -- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1e96c36..0f6fe36 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1023,7 +1023,7 @@ retint_check: retint_swapgs: /* return to user-space */ /* -* The iretq could re-enable interrupts: +* The sti could re-enable interrupts: */ DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_IRETQ @@ -1033,9 +1033,56 @@ retint_swapgs: /* return to user-space */ retint_restore_args: /* return to kernel space */ DISABLE_INTERRUPTS(CLBR_ANY) /* -* The iretq could re-enable interrupts: +* The popfq could re-enable interrupts: */ TRACE_IRQS_IRETQ + + /* +* Fast return to kernel. The stack looks like: +* +* previous frame +* possible 8 byte gap for alignment +* SS RSP EFLAGS CS RIP +* ORIG_RAX RDI ... R11 +* +* We rewrite it to: +* +* previous frame +* RIP (EFLAGS & ~IF) ... +* pointer to the EFLAGS slot +* RDI ... R11 +*/ + movq RSP-ARGOFFSET(%rsp), %rsi + subq $16, %rsi + movq EFLAGS-ARGOFFSET(%rsp), %rdi + movq RIP-ARGOFFSET(%rsp), %rcx + btr $9, %rdi + movq %rdi, (%rsi) + movq %rcx, 8(%rsi) + movq %rsi, ORIG_RAX-ARGOFFSET(%rsp) + popq_cfi %r11 + popq_cfi %r10 + popq_cfi %r9 + popq_cfi %r8 + popq_cfi %rax + popq_cfi %rcx + popq_cfi %rdx + popq_cfi %rsi + popq_cfi %rdi + + popq %rsp + jc 1f + /* Interrupts were not enabled */ + popfq_cfi + retq +1: + CFI_ADJUST_CFA_OFFSET 8 + /* Interrupts were enabled */ + popfq_cfi + sti + /* Interrupts are still off because of the one-insn grace period. 
*/ + retq + restore_args: RESTORE_ARGS 1,8,1 -- 1.9.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] uprobes: fix scratch register selection for rip-relative fixups
On Fri, 2014-05-02 at 17:04 +0200, Denys Vlasenko wrote: > Before this patch, instructions such as div, mul, > shifts with count in CL, cmpxchg are mishandled. I just noticed that this sounds rather worse than it is. It would be more precise to say, "Before this patch, the rip-relative addressing mode in instructions such as ... is mishandled." ... > > Signed-off-by: Denys Vlasenko > CC: Jim Keniston > CC: Masami Hiramatsu > CC: Srikar Dronamraju > CC: Ingo Molnar > CC: Oleg Nesterov > --- > arch/x86/kernel/uprobes.c | 179 > +- > 1 file changed, 128 insertions(+), 51 deletions(-) > > diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c > index dbbf6cd..5b387b7 100644 > --- a/arch/x86/kernel/uprobes.c > +++ b/arch/x86/kernel/uprobes.c > @@ -41,8 +41,12 @@ > /* Instruction will modify TF, don't change it */ > #define UPROBE_FIX_SETF 0x04 > > -#define UPROBE_FIX_RIP_AX0x08 > -#define UPROBE_FIX_RIP_CX0x10 > +#define UPROBE_FIX_RIP_SI0x08 > +#define UPROBE_FIX_RIP_DI0x10 > +#define UPROBE_FIX_RIP_BX0x20 > +#define UPROBE_FIX_RIP_MASK (UPROBE_FIX_RIP_SI \ > + | UPROBE_FIX_RIP_DI \ > + | UPROBE_FIX_RIP_BX) Yes. ... > + /* Fetch vex. */ > + reg2 = 0xff; > + if (insn->vex_prefix.nbytes == 2) { > + reg2 = insn->vex_prefix.bytes[1]; > + } > + if (insn->vex_prefix.nbytes == 3) { > + reg2 = insn->vex_prefix.bytes[2]; > + } > + /* TODO: add XOP, EXEV reading */ > + /* > + * vex. field is in bits 6-3, bits are inverted. > + * But in 32-bit mode, high-order bit may be ignored. > + * Therefore, let's consider only 3 low-order bits. > + */ > + reg2 = ((reg2 >> 3) & 0x7) ^ 0x7; > > + /* Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15 */ > + /* > + * Choose scratch reg. Order is important: > + * must not select bx if we can use si (cmpxchg8b case!) It'd be good to add here: * For instructions without a VEX prefix, reg2 is 0 here. 
Otherwise it kind of looks like you forgot to address that case, and the reader shouldn't have to do the bit fiddling to figure it out. > + */ > + if (reg != 6 && reg2 != 6) { > + reg2 = 6; > + auprobe->def.fixups |= UPROBE_FIX_RIP_SI; > + } else if (reg != 7 && reg2 != 7) { > + reg2 = 7; > + auprobe->def.fixups |= UPROBE_FIX_RIP_DI; > + /* TODO (paranoia): force maskmovq to not use di */ > + } else { > + reg2 = 3; /* BX */ > + auprobe->def.fixups |= UPROBE_FIX_RIP_BX; > + } Yes. Looks good from here down. Reviewed-by: Jim Keniston -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] ACPI and power management fixes for v3.15-rc4
Hi Linus, Please pull from git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \ pm+acpi-3.15-rc4 to receive ACPI, power management and PNP fixes for v3.15-rc4 with top-most commit de3afce5336af3214374de6df360f9eb010a9a36 Merge branches 'acpi-ec' and 'acpi-processor' on top of commit d1db0eea852497762cab43b905b879dfcd3b8987 Linux 3.15-rc3 A bunch of regression fixes this time. They fix two regressions in the PNP subsystem, one in the ACPI processor driver and one in the ACPI EC driver, four cpufreq driver regressions and an unrelated bug in one of the drivers. The regressions are recent or introduced in 3.14. Specifics: - There are two bugs in the ACPI PNP core that cause errors to be returned if optional ACPI methods are not present. After an ACPI core change made in 3.14 one of those errors leads to serial port suspend failures on some systems. Fix from yours truly. - A recently added PNP quirk related to Intel chipsets introduced a build error in unusual configurations (PNP without PCI). Fix from Bjorn Helgaas. - An ACPI EC workaround related to system suspend on Samsung machines added in 3.14 introduced a race causing some valid EC events to be discarded. Fix from Kieran Clancy. - The acpi-cpufreq driver fails to load on some systems after a 3.14 commit related to APIC ID parsing that overlooked one corner case. Fix from Lan Tianyu. - Fix for a recently introduced build problem in the ppc-corenet cpufreq driver from Tim Gardner. - A recent cpufreq core change to ensure serialization of frequency transitions for drivers with a ->target_index() callback overlooked the fact that some of those drivers had been doing operations introduced by it into the core already by themselves. That resulted in a mess in which the core and the drivers try to do the same thing and block each other which leads to deadlocks. Fixes for the powernow-k7, powernow-k6, and longhaul cpufreq drivers from Srivatsa S Bhat. 
- Fix for a computational error in the powernow-k6 cpufreq driver from Srivatsa S Bhat. Thanks! --- Bjorn Helgaas (1): PNP: Fix compile error in quirks.c Kieran Clancy (1): ACPI / EC: Process rather than discard events in acpi_ec_clear Lan Tianyu (1): ACPI / processor: Fix failure of loading acpi-cpufreq driver Rafael J. Wysocki (1): PNP / ACPI: Do not return errors if _DIS or _SRS are not present Srivatsa S. Bhat (4): cpufreq: longhaul: Fix double invocation of cpufreq_freq_transition_begin/end cpufreq: powernow-k6: Fix incorrect comparison with max_multipler cpufreq: powernow-k6: Fix double invocation of cpufreq_freq_transition_begin/end cpufreq: powernow-k7: Fix double invocation of cpufreq_freq_transition_begin/end Tim Gardner (1): cpufreq: ppc-corenet-cpufreq: Fix __udivdi3 modpost error --- drivers/acpi/acpi_processor.c | 7 +++--- drivers/acpi/ec.c | 21 ++--- drivers/cpufreq/longhaul.c| 36 ++-- drivers/cpufreq/powernow-k6.c | 23 ++ drivers/cpufreq/powernow-k7.c | 4 drivers/cpufreq/ppc-corenet-cpufreq.c | 5 +++- drivers/pnp/pnpacpi/core.c| 44 +-- drivers/pnp/quirks.c | 4 ++-- 8 files changed, 85 insertions(+), 59 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] mm/memcontrol.c: introduce helper mem_cgroup_zoneinfo_zone()
While in that area, I noticed that the soft limit tree updaters don't actually use the memcg argument anymore... --- From: Johannes Weiner Subject: [patch] mm: memcontrol: remove unnecessary memcg argument from soft limit functions Signed-off-by: Johannes Weiner --- mm/memcontrol.c | 34 ++ 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 83cbd5a0e62f..3381f76df084 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -714,11 +714,9 @@ soft_limit_tree_from_page(struct page *page) return _limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; } -static void -__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz, - unsigned long long new_usage_in_excess) +static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz, +struct mem_cgroup_tree_per_zone *mctz, +unsigned long long new_usage_in_excess) { struct rb_node **p = >rb_root.rb_node; struct rb_node *parent = NULL; @@ -748,10 +746,8 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, mz->on_tree = true; } -static void -__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) +static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, +struct mem_cgroup_tree_per_zone *mctz) { if (!mz->on_tree) return; @@ -759,13 +755,11 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, mz->on_tree = false; } -static void -mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) +static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) { spin_lock(>lock); - __mem_cgroup_remove_exceeded(memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); spin_unlock(>lock); } @@ -792,12 +786,12 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) 
spin_lock(>lock); /* if on-tree, remove it */ if (mz->on_tree) - __mem_cgroup_remove_exceeded(memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); /* * Insert again. mz->usage_in_excess will be updated. * If excess is 0, no tree ops. */ - __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess); + __mem_cgroup_insert_exceeded(mz, mctz, excess); spin_unlock(>lock); } } @@ -813,7 +807,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = >nodeinfo[nid]->zoneinfo[zid]; mctz = soft_limit_tree_node_zone(nid, zid); - mem_cgroup_remove_exceeded(memcg, mz, mctz); + mem_cgroup_remove_exceeded(mz, mctz); } } } @@ -836,7 +830,7 @@ retry: * we will to add it back at the end of reclaim to its correct * position in the tree. */ - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); if (!res_counter_soft_limit_excess(>memcg->res) || !css_tryget(>memcg->css)) goto retry; @@ -4694,7 +4688,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, break; } while (1); } - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); + __mem_cgroup_remove_exceeded(mz, mctz); excess = res_counter_soft_limit_excess(>memcg->res); /* * One school of thought says that we should not add @@ -4705,7 +4699,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, * term TODO. */ /* If excess == 0, no tree ops */ - __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess); + __mem_cgroup_insert_exceeded(mz, mctz, excess); spin_unlock(>lock); css_put(>memcg->css); loop++; -- 1.9.2 -- To unsubscribe from this list: send the line
Re: [PATCH 2/2] mm/memcontrol.c: introduce helper mem_cgroup_zoneinfo_zone()
On Fri, May 02, 2014 at 03:05:16PM -0700, Andrew Morton wrote: > On Thu, 1 May 2014 08:54:50 -0400 Johannes Weiner wrote: > > > On Mon, Apr 28, 2014 at 05:04:26PM +0200, Michal Hocko wrote: > > > On Tue 22-04-14 11:59:23, Michal Hocko wrote: > > > > On Sat 19-04-14 07:01:43, Jianyu Zhan wrote: > > > > > introduce helper mem_cgroup_zoneinfo_zone(). This will make > > > > > mem_cgroup_iter() code more compact. > > > > > > > > I dunno. Helpers are usually nice but this one adds more code then it > > > > removes. It also doesn't help the generated code. > > > > > > > > So I don't see any reason to merge it. > > > > > > So should we drop it from mmotm? > > > > Yes, please. > > > > > > > Signed-off-by: Jianyu Zhan > > > > > --- > > > > > mm/memcontrol.c | 15 +++ > > > > > 1 file changed, 11 insertions(+), 4 deletions(-) > > > > This helper adds no value, but more code and indirection. > > > > Cc'd Andrew - this is about > > mm-memcontrolc-introduce-helper-mem_cgroup_zoneinfo_zone.patch > > mm-memcontrolc-introduce-helper-mem_cgroup_zoneinfo_zone-checkpatch-fixes.patch > > The patch seemed rather nice to me. mem_cgroup_zoneinfo_zone() > encapsulates a particular concept and gives it a name. That's better > than splattering the logic into callsites. Yeah, that helper is actually a good idea, for me it was just drowned out by the diffstat, the naming, and that the zoneinfo lookup overall was still left in bad shape. Thanks for prodding ;-) How about this? --- From: Jianyu Zhan Subject: [patch] mm: memcontrol: clean up memcg zoneinfo lookup Memcg zoneinfo lookup sites have either the page, the zone, or the node id and zone index, but sites that only have the zone have to look up the node id and zone index themselves, whereas sites that already have those two integers use a function for a simple pointer chase. 
Provide mem_cgroup_zone_zoneinfo() that takes a zone pointer and let sites that already have node id and zone index - all for each node, for each zone iterators - use &memcg->nodeinfo[nid]->zoneinfo[zid]. Rename page_cgroup_zoneinfo() to mem_cgroup_page_zoneinfo() to match. Signed-off-by: Jianyu Zhan Signed-off-by: Johannes Weiner --- mm/memcontrol.c | 89 + 1 file changed, 39 insertions(+), 50 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29501f040568..83cbd5a0e62f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -677,9 +677,11 @@ static void disarm_static_keys(struct mem_cgroup *memcg) static void drain_all_stock_async(struct mem_cgroup *memcg); static struct mem_cgroup_per_zone * -mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid) +mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) { - VM_BUG_ON((unsigned)nid >= nr_node_ids); + int nid = zone_to_nid(zone); + int zid = zone_idx(zone); + return &memcg->nodeinfo[nid]->zoneinfo[zid]; } @@ -689,12 +691,12 @@ struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg) } static struct mem_cgroup_per_zone * -page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page) +mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page) { int nid = page_to_nid(page); int zid = page_zonenum(page); - return mem_cgroup_zoneinfo(memcg, nid, zid); + return &memcg->nodeinfo[nid]->zoneinfo[zid]; } static struct mem_cgroup_tree_per_zone * @@ -773,16 +775,14 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) unsigned long long excess; struct mem_cgroup_per_zone *mz; struct mem_cgroup_tree_per_zone *mctz; - int nid = page_to_nid(page); - int zid = page_zonenum(page); - mctz = soft_limit_tree_from_page(page); + mctz = soft_limit_tree_from_page(page); /* * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. 
*/ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_zoneinfo(memcg, nid, zid); + mz = mem_cgroup_page_zoneinfo(memcg, page); excess = res_counter_soft_limit_excess(>res); /* * We have to update the tree if mz is on RB-tree or @@ -805,14 +805,14 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) { - int node, zone; - struct mem_cgroup_per_zone *mz; struct mem_cgroup_tree_per_zone *mctz; + struct mem_cgroup_per_zone *mz; + int nid, zid; - for_each_node(node) { - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - mz = mem_cgroup_zoneinfo(memcg, node, zone); - mctz = soft_limit_tree_node_zone(node, zone); + for_each_node(nid) { + for (zid = 0; zid < MAX_NR_ZONES; zid++) { +
Re: [RFC][PATCH v2] hwmon: add support for Sensirion SHTC1 sensor
On Fri, May 02, 2014 at 01:59:29PM -0700, Tomas Pop wrote: > Hi Guenter, thanks for comments! I will include them in third version, > but I have still few questions... > > On Don, 2014-05-01 at 19:06 -0700, Guenter Roeck wrote: > > On 05/01/2014 04:05 PM, Tomas Pop wrote: > > > One more time this patch sent with correct settings of my email client > > > - I'm sorry for this. > > > > > > This is a second version of the driver for Sensirion SHTC1 humidity and > > > temperature sensor. Initial version was submitted in July 2012. > > > http://www.gossamer-threads.com/lists/linux/kernel/1569130#1569130 > > > > > > We included suggested corrections formerly discussed in this list after > > > initial submission, but since it is quite a while, we are re-submitting > > > it again as a request for comments. Here is a list of important changes > > > to the initial version: > > > > > > * returning real error codes instead of -1 or -ENODEV > > > * using boolean variables instead of bitmaps where possible > > > * macros be16_to_cpup used for conversion of indianneess > > > * corrected formula for decoding of humidity and temperature values > > > * documentation update > > > > > > Patch was generated against kernel v3.15-rc3 > > > > > > Signed-off-by: Tomas Pop > > > --- > > > Documentation/hwmon/shtc1 | 38 + > > > drivers/hwmon/Kconfig | 10 ++ > > > drivers/hwmon/Makefile | 1 + > > > drivers/hwmon/shtc1.c | 323 > > > > > > include/linux/platform_data/shtc1.h | 24 +++ > > > 5 files changed, 396 insertions(+) > > > create mode 100644 Documentation/hwmon/shtc1 > > > create mode 100644 drivers/hwmon/shtc1.c > > > create mode 100644 include/linux/platform_data/shtc1.h > > > > > > diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1 > > > new file mode 100644 > > > index 000..6a72ae2d > > > --- /dev/null > > > +++ b/Documentation/hwmon/shtc1 > > > @@ -0,0 +1,38 @@ > > > +Kernel driver shtc1 > > > +=== > > > + > > > +Supported chips: > > > + * Sensirion SHTC1 > > > 
+Prefix: 'shtc1' > > > +Addresses scanned: none > > > +Datasheet: Publicly available at the Sensirion website > > > + > > > http://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/Humidity/Sensirion_Humidity_SHTC1_Datasheet.pdf > > > > Ok to add SHTW1 here if it is known to work. > > Just say: > > Datasheet: Not publicly available > > Actually, there is no way to find out, if you are speaking > to SHTC1 or SHTW1. (i.e., the id is the same for both). > So I will add it here and we will provide link to data-sheet > later in a separate patch. > Ok. > > > + > > > +Author: > > > + Johannes Winkelmann > > > + > > > +Description > > > +--- > > > + > > > +This driver implements support for the Sensirion SHTC1 chip, a humidity > > > and > > > > Two spaces > > > > > +temperature sensor. Temperature is measured in degrees celsius, relative > > > +humidity is expressed as a percentage. Driver can be used as well for > > > SHTW1 > > > +chip, that has the same electrical interface, but datasheet has not been > > > > ... for SHTW1, which has the same electrical interface. > > > > > +yet published. > > > + > > > > Either add support for the second now, or don't mention it at all > > (especially if the chip has a different ID and you don't want to add > > that ID at this point for some reason). > > > > > +The device communicates with the I2C protocol. All sensors are set to > > > the same > > > > ... are set to I2C address 0x70. > > > > > +I2C address 0x70, so an entry with I2C_BOARD_INFO("shtc1", 0x70) can be > > > used > > > +in the board setup code. See Documentation/i2c/instantiating-devices for > > > +other methods to instantiate the device. > > > + > > I would suggest to just refer to the instantiating-devices document and drop > > the I2C_BOARD_INFO example. > > > > > +Furthermore, there are two configuration options by means of > > > platform_data: > > > > options configurable by means ... > > > > > +1. 
blocking (pull the I2C clock line down while performing the > > > measurement) or > > > + non-blocking, mode. Blocking mode will guarantee the fastest result, > > > but > > > > non-blocking mode (no comma) > > > > > + the I2C bus will be busy during that time > > > > that time. > > > > > +2. high or low accuracy. Using high accuracy is always recommended. > > > + > > > +sysfs-Interface > > > +--- > > > + > > > +temp1_input - temperature input > > > +humidity1_input - humidity input > > > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig > > > index bc196f4..4d58149 100644 > > > --- a/drivers/hwmon/Kconfig > > > +++ b/drivers/hwmon/Kconfig > > > @@ -1114,6 +1114,16 @@ config SENSORS_SHT21 > > > This driver can also be built as a module. If so, the module > > >
Re: dcache shrink list corruption?
On Fri, May 02, 2014 at 11:40:22PM +0100, Al Viro wrote: > On Fri, May 02, 2014 at 02:18:43PM -0700, Linus Torvalds wrote: > > On Fri, May 2, 2014 at 2:08 PM, Miklos Szeredi wrote: > > > There's more of the "delete from shrink list not owned by us" in select > > > parent. > > > Proposed patch appended. > > > > Ahh. Clearly this needs more work before I pull. > > *nod* > > Besides, I want to put Miklos' "don't bother with RCU in shrink_dentry_list()" > in there as soon as select_collect() has been dealt with. I don't think > that the currently posted patch for select_collect() is right, though - > see my reply to parent posting. Basically, I think we should treat "it's > on the shrink list already" as "increment data->found and keep going". IOW, > if (on shrink list) { > data->found++; > } else { > if (on lru list) > d_lru_del > if (refcount is zero) { > d_shrink_add > data->found++; > } > } > if (data->found) > ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; While we are at it - BUG() in umount_collect() is probably a bad idea. At that point we are holding ->s_umount, so it guarantees that a lot of stuff from that point on will get stuck. Starting with sync(2). And I really doubt that damage from WARN() instead will be more... -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/4] Convert timekeeping core to use printk_deferred (v2)
On Fri, 2 May 2014 15:09:14 -0700 John Stultz wrote: > Recently, Jiri pointed out a potential deadlock when calling printk > while holding the timekeeping seqlock. > > Annoyingly, the seqlock lockdep enablement doesn't catch this, as > printk disables lockdep. > > When looking for possible solutions, one idea was to use a local buffer > and defer the printk to later. Ends up there is already similar > functionality in printk_sched() to avoid similar style deadlocks w/ > the scheduler. > > Thus this patchset (based on next/akpm) renames printk_sched to > printk_deferred and then moves the affected timekeeping printks to make > use of it. > > There were some points in the discussion between Jan and Peter that > made it seem that there may still be problems lurking in the console > layer, and I'm not sure I fully understand their point, so this solution > may be incomplete. > > Additionally, the same issue likely affects any WARN_ONs as well, but > I wanted to get some thoughts on this approach before trying to remove > or convert affected WARN_ONS. > > Your thoughts and feedback are greatly appreciated! All look pretty simple and sane to me. printk is a crazy hotspot lately but this patchset looks like it won't get singed. Would "printk_deferred_once" be more logical than "printk_once_deferred"? Think so. It's (((printk(deferred(once))), not (((printk(once(deferred))). Why do I see a pr_emerg_once_deferred() in my future? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] drivers/tty/hvc: don't free hvc_console_setup after init
When 'console=hvc0' is specified to the kernel parameter in x86 KVM guest, hvc console is setup within a kthread. However, that will cause SEGV and the boot will fail when the driver is builtin to the kernel, because currently hvc_console_setup() is annotated with '__init'. This patch removes '__init' to boot the guest successfully with 'console=hvc0'. Signed-off-by: Tomoki Sekiyama --- drivers/tty/hvc/hvc_console.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 94f9e3a..0ff7fda 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -190,7 +190,7 @@ static struct tty_driver *hvc_console_device(struct console *c, int *index) return hvc_driver; } -static int __init hvc_console_setup(struct console *co, char *options) +static int hvc_console_setup(struct console *co, char *options) { if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES) return -ENODEV; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] pinctrl-baytrail: fix for irq descriptor conflict on ASUS T100TA
On Fri, Apr 25, 2014 at 8:42 PM, Jin, Yao wrote: > On 2014/4/26 11:04, Jin, Yao wrote: (...) > I think we still need a small modification on pinctrl-baytrail.c, that > is moving gpiochip_add forward, otherwise the to_irq method is not set. > > diff --git a/drivers/pinctrl/pinctrl-baytrail.c > b/drivers/pinctrl/pinctrl-baytrail.c > index e599834..fdfb84b 100644 > --- a/drivers/pinctrl/pinctrl-baytrail.c > +++ b/drivers/pinctrl/pinctrl-baytrail.c > @@ -491,12 +491,6 @@ static int byt_gpio_probe(struct platform_device *pdev) > gc->can_sleep = false; > gc->dev = dev; > > - ret = gpiochip_add(gc); > - if (ret) { > - dev_err(&pdev->dev, "failed adding byt-gpio chip\n"); > - return ret; > - } > - > /* set up interrupts */ > irq_rc = platform_get_resource(pdev, IORESOURCE_IRQ, 0); > if (irq_rc && irq_rc->start) { > @@ -514,6 +508,12 @@ static int byt_gpio_probe(struct platform_device *pdev) > irq_set_chained_handler(hwirq, byt_gpio_irq_handler); > } > > + ret = gpiochip_add(gc); > + if (ret) { > + dev_err(&pdev->dev, "failed adding byt-gpio chip\n"); > + return ret; > + } > + Now I feel I'm not following what combo of patches solves the problem any more. Do you mean you think that this patch combined with Thomas' will solve the root cause? Yours, Linus Walleij -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: + printk-print-initial-logbuf-contents-before-re-enabling-interrupts.patch added to -mm tree
On Fri 02-05-14 14:22:20, Andrew Morton wrote: > From: Will Deacon > Subject: printk: print initial logbuf contents before re-enabling interrupts > > When running on a hideously slow system (~10Mhz FPGA) with a bunch of > debug printk invocations on the timer interrupt path, we end up filling > the log buffer faster than we can drain it. > > The reason is that console_unlock (which is responsible for moving > messages out of logbuf to hand over to the console driver) removes one > message at a time, briefly re-enabling interrupts between each of them. > If the interrupt path prints more than a single message, then we can > easily generate more messages than we can print for a regular, recurring > interrupt (e.g. a 1khz timer). This results in messages getting silently > dropped, leading to counter-intuitive, incomplete printk traces on the > console. > > Rather than run the console_unlock loop with interrupts disabled (which > has obvious latency problems), this patch records the sequence number of > the last message in the log buffer after taking the logbuf_lock. We can > then print this fixed amount of work before re-enabling interrupts again, > making sure we keep up with ourself. Other CPUs could still potentially > flood the buffer, but there's little that we can do to protect against > that. I really dislike this patch. It goes completely against my efforts of lowering irq latency caused by printing to console (which are the problems I have observed ;). My opinion is that when you are printing from each and every interrupt which happens so often, then you have a problem and disabling IRQs in printk so that your interrupt doesn't happen that often seems like a poor solution to me. You could as well just ratelimit your debug messages, couldn't you? 
Honza > Signed-off-by: Will Deacon > Acked-by: Peter Zijlstra > Cc: Kay Sievers > Cc: Jan Kara > Signed-off-by: Andrew Morton > --- > > kernel/printk/printk.c |9 + > 1 file changed, 9 insertions(+) > > diff -puN > kernel/printk/printk.c~printk-print-initial-logbuf-contents-before-re-enabling-interrupts > kernel/printk/printk.c > --- > a/kernel/printk/printk.c~printk-print-initial-logbuf-contents-before-re-enabling-interrupts > +++ a/kernel/printk/printk.c > @@ -2147,10 +2147,13 @@ void console_unlock(void) > again: > for (;;) { > struct printk_log *msg; > + u64 console_end_seq; > size_t len; > int level; > > raw_spin_lock_irqsave(&logbuf_lock, flags); > + console_end_seq = log_next_seq; > +again_noirq: > if (seen_seq != log_next_seq) { > wake_klogd = true; > seen_seq = log_next_seq; > @@ -2195,6 +2198,12 @@ skip: > stop_critical_timings();/* don't trace print latency */ > call_console_drivers(level, text, len); > start_critical_timings(); > + > + if (console_seq < console_end_seq) { > + raw_spin_lock(&logbuf_lock); > + goto again_noirq; > + } > + > local_irq_restore(flags); > } > console_locked = 0; > _ > > Patches currently in -mm which might be from will.dea...@arm.com are > > origin.patch > printk-print-initial-logbuf-contents-before-re-enabling-interrupts.patch > printk-report-dropping-of-messages-from-logbuf.patch > documentation-devicetree-bindings-add-documentation-for-the-apm-x-gene-soc-rtc-dts-binding.patch > drivers-rtc-add-apm-x-gene-soc-rtc-driver.patch > arm64-add-apm-x-gene-soc-rtc-dts-entry.patch > linux-next.patch > -- Jan Kara SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCHv5 01/15] Documentation: dt: add common bindings for hwspinlock
Hi Rob, On 05/02/2014 09:58 AM, Rob Herring wrote: > On Wed, Apr 30, 2014 at 7:34 PM, Suman Anna wrote: >> This patch adds the generic common bindings used to represent >> a hwlock device and use/request locks in a device-tree build. >> >> All the platform-specific hwlock driver implementations need the >> number of locks and associated base id for registering the locks >> present within the device with the driver core. The number of locks >> is represented by 'hwlock-num-locks' property in DT bindings. A >> property for base id is not needed in DT binding, as it can be >> satisfied using a phandle + args specifier. The args specifier >> length is dependent on each vendor-specific implementation and >> is represented through the '#hwlock-cells' property. >> >> Note that the document is named hwlock.txt deliberately to keep it >> a bit more generic. >> >> Cc: Rob Herring >> Signed-off-by: Suman Anna >> --- >> .../devicetree/bindings/hwlock/hwlock.txt | 52 >> ++ >> 1 file changed, 52 insertions(+) >> create mode 100644 Documentation/devicetree/bindings/hwlock/hwlock.txt >> >> diff --git a/Documentation/devicetree/bindings/hwlock/hwlock.txt >> b/Documentation/devicetree/bindings/hwlock/hwlock.txt >> new file mode 100644 >> index 000..32381cc >> --- /dev/null >> +++ b/Documentation/devicetree/bindings/hwlock/hwlock.txt >> @@ -0,0 +1,52 @@ >> +Generic hwlock bindings >> +=== >> + >> +Generic bindings that are common to all the hwlock platform specific driver >> +implementations, the retrieved values are used for registering the device >> +specific parameters with the hwspinlock core. >> + >> +The validity and need of these common properties may vary from one platform >> +implementation to another. The platform specific bindings should explicitly >> +state if a property is mandatory or optional. Please look through the >> +individual platform specific hwlock binding documentations for identifying >> +the applicable properties. 
>> + >> +Common properties: >> +- #hwlock-cells: Specifies the number of cells needed to represent a >> + specific lock. > > This should never be optional. Thanks for reviewing this. I can add a statement here to make this clear. > >> +- hwlock-num-locks:Number of locks present in a hwlock device. This >> + property is needed on hwlock devices, where the >> number >> + of supported locks within a hwlock device cannot be >> + read from a register. > > Do you have any users of this? The omap binding doesn't use it. > Wouldn't you typically know this based on the IP block? Similarly you > typically don't have to list how many irqs an interrupt controller > has. The MSM Spinlock driver [1] would be using this, it is waiting on this series to get finalized. It currently defines a custom property, and the number of locks is a generic property that the hwspinlock core uses and is common to different platform implementations, so created the generic property. OMAP doesn't use this because the number is read directly off an IP register. Can you also take a look at patches 8 and 12 as they add additional properties based on discussion in [2]? The hwspinlocks are used for arbitration between different initiators on an SoC, and typically would need a SoC-level identifier for each lock. All these properties allow a hwlock to be statically identified and be assigned to a user and its peer user on a different initiator, and not allowing them to be run-time assigned. regards Suman [1] https://lkml.org/lkml/2013/8/14/528 [2] http://marc.info/?l=linux-omap&m=139510004009415&w=2 > > >> + >> +Hwlock Users: >> += >> + >> +Nodes that require specific hwlock(s) should specify them using one or more >> +properties, each containing a phandle to the hwlock node and an args >> specifier >> +value as indicated by #hwlock-cells. Multiple hwlocks can be requested using >> +an array of the phandle and hwlock number specifier tuple. >> + >> +1. 
Example of a node using a single specific hwlock: >> + >> +The following example has a node requesting a hwlock in the bank defined by >> +the node hwlock1. hwlock1 is a hwlock provider with an argument specifier >> +of length 1. >> + >> + node { >> + ... >> + hwlocks = <&hwlock1 2>; >> + ... >> + }; >> + >> +2. Example of a node using multiple specific hwlocks: >> + >> +The following example has a node requesting two hwlocks, a hwlock within >> +the hwlock device node 'hwlock1' with #hwlock-cells value of 1, and another >> +hwlock within the hwlock device node 'hwlock2' with #hwlock-cells value of >> 2. >> + >> + node { >> + ... >> + hwlocks = <&hwlock1 2>, <&hwlock2 0 3>; >> + ... >> + }; >> -- >> 1.9.2 >> -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to
Re: [PATCH] pinctrl: qcom: Correct name for pin 0
On Fri, Apr 25, 2014 at 1:41 PM, Andy Gross wrote: > Fix copy/paste error in pinctrl_pin_desc for pin 0. > > Signed-off-by: Andy Gross Pretty obvious. Patch applied! Yours, Linus Walleij -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] iio: fix possible buffer overflow
Found using smatch: drivers/iio/industrialio-event.c:327 iio_device_add_event() error: buffer overflow 'iio_ev_info_text' 3 <= 7 It was probably never hit because the mask_* members of the event_spec struct are filled by using the BIT() macro with values from the iio_event_info enum that also serve as the index of the iio_ev_info_text array. Also, for_each_set_bit takes a number of bits as the size, not a number of bytes. Signed-off-by: Alexandre Belloni --- drivers/iio/industrialio-event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index ea6e06b9c7d4..804e90676159 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -321,7 +321,8 @@ static int iio_device_add_event(struct iio_dev *indio_dev, char *postfix; int ret; - for_each_set_bit(i, mask, sizeof(*mask)) { + for_each_set_bit(i, mask, +min(sizeof(*mask)*8, ARRAY_SIZE(iio_ev_info_text))) { postfix = kasprintf(GFP_KERNEL, "%s_%s_%s", iio_ev_type_text[type], iio_ev_dir_text[dir], iio_ev_info_text[i]); -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] iio: fix possible buffer overflow
Found using smatch: drivers/iio/industrialio-core.c:719 iio_device_add_info_mask_type() error: buffer overflow 'iio_chan_info_postfix' 17 <= 63 It was probably never hit because the info_mask_* members are filled by using the BIT() macro with values from the iio_chan_info_enum enum that also serve as the index of the iio_chan_info_postfix array. Signed-off-by: Alexandre Belloni --- drivers/iio/industrialio-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index ede16aec20fb..5e7a67e53879 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -715,7 +715,8 @@ static int iio_device_add_info_mask_type(struct iio_dev *indio_dev, { int i, ret, attrcount = 0; - for_each_set_bit(i, infomask, sizeof(infomask)*8) { + for_each_set_bit(i, infomask, min(sizeof(infomask)*8, + ARRAY_SIZE(iio_chan_info_postfix))) { ret = __iio_add_chan_devattr(iio_chan_info_postfix[i], chan, &iio_read_channel_info, -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
On Fri, May 02, 2014 at 02:18:43PM -0700, Linus Torvalds wrote: > On Fri, May 2, 2014 at 2:08 PM, Miklos Szeredi wrote: > > There's more of the "delete from shrink list not owned by us" in select > > parent. > > Proposed patch appended. > > Ahh. Clearly this needs more work before I pull. *nod* Besides, I want to put Miklos' "don't bother with RCU in shrink_dentry_list()" in there as soon as select_collect() has been dealt with. I don't think that the currently posted patch for select_collect() is right, though - see my reply to parent posting. Basically, I think we should treat "it's on the shrink list already" as "increment data->found and keep going". IOW, if (on shrink list) { data->found++; } else { if (on lru list) d_lru_del if (refcount is zero) { d_shrink_add data->found++; } } if (data->found) ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 08/17] mm: page_alloc: Use word-based accesses for get/set pageblock bitmaps
Hi Mel, Vlastimil Babka suggested I should try this patch to work around a different issue I'm seeing, and noticed that it doesn't build because: On 05/01/2014 04:44 AM, Mel Gorman wrote: > +void set_pageblock_flags_mask(struct page *page, > + unsigned long flags, > + unsigned long end_bitidx, > + unsigned long nr_flag_bits, > + unsigned long mask); set_pageblock_flags_mask() is declared. > +static inline void set_pageblock_flags_group(struct page *page, > + unsigned long flags, > + int start_bitidx, int end_bitidx) > +{ > + unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1; > + unsigned long mask = (1 << nr_flag_bits) - 1; > + > + set_pageblock_flags_mask(page, flags, end_bitidx, nr_flag_bits, mask); > +} And used here, but never actually defined. Thanks, Sasha -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH] cmdline: Hide "debug" from /proc/cmdline
On Mon, 07 Apr 2014 14:24:45 +0930 Rusty Russell wrote: > Subject: param: hand arguments after -- straight to init > > The kernel passes any args it doesn't need through to init, except it > assumes anything containing '.' belongs to the kernel (for a module). > This change means all users can clearly distinguish which arguments > are for init. > > For example, the kernel uses debug ("dee-bug") to mean log everything to > the console, where systemd uses the debug from the Scandinavian "day-boog" > meaning "fail to boot". If a future versions uses argv[] instead of > reading /proc/cmdline, this confusion will be avoided. > > eg: test 'FOO="this is --foo"' -- 'systemd.debug="true true true"' > > Gives: > argv[0] = '/debug-init' > argv[1] = 'test' > argv[2] = 'systemd.debug=true true true' > envp[0] = 'HOME=/' > envp[1] = 'TERM=linux' > envp[2] = 'FOO=this is --foo' This (user-facing) feature doesn't seem to have been documented anywhere. Documentation/kernel-parameters.txt, I guess. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
On Fri, May 02, 2014 at 11:08:13PM +0200, Miklos Szeredi wrote: > There's more of the "delete from shrink list not owned by us" in select > parent. > Proposed patch appended. While it certainly looks like dentry_lru_del() should die, I really wonder if "let's pretend that dentry isn't there if it's on some other shrink list" is the right approach. You've already noticed one problem (check-and-drop giving false busy indications), but shrink_dcache_parent() has similar issues. How about incrementing data->found instead? That'll end up rescanning the tree in case if it's not ours; so what? If it's another process doing the same shrinking in a subtree, we want to let it do its job anyway. Sure, somebody doing mount -o remount in a loop might be able to stall the living hell out of us, for as long as new non-busy dentries are being added in our subtree, but the second part in itself is sufficient; we will keep picking those new non-busy dentries as long as they keep coming. And if they do not, eventually they will be all taken out by us or by those other shrinkers and we'll be done. > And I'm not sure what umount_collect() is supposed to do. Can other shrinkers > still be active at that point? That would present other problems, no? No other shrinkers - prune_dcache_sb() and shrink_dcache_sb() are also serialized by ->s_umount, shrink_dcache_parent() and check_submounts_and_drop() are called only when an active reference is held, which obviously prevents fs shutdown. > Also somewhat related is the question: how check_submounts_and_drop() could be > guaranteed correctness (timely removal of all unsed dentries) in the presence > of > other shrinkers? Another interesting question is what the hell are shrink_dcache_parent() callers expecting. E.g. what's going on in fuse_reverse_inval_entry()? And what is nilfs_tree_is_busy() about? 
FWIW, I'm not at all sure that vfs_rmdir() and vfs_rename() have any reason to call it these days, and dentry_unhash() one simply ought to die - it's used only by hpfs unlink() in case it wants to truncate in order to work around -ENOSPC. And _that_ won't have any subdirectories to deal with anyway, so shrink_dcache_parent() there is a no-op, even if we keep the damn helper alive. The rest of callers also look dubious... -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 1/1] pinctrl: add Intel BayTrail GPIO/pinctrl support
On Fri, Apr 25, 2014 at 9:31 AM, Timur Tabi wrote: > Rafael J. Wysocki wrote: >> >> I would be interested in understanding what exactly the flow is in that >> situation, so care to educate me? What does the driver do to trigger >> this and what exactly does happen in response to that? > > > I only just learned some of this myself, so I'm no expert. My understanding > is that the all of the pinctrl-* properties and nodes are scanned by the > pinctrl layer itself. So you could have a SATA controller node that points > to a pin control node (via phandles). When the SATA driver is probed, the > pinctrl layer notices the phandles and automatically calls the pinctrl layer > to configure the pins and pin muxes. There is a global pin control map for the system spanning all possible pin controller instances. Right before a device probe() function is called, the device core will attempt to grab and activate a pin control setting tied to this device and named "default". (It can also handle some PM states.) The table of states can come from: (a) Platform data or (b) Device tree And if there was an ACPI pin controller it would be case (c) and need to have intelligent bindings allowing such a table to be built so that the device core can make use of it. When the different states (such as "default") are enabled, this results in calls into the pin control driver. In most cases that ends up with simple register writes but I guess an ACPI driver would result in calling some esoteric bytecode or whatever or both. There is no escape from the fact that this needs to be handled from the pin control subsystem though, it can't be sneaked into the ACPI core or something. You may want to add new states apart from the ones defined in include/linux/pinctrl/pinctrl-state.h, as I know ACPI is a bit picky about its states and what they are named (D-states right?) 
Yours, Linus Walleij (pretending to understand ACPI) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[ANNOUNCE] Git v2.0.0-rc2
A release candidate Git v2.0.0-rc2 is now available for testing at the usual places. The tarballs are found at: https://www.kernel.org/pub/software/scm/git/testing/ The following public repositories all have a copy of the 'v2.0.0-rc2' tag and the 'master' branch that the tag points at: url = https://kernel.googlesource.com/pub/scm/git/git url = git://repo.or.cz/alt-git.git url = https://code.google.com/p/git-core/ url = git://git.sourceforge.jp/gitroot/git-core/git.git url = git://git-core.git.sourceforge.net/gitroot/git-core/git-core url = https://github.com/gitster/git Git v2.0 Release Notes (draft) == Backward compatibility notes When "git push [$there]" does not say what to push, we have used the traditional "matching" semantics so far (all your branches were sent to the remote as long as there already are branches of the same name over there). In Git 2.0, the default is now the "simple" semantics, which pushes: - only the current branch to the branch with the same name, and only when the current branch is set to integrate with that remote branch, if you are pushing to the same remote as you fetch from; or - only the current branch to the branch with the same name, if you are pushing to a remote that is not where you usually fetch from. You can use the configuration variable "push.default" to change this. If you are an old-timer who wants to keep using the "matching" semantics, you can set the variable to "matching", for example. Read the documentation for other possibilities. When "git add -u" and "git add -A" are run inside a subdirectory without specifying which paths to add on the command line, they operate on the entire tree for consistency with "git commit -a" and other commands (these commands used to operate only on the current subdirectory). Say "git add -u ." or "git add -A ." if you want to limit the operation to the current directory. 
"git add <path>" is the same as "git add -A <path>" now, so that "git add dir/" will notice paths you removed from the directory and record the removal. In older versions of Git, "git add <path>" used to ignore removals. You can say "git add --ignore-removal <path>" to add only added or modified paths in <path>, if you really want to. The "-q" option to "git diff-files", which does *NOT* mean "quiet", has been removed (it told Git to ignore deletion, which you can do with "git diff-files --diff-filter=d"). "git request-pull" lost a few "heuristics" that often led to mistakes. The default prefix for "git svn" has changed in Git 2.0. For a long time, "git svn" created its remote-tracking branches directly under refs/remotes, but it now places them under refs/remotes/origin/ unless it is told otherwise with its --prefix option. Updates since v1.9 series - UI, Workflows & Features * The "multi-mail" post-receive hook (in contrib/) has been updated to a more recent version from the upstream. * "git gc --aggressive" learned "--depth" option and "gc.aggressiveDepth" configuration variable to allow use of a less insane depth than the built-in default value of 250. * "git log" learned the "--show-linear-break" option to show where a single strand-of-pearls is broken in its output. * The "rev-parse --parseopt" mechanism used by scripted Porcelains to parse command line options and to give help text learned to take the argv-help (the placeholder string for an option parameter, e.g. "key-id" in "--gpg-sign=<key-id>"). * The pattern to find where the function begins in C/C++ used in "diff" and "grep -p" have been updated to help C++ source better. * "git rebase" learned to interpret a lone "-" as "@{-1}", the branch that we were previously on. * "git commit --cleanup=<mode>" learned a new mode, scissors. * "git tag --list" output can be sorted using "version sort" with "--sort=version:refname". 
* Discard the accumulated "heuristics" to guess from which branch the result wants to be pulled from and make sure what the end user specified is not second-guessed by "git request-pull", to avoid mistakes. When you pushed out your 'master' branch to your public repository as 'for-linus', use the new "master:for-linus" syntax to denote the branch to be pulled. * "git grep" learned to behave in a way similar to native grep when "-h" (no header) and "-c" (count) options are given. * "git push" via transport-helper interface (e.g. remote-hg) has been updated to allow ref deletion in a way similar to the natively supported transports. * The "simple" mode is the default for "git push". * "git add -u" and "git add -A", when run without any pathspec, is a tree-wide operation even when run inside a subdirectory of a working tree. * "git add <path>" is the same as "git add -A <path>" now. * "core.statinfo" configuration variable, which is a never-advertised synonym to "core.checkstat", has been removed. * The "-q" option
Re: [PATCH 2/2] gpio: of: Allow -gpio suffix for property names
On Fri, Apr 25, 2014 at 8:24 AM, Stephen Warren wrote: > On 04/24/2014 12:22 PM, Thierry Reding wrote: > ... >> The downside of not allowing the gpiod API to support the -gpio suffix >> is that we'll never be able to convert drivers that use such a binding >> and will forever have a hodgepodge of GPIO APIs that we need to support. > > Perhaps rather than making the existing gpiod API automatically search > for both -gpios and -gpio, we could make a new API for the other suffix, > so that driver indicate explicitly which property name they want. That > way, someone can't accidentally write -gpio in the DT and have it still > work. Or, add a parameter to the existing API, but that's probably a lot > more churn. Hm, that is possible, I just worry that this will lead the DT and ACPI semantics to diverge even more, and the present patch make things more coherent from the framework side of things instead of even more elaborate per-HW-info-method :-/ Yours, Linus Walleij -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] gpio: Add missing device-managed documentation
On Fri, Apr 25, 2014 at 8:10 AM, Thierry Reding wrote: > From: Thierry Reding > > Add the GPIO-related device-managed functions to the list of functions > in Documentation/driver-model/devres.txt. > > Signed-off-by: Thierry Reding Patch applied. Yours, Linus Walleij -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] gpio: Fix gpio_get -> gpiod_get typo in kernel-doc
On Fri, Apr 25, 2014 at 7:54 AM, Thierry Reding wrote: > From: Thierry Reding > > The function is called gpiod_get(), not gpio_get(). Fix the kernel-doc > comment to match the name. > > Signed-off-by: Thierry Reding Patch applied. Yours, Linus Walleij -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 04/12] clocksource: Sched clock source for Versatile Express
On Tue, Feb 11, 2014 at 9:10 AM, Pawel Moll wrote: > This patch adds a trival sched clock source using free > running, 24MHz clocked counter present in the ARM Ltd. > Versatile Express platform's System Registers block. > > This code replaces the call in the VE machine code. > > Cc: Daniel Lezcano > Cc: Thomas Gleixner > Signed-off-by: Pawel Moll Even the Integrator is very similar, albeit using register 0x28 instead of register 0x5c. cd commit a79528e9d849803457b6235ddb1a1cfd4e11c6cd It's one of these things where writing a device driver results in more code than just registering this counter with the sched_clock guts :-/ Getting the clock from the DT gives this nice feeling of having it all complete, and sched_clock cannot really change frequency anyway so I'm happy with this thing. I'll augment it for Integrator when/if I find time. Yours, Linus Walleij -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/4] printk: Re-add irqsave/restore in printk_sched
A commit in akpm's tree (printk: remove separate printk_sched buffers...), removed the printk_sched irqsave/restore lines since it was safe for current users. Since we may be expanding usage of printk_sched(), re-add the irqsave/restore logic to make the functionality more generally safe. Cc: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Steven Rostedt Signed-off-by: John Stultz --- kernel/printk/printk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 82d19e6..bf62f2b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2586,15 +2586,18 @@ void wake_up_klogd(void) int printk_sched(const char *fmt, ...) { + unsigned long flags; va_list args; int r; + local_irq_save(flags); va_start(args, fmt); r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args); va_end(args); __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + local_irq_restore(flags); return r; } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/4] Convert timekeeping core to use printk_deferred (v2)
Recently, Jiri pointed out a potential deadlock when calling printk while holding the timekeeping seqlock. Annoyingly, the seqlock lockdep enablement doesn't catch this, as printk disables lockdep. When looking for possible solutions, one idea was to use a local buffer and defer the printk to later. Ends up there is already similar functionality in printk_sched() to avoid similar style deadlocks w/ the scheduler. Thus this patchset (based on next/akpm) renames printk_sched to printk_deferred and then moves the affected timekeeping printks to make use of it. There were some points in the discussion between Jan and Peter that made it seem that there may still be problems lurking in the console layer, and I'm not sure I fully understand their point, so this solution may be incomplete. Additionally, the same issue likely affects any WARN_ONs as well, but I wanted to get some thoughts on this approach before trying to remove or convert affected WARN_ONS. Your thoughts and feedback are greatly appreciated! 
thanks -john Changes since v1: * Rebased on next/akpm, since there are queued printk patches there * Re-added irqsave/restore per irc discussion w/ PeterZ Cc: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Steven Rostedt John Stultz (4): printk: Re-add irqsave/restore in printk_sched printk: Rename printk_sched to printk_deferred printk: Add printk_once_deferred timekeeping: Use printk_deferred when holding timekeeping seqlock include/linux/printk.h| 17 ++--- kernel/printk/printk.c| 5 - kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 7 +-- kernel/sched/rt.c | 8 +--- kernel/time/ntp.c | 15 +-- kernel/time/timekeeping.c | 7 --- 7 files changed, 34 insertions(+), 27 deletions(-) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/4] timekeeping: Use printk_deferred when holding timekeeping seqlock
Jiri Bohac pointed out that there are rare but potential deadlock possibilities when calling printk while holding the timekeeping seqlock. This is due to printk() triggering console sem wakeup, which can cause scheduling code to trigger hrtimers which may try to read the time. Specifically, as Jiri pointed out, that path is: printk vprintk_emit console_unlock up(_sem) __up wake_up_process try_to_wake_up ttwu_do_activate ttwu_activate activate_task enqueue_task enqueue_task_fair hrtick_update hrtick_start_fair hrtick_start_fair get_time ktime_get --> endless loop on read_seqcount_retry(_seq, ...) This patch tries to avoid this issue by using printk_deferred (previously named printk_sched) which should defer printing via a irq_work_queue. Cc: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Steven Rostedt Reported-by: Jiri Bohac Signed-off-by: John Stultz --- kernel/time/ntp.c | 15 +-- kernel/time/timekeeping.c | 7 --- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 419a52c..5b0ac4d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -786,8 +786,9 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) time_status |= STA_PPSERROR; pps_errcnt++; pps_dec_freq_interval(); - pr_err("hardpps: PPSERROR: interval too long - %ld s\n", - freq_norm.sec); + printk_deferred(KERN_ERR + "hardpps: PPSERROR: interval too long - %ld s\n", + freq_norm.sec); return 0; } @@ -800,7 +801,8 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT); pps_freq = ftemp; if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { - pr_warning("hardpps: PPSWANDER: change=%ld\n", delta); + printk_deferred(KERN_WARNING + "hardpps: PPSWANDER: change=%ld\n", delta); time_status |= STA_PPSWANDER; pps_stbcnt++; pps_dec_freq_interval(); @@ -844,8 +846,9 @@ static void hardpps_update_phase(long error) * the time offset is 
updated. */ if (jitter > (pps_jitter << PPS_POPCORN)) { - pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", - jitter, (pps_jitter << PPS_POPCORN)); + printk_deferred(KERN_WARNING + "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", + jitter, (pps_jitter << PPS_POPCORN)); time_status |= STA_PPSJITTER; pps_jitcnt++; } else if (time_status & STA_PPSTIME) { @@ -902,7 +905,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) time_status |= STA_PPSJITTER; /* restart the frequency calibration interval */ pps_fbase = *raw_ts; - pr_err("hardpps: PPSJITTER: bad pulse\n"); + printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n"); return; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f7df8ea..ffd3113 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -852,8 +852,9 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, struct timespec *delta) { if (!timespec_valid_strict(delta)) { - printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " - "sleep delta value!\n"); + printk_deferred(KERN_WARNING + "__timekeeping_inject_sleeptime: Invalid " + "sleep delta value!\n"); return; } tk_xtime_add(tk, delta); @@ -1157,7 +1158,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) if (unlikely(tk->clock->maxadj && (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { - printk_once(KERN_WARNING + printk_once_deferred(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", tk->clock->name, (long)tk->mult + adj, (long)tk->clock->mult + tk->clock->maxadj); -- 1.9.1 -- To unsubscribe from this list: send
[PATCH 3/4] printk: Add printk_once_deferred
Two of the three printk_deferred uses are really printk_once style uses, so add a printk_once_deferred macro to simplify those call sites. Cc: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Steven Rostedt Signed-off-by: John Stultz --- include/linux/printk.h | 11 +++ kernel/sched/deadline.c | 7 +-- kernel/sched/rt.c | 8 +--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 7847301..bd21234 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -266,9 +266,20 @@ extern asmlinkage void dump_stack(void) __cold; printk(fmt, ##__VA_ARGS__); \ } \ }) +#define printk_once_deferred(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) {\ + __print_once = true;\ + printk_deferred(fmt, ##__VA_ARGS__);\ + } \ +}) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) +#define printk_once_deferred(fmt, ...) \ + no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...)\ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 657ed68..3ec96bd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -348,12 +348,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * entity. */ if (dl_time_before(dl_se->deadline, rq_clock(rq))) { - static bool lag_once = false; - - if (!lag_once) { - lag_once = true; - printk_deferred("sched: DL replenish lagged to much\n"); - } + printk_once_deferred("sched: DL replenish lagged to much\n"); dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; dl_se->runtime = pi_se->dl_runtime; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e7dc728..fd6e9ca 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -890,14 +890,8 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. 
*/ if (likely(rt_b->rt_runtime)) { - static bool once = false; - rt_rq->rt_throttled = 1; - - if (!once) { - once = true; - printk_deferred("sched: RT throttling activated\n"); - } + printk_once_deferred("sched: RT throttling activated\n"); } else { /* * In case we did anyway, make it go away, -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/4] printk: Rename printk_sched to printk_deferred
After learning we'll need some sort of deferred printk functionality in the timekeeping core, Peter suggested we rename the printk_sched function so it can be reused by needed subsystems. This only changes the function name and name of the associated buffer. No logic changes. Cc: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Steven Rostedt Signed-off-by: John Stultz --- include/linux/printk.h | 6 +++--- kernel/printk/printk.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/rt.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 8752f75..7847301 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -128,9 +128,9 @@ asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* - * Special printk facility for scheduler use only, _DO_NOT_USE_ ! + * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ -__printf(1, 2) __cold int printk_sched(const char *fmt, ...); +__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -165,7 +165,7 @@ int printk(const char *s, ...) return 0; } static inline __printf(1, 2) __cold -int printk_sched(const char *s, ...) +int printk_deferred(const char *s, ...) { return 0; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bf62f2b..ffcb487 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2584,7 +2584,7 @@ void wake_up_klogd(void) preempt_enable(); } -int printk_sched(const char *fmt, ...) +int printk_deferred(const char *fmt, ...) { unsigned long flags; va_list args; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 805b8a9..263c790 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1336,7 +1336,7 @@ out: * leave kernel. 
*/ if (p->mm && printk_ratelimit()) { - printk_sched("process %d (%s) no longer affine to cpu%d\n", + printk_deferred("process %d (%s) no longer affine to cpu%d\n", task_pid_nr(p), p->comm, cpu); } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b080957..657ed68 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -352,7 +352,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, if (!lag_once) { lag_once = true; - printk_sched("sched: DL replenish lagged to much\n"); + printk_deferred("sched: DL replenish lagged to much\n"); } dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; dl_se->runtime = pi_se->dl_runtime; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7795e29..e7dc728 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -896,7 +896,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) if (!once) { once = true; - printk_sched("sched: RT throttling activated\n"); + printk_deferred("sched: RT throttling activated\n"); } } else { /* -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] mm/memcontrol.c: introduce helper mem_cgroup_zoneinfo_zone()
On Thu, 1 May 2014 08:54:50 -0400 Johannes Weiner wrote: > On Mon, Apr 28, 2014 at 05:04:26PM +0200, Michal Hocko wrote: > > On Tue 22-04-14 11:59:23, Michal Hocko wrote: > > > On Sat 19-04-14 07:01:43, Jianyu Zhan wrote: > > > > introduce helper mem_cgroup_zoneinfo_zone(). This will make > > > > mem_cgroup_iter() code more compact. > > > > > > I dunno. Helpers are usually nice but this one adds more code then it > > > removes. It also doesn't help the generated code. > > > > > > So I don't see any reason to merge it. > > > > So should we drop it from mmotm? > > Yes, please. > > > > > Signed-off-by: Jianyu Zhan > > > > --- > > > > mm/memcontrol.c | 15 +++ > > > > 1 file changed, 11 insertions(+), 4 deletions(-) > > This helper adds no value, but more code and indirection. > > Cc'd Andrew - this is about > mm-memcontrolc-introduce-helper-mem_cgroup_zoneinfo_zone.patch > mm-memcontrolc-introduce-helper-mem_cgroup_zoneinfo_zone-checkpatch-fixes.patch The patch seemed rather nice to me. mem_cgroup_zoneinfo_zone() encapsulates a particular concept and gives it a name. That's better than splattering the logic into callsites. The patch makes no change to code size but that's because gcc is silly. Mark mem_cgroup_zoneinfo_zone() as noinline and the patch shrinks .text by 40 bytes. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/4] memcg, mm: introduce lowlimit reclaim
On Fri, May 02, 2014 at 06:49:30PM +0200, Michal Hocko wrote: > On Fri 02-05-14 11:58:05, Johannes Weiner wrote: > > On Fri, May 02, 2014 at 11:36:28AM +0200, Michal Hocko wrote: > > > On Wed 30-04-14 18:55:50, Johannes Weiner wrote: > > > > On Mon, Apr 28, 2014 at 02:26:42PM +0200, Michal Hocko wrote: > > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > > > > index 19d620b3d69c..40e517630138 100644 > > > > > --- a/mm/memcontrol.c > > > > > +++ b/mm/memcontrol.c > > > > > @@ -2808,6 +2808,29 @@ static struct mem_cgroup > > > > > *mem_cgroup_lookup(unsigned short id) > > > > > return mem_cgroup_from_id(id); > > > > > } > > > > > > > > > > +/** > > > > > + * mem_cgroup_reclaim_eligible - checks whether given memcg is > > > > > eligible for the > > > > > + * reclaim > > > > > + * @memcg: target memcg for the reclaim > > > > > + * @root: root of the reclaim hierarchy (null for the global reclaim) > > > > > + * > > > > > + * The given group is reclaimable if it is above its low limit and > > > > > the same > > > > > + * applies for all parents up the hierarchy until root (including). > > > > > + */ > > > > > +bool mem_cgroup_reclaim_eligible(struct mem_cgroup *memcg, > > > > > + struct mem_cgroup *root) > > > > > > > > Could you please rename this to something that is more descriptive in > > > > the reclaim callsite? How about mem_cgroup_within_low_limit()? > > > > > > I have intentionally used somethig that is not low_limit specific. The > > > generic reclaim code does't have to care about the reason why a memcg is > > > not reclaimable. I agree that having follow_low_limit paramter explicit > > > and mem_cgroup_reclaim_eligible not is messy. So something should be > > > renamed. I would probably go with > > > s@follow_low_limit@check_reclaim_eligible@ > > > but I do not have a strong preference. 
> > > > > > > > diff --git a/mm/vmscan.c b/mm/vmscan.c > > > > > index c1cd99a5074b..0f428158254e 100644 > > > > > --- a/mm/vmscan.c > > > > > +++ b/mm/vmscan.c > > > [...] > > > > > +static void shrink_zone(struct zone *zone, struct scan_control *sc) > > > > > +{ > > > > > + if (!__shrink_zone(zone, sc, true)) { > > > > > + /* > > > > > + * First round of reclaim didn't find anything to > > > > > reclaim > > > > > + * because of low limit protection so try again and > > > > > ignore > > > > > + * the low limit this time. > > > > > + */ > > > > > + __shrink_zone(zone, sc, false); > > > > > + } > > > > So I don't think this can work as it is, because we are not actually > > changing priority levels yet. > > __shrink_zone returns with 0 only if the whole hierarchy is is under low > limit. This means that they are over-committed and it doesn't make much > sense to play with priority. Low limit reclaimability is independent on > the priority. > > > It will give up on the guarantees of bigger groups way before smaller > > groups are even seriously looked at. > > How would that happen? Those (smaller) groups would get reclaimed and we > wouldn't fallback. Or am I missing your point? Lol, I hadn't updated my brain to a394cb8ee632 ("memcg,vmscan: do not break out targeted reclaim without reclaimed pages") yet... Yes, you are right. > > > > I would actually prefer not having a second round here, and make the > > > > low limit behave more like mlock memory. If there is no reclaimable > > > > memory, go OOM. > > > > > > This was done in my previous attempt and I prefer OOM myself but it is > > > also true that starting with a more relaxed limit and adding an > > > option for hard guarantee later when we have a clear usecase is a better > > > approach. Although I can see potential in go-oom-rather-than-reclaim > > > configurations, usecases I am primarily interested in won't overcommit on > > > low_limit. 
> > > > > > That being said, I like the idea of having the hard guarantee but I also > > > think it should be configurable. I can post those patches in this thread > > > but I feel it is too early as nobody has explicitly asked for this yet. > > > > As per above, this makes the semantics so much more fishy. When > > exactly do we stop honoring the guarantees in the process? > > When the reclaimed hierarchy is bellow low_limit. In other words when we > would go and OOM without fallback. > > > This is not even guarantees anymore, but rather another reclaim > > prioritization scheme with best-effort semantics. That went over > > horribly with soft limits, and I don't want to repeat this. > > > > Overcommitting on guarantees makes no sense, and you even agree you > > are not interested in it. We also agree that we can always add a knob > > later on to change semantics when an actual usecase presents itself, > > so why not start with the clear and simple semantics, and the simpler > > implementation? > > So you are really preferring an OOM instead? That was the original > implementation posted
[git pull] drivercore deferred probe bug fix
Hi Linus, This was the pending bug fix I mentioned in my previous pull request. Normally this would go through Greg's tree seeing that it is a drivercore change, but devicetree exposes the problem. I've discussed with Greg and he okayed me asking you to pull directly. Cheers, g. The following changes since commit 9ec36cafe43bf835f8f29273597a5b0cbc8267ef: of/irq: do irq resolution in platform_get_irq (2014-04-24 21:40:22 +0100) are available in the git repository at: git://git.secretlab.ca/git/linux tags/dt-for-linus for you to fetch changes up to 58b116bce13612e5aa6fcd49ecbd4cf8bb59e835: drivercore: deferral race condition fix (2014-04-29 15:44:05 +0100) Drivercore race condition fix (exposed by devicetree) This branch fixes a bug where a device can get stuck in the deferred list even though all its dependencies are met. The bug has existed for a long time, but new platform conversions to device tree have exposed it. This patch is needed to get those platforms working. Grant Likely (1): drivercore: deferral race condition fix drivers/base/dd.c | 17 + 1 file changed, 17 insertions(+) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On 05/02/2014 02:42 PM, Andy Lutomirski wrote: > > Hah -- I think I just faked both of you out :) > > I don't think this has anything to do with the error code, and I think > that the errorentry code already does more or less that: it pushes -1. > > The real issue here is probably the magic 16-byte stack alignment when > a non-stack-switching interrupt happens. > Errorentry is when there *is* an error code pushed by the hardware. The other variant is zeroentry, which does generate a zero error code -- eventually. The -1 means we didn't enter the kernel through a system call. -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] net: Allow tc changes in user namespaces
From: Stéphane Graber Date: Wed, 30 Apr 2014 11:25:43 -0400 > This switches a few remaining capable(CAP_NET_ADMIN) to ns_capable so > that root in a user namespace may set tc rules inside that namespace. > > Signed-off-by: Stéphane Graber > Acked-by: Serge E. Hallyn Applied to net-next, thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, May 2, 2014 at 2:37 PM, H. Peter Anvin wrote: > On 05/02/2014 02:07 PM, Linus Torvalds wrote: >> On Fri, May 2, 2014 at 2:04 PM, Andy Lutomirski wrote: >>> >>> Because otherwise I'd have to keep track of whether it's a zeroentry >>> or an errorentry. I can't stuff the offset in a register without even >>> more stack hackery, since there are no available registers there. I >>> could split the whole thing into two code paths, I guess. >> >> Ahh. Never mind. I didn't think about the fact that the error entry >> case had one more field on the stack. Your approach is all fine, it >> was me not seeing the problem. >> > > I have to admit to being rather partial to the idea of simply doing > "push $0" on entry for the vectors that don't push an error code, like > the early exception handling code does. > Hah -- I think I just faked both of you out :) I don't think this has anything to do with the error code, and I think that the errorentry code already does more or less that: it pushes -1. The real issue here is probably the magic 16-byte stack alignment when a non-stack-switching interrupt happens. --Andy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On 05/02/2014 02:07 PM, Linus Torvalds wrote: > On Fri, May 2, 2014 at 2:04 PM, Andy Lutomirski wrote: >> >> Because otherwise I'd have to keep track of whether it's a zeroentry >> or an errorentry. I can't stuff the offset in a register without even >> more stack hackery, since there are no available registers there. I >> could split the whole thing into two code paths, I guess. > > Ahh. Never mind. I didn't think about the fact that the error entry > case had one more field on the stack. Your approach is all fine, it > was me not seeing the problem. > I have to admit to being rather partial to the idea of simply doing "push $0" on entry for the vectors that don't push an error code, like the early exception handling code does. -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sched: Sanitize irq accounting madness
On Fri, May 02, 2014 at 11:26:24PM +0200, Thomas Gleixner wrote: > Russell reported, that irqtime_account_idle_ticks() takes ages due to: > >for (i = 0; i < ticks; i++) >irqtime_account_process_tick(current, 0, rq); > > It's sad, that this code was written way _AFTER_ the NOHZ idle > functionality was available. I charge myself guitly for not paying > attention when that crap got merged with commit abb74cefa (sched: > Export ns irqtimes through /proc/stat) > > So instead of looping nr_ticks times just apply the whole thing at > once. > > As a side note: The whole cputime_t vs. u64 business in that context > wants to be cleaned up as well. There is no point in having all these > back and forth conversions. Lets standardise on u64 nsec for all > kernel internal accounting and be done with it. Everything else does > not make sense at all for fine grained accounting. Frederic, can you > please take care of that? > > Reported-by: Russell King > Signed-off-by: Thomas Gleixner > Cc: sta...@vger.kernel.org One nit below, other than that: Reviewed-by: Paul E. McKenney > --- > kernel/sched/cputime.c | 32 > 1 file changed, 16 insertions(+), 16 deletions(-) > > Index: linux-2.6/kernel/sched/cputime.c > === > --- linux-2.6.orig/kernel/sched/cputime.c > +++ linux-2.6/kernel/sched/cputime.c > @@ -332,50 +332,50 @@ out: > * softirq as those do not count in task exec_runtime any more. 
> */ > static void irqtime_account_process_tick(struct task_struct *p, int > user_tick, > - struct rq *rq) > + struct rq *rq, int ticks) > { > - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); > + cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); > + u64 cputime = (__force u64) cputime_one_jiffy; > u64 *cpustat = kcpustat_this_cpu->cpustat; > > if (steal_account_process_tick()) > return; > > + cputime *= ticks; > + scaled *= ticks; > + > if (irqtime_account_hi_update()) { > - cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; > + cpustat[CPUTIME_IRQ] += cputime; > } else if (irqtime_account_si_update()) { > - cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; > + cpustat[CPUTIME_SOFTIRQ] += cputime; > } else if (this_cpu_ksoftirqd() == p) { > /* >* ksoftirqd time do not get accounted in cpu_softirq_time. >* So, we have to handle it separately here. >* Also, p->stime needs to be updated for ksoftirqd. >*/ > - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, > - CPUTIME_SOFTIRQ); > + __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); > } else if (user_tick) { > - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); > + account_user_time(p, cputime, scaled); > } else if (p == rq->idle) { > - account_idle_time(cputime_one_jiffy); > + account_idle_time(cputime); > } else if (p->flags & PF_VCPU) { /* System time or guest time */ > - account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); > + account_guest_time(p, cputime, scaled); > } else { > - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, > - CPUTIME_SYSTEM); > + __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); Stray tab character. 
> } > } > > static void irqtime_account_idle_ticks(int ticks) > { > - int i; > struct rq *rq = this_rq(); > > - for (i = 0; i < ticks; i++) > - irqtime_account_process_tick(current, 0, rq); > + irqtime_account_process_tick(current, 0, rq, ticks); > } > #else /* CONFIG_IRQ_TIME_ACCOUNTING */ > static inline void irqtime_account_idle_ticks(int ticks) {} > static inline void irqtime_account_process_tick(struct task_struct *p, int > user_tick, > - struct rq *rq) {} > + struct rq *rq, int nr_ticks) {} > #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ > > /* > @@ -464,7 +464,7 @@ void account_process_tick(struct task_st > return; > > if (sched_clock_irqtime) { > - irqtime_account_process_tick(p, user_tick, rq); > + irqtime_account_process_tick(p, user_tick, rq, 1); > return; > } > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, 2 May 2014, Linus Torvalds wrote: > On Fri, May 2, 2014 at 1:30 PM, Thomas Gleixner wrote: > > > > So what about manipulating the stack so that the popf does not enable > > interrupts and do an explicit sti to get the benefit of the > > one-instruction shadow ? > > That's what I already suggested in the original "I don't think popf > works" email. Missed that. > It does get more complex since you now have to test things (there are > very much cases where we get page faults and other exceptions with > interrupts disabled), but it shouldn't be much worse. Right. The extra conditional is probably not noticeable at all. Thanks, tglx -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sched: Sanitize irq accounting madness
Russell reported, that irqtime_account_idle_ticks() takes ages due to: for (i = 0; i < ticks; i++) irqtime_account_process_tick(current, 0, rq); It's sad, that this code was written way _AFTER_ the NOHZ idle functionality was available. I charge myself guilty for not paying attention when that crap got merged with commit abb74cefa (sched: Export ns irqtimes through /proc/stat) So instead of looping nr_ticks times just apply the whole thing at once. As a side note: The whole cputime_t vs. u64 business in that context wants to be cleaned up as well. There is no point in having all these back and forth conversions. Let's standardise on u64 nsec for all kernel internal accounting and be done with it. Everything else does not make sense at all for fine grained accounting. Frederic, can you please take care of that? Reported-by: Russell King Signed-off-by: Thomas Gleixner Cc: sta...@vger.kernel.org --- kernel/sched/cputime.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) Index: linux-2.6/kernel/sched/cputime.c === --- linux-2.6.orig/kernel/sched/cputime.c +++ linux-2.6/kernel/sched/cputime.c @@ -332,50 +332,50 @@ out: * softirq as those do not count in task exec_runtime any more. 
*/ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) +struct rq *rq, int ticks) { - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); + u64 cputime = (__force u64) cputime_one_jiffy; u64 *cpustat = kcpustat_this_cpu->cpustat; if (steal_account_process_tick()) return; + cputime *= ticks; + scaled *= ticks; + if (irqtime_account_hi_update()) { - cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; + cpustat[CPUTIME_IRQ] += cputime; } else if (irqtime_account_si_update()) { - cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; + cpustat[CPUTIME_SOFTIRQ] += cputime; } else if (this_cpu_ksoftirqd() == p) { /* * ksoftirqd time do not get accounted in cpu_softirq_time. * So, we have to handle it separately here. * Also, p->stime needs to be updated for ksoftirqd. */ - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SOFTIRQ); + __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); } else if (user_tick) { - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + account_user_time(p, cputime, scaled); } else if (p == rq->idle) { - account_idle_time(cputime_one_jiffy); + account_idle_time(cputime); } else if (p->flags & PF_VCPU) { /* System time or guest time */ - account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); + account_guest_time(p, cputime, scaled); } else { - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SYSTEM); + __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); } } static void irqtime_account_idle_ticks(int ticks) { - int i; struct rq *rq = this_rq(); - for (i = 0; i < ticks; i++) - irqtime_account_process_tick(current, 0, rq); + irqtime_account_process_tick(current, 0, rq, ticks); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ static inline void irqtime_account_idle_ticks(int ticks) {} static inline void irqtime_account_process_tick(struct task_struct *p, int 
user_tick, - struct rq *rq) {} + struct rq *rq, int nr_ticks) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ /* @@ -464,7 +464,7 @@ void account_process_tick(struct task_st return; if (sched_clock_irqtime) { - irqtime_account_process_tick(p, user_tick, rq); + irqtime_account_process_tick(p, user_tick, rq, 1); return; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] rwsem: Comments to explain the meaning of the rwsem's count field
> > /* > > + * Guide to the rw_semaphore's count field for common values. > > + * (32 bit case illustrated, similar for 64 bit) > > The values below are x86-specific; the actual defines are arch-dependent. > Do other archs use different values? These are also the values used for the generic case. I don't see other arch specific values defined. > > > + * > > + * 0x0000000X (1) X readers active or attempting lock, no writer > > waiting > > + * X = #active_readers + #readers attempting to lock > > + * (X*ACTIVE_BIAS) > > Not sure it matters, but maybe you want to note that it's possible for 0 > readers > to be active with this value, and all of the other readers may have initially > failed to claim the lock but may be successful if one can claim the wait_lock > while > the count is still > 0. I'll add the explanation for the down_read_failed scenario in the note section below. > > > + * > > + * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or > > + * attempting to read lock or write lock. > > + * > > + * 0xffff000X (1) X readers active or attempt lock, there are waiters > > for lock > > + * X = #active readers + # readers attempting lock > > + * (X*ACTIVE_BIAS + WAITING_BIAS) > > + * (2) 1 writer attempting lock, no waiters for lock > > + * X-1 = #active readers + #readers attempting lock > > + * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS) > > + * (3) 1 writer active, no waiters for lock > > + * X-1 = #active readers + #readers attempting lock > > + * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS) > > + * > > + * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock > > + * (WAITING_BIAS + ACTIVE_BIAS) > > + * (2) 1 writer active or attempt lock, no waiters for lock > > + * (ACTIVE_BIAS + ACTIVE_WRITE_BIAS) > > + * > > + * 0xffff0000 (1) There are writers or readers queued but none active > > + * or in the process of attempting lock.
> > + * (WAITING_BIAS) > > + * Note: writer can attempt to steal lock for this count by adding > > + * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count > > + * > > + * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on > > queue. > > + * (ACTIVE_WRITE_BIAS + WAITING_BIAS) > > The count can have more values than just 0xfffe0001 because multiple > failed write lock attempts plus failed read lock attempts can produce other > values than those listed. You're correct. The values are not comprehensive. I tried to show the common ones and how they arose. How about I replace the 0xfffe0001 case with count < WAITING_BIAS (1) X writer active, Y writers attempting lock, Z readers attempting lock, no waiters where X = 0 or 1, (X+Y) >= 2, Z >= 0 (X+Y) * ACTIVE_WRITE_BIAS + Z * ACTIVE_BIAS (2) X writer active, Y writers attempting lock, Z readers attempting lock, with waiters where X = 0 or 1, (X+Y) >= 1, Z >= 0 (X+Y) * ACTIVE_WRITE_BIAS + Z * ACTIVE_BIAS + WAITING_BIAS Thanks. Tim -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
On Fri, May 2, 2014 at 2:08 PM, Miklos Szeredi wrote: > There's more of the "delete from shrink list not owned by us" in select > parent. > Proposed patch appended. Ahh. Clearly this needs more work before I pull. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] spi: core: Protect DMA code by #ifdef CONFIG_HAS_DMA
Hi Mark, On Fri, May 2, 2014 at 7:14 PM, Mark Brown wrote: > On Fri, May 02, 2014 at 06:29:34AM +0200, Geert Uytterhoeven wrote: >> If NO_DMA=y: >> >> drivers/built-in.o: In function `spi_map_buf': >> spi.c:(.text+0x21bc60): undefined reference to `dma_map_sg' >> drivers/built-in.o: In function `spi_unmap_buf.isra.33': >> spi.c:(.text+0x21c32e): undefined reference to `dma_unmap_sg' >> make[3]: *** [vmlinux] Error 1 > > Ugh. It would seem better to have these functions stubbed out. But As long as we have include/asm-generic/dma-mapping-broken.h, it'll be like this... > applied anyway. Thanks! BTW, I have a few more "depends on HAS_DMA" patches lying around for drivers... Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ARM: zynq: dt: Add a fixed regulator for CPU voltage
To silence the warning cpufreq_cpu0: failed to get cpu0 regulator: -19 from the cpufreq driver regarding a missing regulator, add a fixed regulator to the DT. Zynq does not support voltage scaling and the CPU rail should always be supplied with 1 V, hence it is added in the SOC-level dtsi. Signed-off-by: Soren Brinkmann --- arch/arm/boot/dts/zynq-7000.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index c1176abc34d9..64779f9d5c38 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -25,6 +25,7 @@ reg = <0>; clocks = <&clkc 3>; clock-latency = <1000>; + cpu0-supply = <&regulator_vccpint>; operating-points = < /* kHz uV */ 666667 1000000 @@ -48,6 +49,15 @@ reg = < 0xf8891000 0x1000 0xf8893000 0x1000 >; }; + regulator_vccpint: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "VCCPINT"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + regulator-boot-on; + regulator-always-on; + }; + amba { compatible = "simple-bus"; #address-cells = <1>; -- 1.9.2.1.g06c4abd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
There's more of the "delete from shrink list not owned by us" in select parent. Proposed patch appended. And I'm not sure what umount_collect() is supposed to do. Can other shrinkers still be active at that point? That would present other problems, no? Also somewhat related is the question: how can the correctness of check_submounts_and_drop() (timely removal of all unused dentries) be guaranteed in the presence of other shrinkers? Thanks, Miklos From: Miklos Szeredi Subject: dcache: select_collect(): don't remove from shrink list Shrink lists are not protected by any lock, so don't remove from an unknown one. Signed-off-by: Miklos Szeredi --- fs/dcache.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1241,18 +1241,19 @@ static enum d_walk_ret select_collect(vo * loop in shrink_dcache_parent() might not make any progress * and loop forever. */ - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - /* -* We can't use d_lru_shrink_move() because we -* need to get the global LRU lock and do the -* LRU accounting. -*/ + if (dentry->d_flags & (DCACHE_SHRINK_LIST | DCACHE_LRU_LIST) == + DCACHE_LRU_LIST) { d_lru_del(dentry); - d_shrink_add(dentry, &data->dispose); - data->found++; - ret = D_WALK_NORETRY; + if (!dentry->d_lockref.count) { + /* +* We can't use d_lru_shrink_move() because we +* need to get the global LRU lock and do the +* LRU accounting. +*/ + d_shrink_add(dentry, &data->dispose); + data->found++; + ret = D_WALK_NORETRY; + } } /* * We can return to the caller if we have found some (this -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, May 2, 2014 at 2:04 PM, Andy Lutomirski wrote: > > Because otherwise I'd have to keep track of whether it's a zeroentry > or an errorentry. I can't stuff the offset in a register without even > more stack hackery, since there are no available registers there. I > could split the whole thing into two code paths, I guess. Ahh. Never mind. I didn't think about the fact that the error entry case had one more field on the stack. Your approach is all fine, it was me not seeing the problem. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
On Thu, May 1, 2014 at 7:34 AM, Al Viro wrote: > > OK, fixed and pushed (both branches). Al, can you send a real pull request (the "both branches" part in particular makes me worry about which one you think is right), because I suspect by now we just need to get this wider testing. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, May 2, 2014 at 2:01 PM, Linus Torvalds wrote: > On Fri, May 2, 2014 at 1:30 PM, Thomas Gleixner wrote: >> >> So what about manipulating the stack so that the popf does not enable >> interrupts and do an explicit sti to get the benefit of the >> one-instruction shadow ? > > That's what I already suggested in the original "I don't think popf > works" email. > > It does get more complex since you now have to test things (there are > very much cases where we get page faults and other exceptions with > interrupts disabled), but it shouldn't be much worse. > > Btw, Andy, why did you do "popq %rsp"? That just looks crazy. If the > stack isn't contiguous, the subsequent "popf" couldn't have worked > anyway. And I bet it screws with the stack engine. So you should just > have done something like "addq $16,%rsp" or whatever the constant ends > up being. Because otherwise I'd have to keep track of whether it's a zeroentry or an errorentry. I can't stuff the offset in a register without even more stack hackery, since there are no available registers there. I could split the whole thing into two code paths, I guess. --Andy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: dcache shrink list corruption?
On Fri, May 02, 2014 at 11:00:04AM +0200, Szeredi Miklos wrote: > The bug is private, but I'll ask if I can repost it. The first thing > is a a warning from the D_FLAG_VERIFY() in d_shrink_del() from > shrink_dentry_list(). They added printks that show that the dentry > has DCACHE_DENTRY_KILLED. > > We could ask for a dump, but this is the only rational explanation I > could find for this (and a shrink list with two dentries, with racing > dput on both nicely explains the case where the shrink list's prev > pointer still points to the already killed dentry). Bug details: == WARNING: at /home/abuild/rpmbuild/BUILD/kernel-default-3.12.15/linux-3.12/fs/dcache.c:392 Modules linked in: iptable_filter ip_tables x_tables rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd sunrpc fscache af_packet xfs libcrc32c dm_mod autofs4 btrfs xor sr_mod cdrom usb_storage raid6_pq sd_mod crc_t10dif ibmvscsi(X) scsi_transport_srp scsi_tgt ibmveth(X) ohci_pci ohci_hcd ehci_hcd usbcore usb_common sg scsi_mod Supported: Yes CPU: 7 PID: 25685 Comm: host01 Tainted: G X 3.12.15-3-default #1 task: c003f7fb68f0 ti: c003f31f4000 task.ti: c003f31f4000 NIP: c024552c LR: c02462b8 CTR: c02aad40 REGS: c003f31f7530 TRAP: 0700 Tainted: G X (3.12.15-3-default) MSR: 800100029033 CR: 24242448 XER: 2000 SOFTE: 1 CFAR: c02454b0 GPR00: c02462b8 c003f31f77b0 c0f10fb0 c003dfa7f028 GPR04: 0001 c0e4fe00 0600 GPR08: c0e30fb0 0001 GPR12: 24242442 cfe41880 GPR16: 010013556de0 6458 0002 GPR20: c084e940 0004 c003f8afa400 GPR24: 6457 c003fde09e68 GPR28: c003dfa7f028 c003dfa7f028 c003f31f7948 c003dfa7f0a8 NIP [c024552c] .d_shrink_del+0x9c/0xc0 LR [c02462b8] .shrink_dentry_list+0xc8/0x160 PACATMSCRATCH [80019033] Call Trace: [c003f31f77b0] [c003f8f1ef80] 0xc003f8f1ef80 (unreliable) [c003f31f7820] [c02462b8] .shrink_dentry_list+0xc8/0x160 [c003f31f78d0] [c0246754] .shrink_dcache_parent+0x44/0xa0 [c003f31f7980] [c02b0a0c] .proc_flush_task+0xbc/0x1f0 [c003f31f7a70] [c008e554] .release_task+0x94/0x530 [c003f31f7b50] 
[c008efb8] .wait_task_zombie+0x5c8/0x750 [c003f31f7c10] [c008fa80] .do_wait+0x120/0x2c0 [c003f31f7cd0] [c0091040] .SyS_wait4+0x90/0x130 [c003f31f7dc0] [c00910fc] .SyS_waitpid+0x1c/0x30 [c003f31f7e30] [c0009dfc] syscall_exit+0x0/0x7c Instruction dump: 7d09502a 3908 7d09512a 4bdcaf19 6000 38210070 e8010010 7c0803a6 4e800020 3d02fff2 8928dfd6 69290001 <0b09> 2fa9 41feff80 3921 ---[ end trace 56e8481827564dc3 ]--- == I forgot to mention that the TCP test used in the recreates is from the LTP suite from http://ltp.sourceforge.net It has been observed that each time we run into it so far, the host01 test is the one running. == After installing the kernel with some conditional printk statements in it, the condition hit and I caught a glimpse of it. I then dropped the system into xmon to dump out some further info: When we first detect that the dentry->d_flags is missing these flags DCACHE_SHRINK_LIST | DCACHE_LRU_LIST we start to dump some information about the dentry from within shrink_dentry_list() and got: dentry c003d6240ce8 name 5 lockcnt -128 flags 1048780 post-shrink-del: dentry c003d6240ce8 name 5 lockcnt -128 dentry c003d6240ce8 lockcnt -128 dentry c003d6240ce8 name 5 lockcnt -128 flags 1048780 post-shrink-del: dentry c003d6240ce8 name 5 lockcnt -128 dentry c003d6240ce8 lockcnt -128 dentry c003d6240ce8 name 5 lockcnt -128 flags 1048780 and this repeats over and over... The code (with printks) looks like this: 899 for (;;) { 900 dentry = list_entry_rcu(list->prev, struct dentry, d_lru); 901 if (>d_lru == list) 902 break; /* empty */ 903 904 if (print_more) 905 printk(KERN_WARNING "dentry %p lockcnt %d\n", 906 dentry, dentry->d_lockref.count); 907 908 /* 909 * Get the dentry lock, and re-verify that the dentry is 910 * this on the shrinking list. If it is, we know that 911 * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. 912 */
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, May 2, 2014 at 1:30 PM, Thomas Gleixner wrote: > > So what about manipulating the stack so that the popf does not enable > interrupts and do an explicit sti to get the benefit of the > one-instruction shadow ? That's what I already suggested in the original "I don't think popf works" email. It does get more complex since you now have to test things (there are very much cases where we get page faults and other exceptions with interrupts disabled), but it shouldn't be much worse. Btw, Andy, why did you do "popq %rsp"? That just looks crazy. If the stack isn't contiguous, the subsequent "popf" couldn't have worked anyway. And I bet it screws with the stack engine. So you should just have done something like "addq $16,%rsp" or whatever the constant ends up being. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/2] namespaces: log namespaces per task
Quoting Richard Guy Briggs (r...@redhat.com): > On 14/05/02, Serge E. Hallyn wrote: > > Quoting Richard Guy Briggs (r...@redhat.com): > > > I saw no replies to my questions when I replied a year after Aris' > > > posting, so > > > I don't know if it was ignored or got lost in stale threads: > > > > > > https://www.redhat.com/archives/linux-audit/2013-March/msg00020.html > > > > > > https://www.redhat.com/archives/linux-audit/2013-March/msg00033.html > > > > > > (https://lists.linux-foundation.org/pipermail/containers/2013-March/032063.html) > > > > > > https://www.redhat.com/archives/linux-audit/2014-January/msg00180.html > > > > > > I've tried to answer a number of questions that were raised in that > > > thread. > > > > > > The goal is not quite identical to Aris' patchset. > > > > > > The purpose is to track namespaces in use by logged processes from the > > > perspective of init_*_ns. The first patch defines a function to list > > > them. > > > The second patch provides an example of usage for audit_log_task_info() > > > which > > > is used by syscall audits, among others. audit_log_task() and > > > audit_common_recv_message() would be other potential use cases. > > > > > > Use a serial number per namespace (unique across one boot of one kernel) > > > instead of the inode number (which is claimed to have had the right to > > > change > > > reserved and is not necessarily unique if there is more than one proc > > > fs). It > > > could be argued that the inode numbers have now become a defacto > > > interface and > > > can't change now, but I'm proposing this approach to see if this helps > > > address > > > some of the objections to the earlier patchset. 
> > > > > > There could also have messages added to track the creation and the > > > destruction > > > of namespaces, listing the parent for hierarchical namespaces such as > > > pidns, > > > userns, and listing other ids for non-hierarchical namespaces, as well as > > > other > > > information to help identify a namespace. > > > > > > There has been some progress made for audit in net namespaces and pid > > > namespaces since this previous thread. net namespaces are now served as > > > peers > > > by one auditd in the init_net namespace with processes in a non-init_net > > > namespace being able to write records if they are in the init_user_ns and > > > have > > > CAP_AUDIT_WRITE. Processes in a non-init_pid_ns can now similarly write > > > records. As for CAP_AUDIT_READ, I just posted a patchset to check > > > capabilities > > > of userspace processes that try to join netlink broadcast groups. > > > > > > > > > Questions: > > > Is there a way to link serial numbers of namespaces involved in migration > > > of a > > > container to another kernel? (I had a brief look at CRIU.) Is there a > > > unique > > > identifier for each running instance of a kernel? Or at least some > > > identifier > > > within the container migration realm? > > > > Eric Biederman has always been adamantly opposed to adding new namespaces > > of namespaces, so the fact that you're asking this question concerns me. > > I have seen that position and I don't fully understand the justification > for it other than added complexity. > > One way that occured to me to be able to identify a kernel instance was > to look at CPU serial numbers or other CPU entity intended to be > globally unique, but that isn't universally available. That's one issue, which is uniqueness of namespaces cross-machines. 
But it gets worse if we consider that after allowing in-container audit, we'll have a nested container running, then have the parent container migrated to another host (or just checkpointed and restarted); now the nested container's indexes will all be changed. Is there any way audit can track who's who after the migration? That's not an indictment of the serial # approach, since (a) we don't have in-container audit yet and (b) we don't have c/r/migration of nested containers. But it's worth considering whether we can solve the issue with serial #s, and, if not, whether we can solve it with any other approach. I guess one approach to solve it would be to allow userspace to request a next serial #. Which will immediately lead us to a namespace of serial #s (since the requested # might be lower than the last used one on the new host). As you've said inode #s for /proc/self/ns/* probably aren't sufficiently unique, though perhaps we could attach a generation # for the sake of audit. Then after a c/r/migration the generation # may be different, but we may have a better shot at at least using the same ino#. > Another possibility was RTC reading at time of boot, but that isn't good > enough either. > > Both are dubious in VMs anyways. > > > The way things are right now, since audit belongs to the init userns, > > we can get away with saying if a container 'migrates', the new kernel > > will see a different set of
Re: [RFC][PATCH v2] hwmon: add support for Sensirion SHTC1 sensor
Hi Guenter, thanks for comments! I will include them in third version, but I have still few questions... On Don, 2014-05-01 at 19:06 -0700, Guenter Roeck wrote: > On 05/01/2014 04:05 PM, Tomas Pop wrote: > > One more time this patch sent with correct settings of my email client > > - I'm sorry for this. > > > > This is a second version of the driver for Sensirion SHTC1 humidity and > > temperature sensor. Initial version was submitted in July 2012. > > http://www.gossamer-threads.com/lists/linux/kernel/1569130#1569130 > > > > We included suggested corrections formerly discussed in this list after > > initial submission, but since it is quite a while, we are re-submitting > > it again as a request for comments. Here is a list of important changes > > to the initial version: > > > > * returning real error codes instead of -1 or -ENODEV > > * using boolean variables instead of bitmaps where possible > > * macros be16_to_cpup used for conversion of indianneess > > * corrected formula for decoding of humidity and temperature values > > * documentation update > > > > Patch was generated against kernel v3.15-rc3 > > > > Signed-off-by: Tomas Pop > > --- > > Documentation/hwmon/shtc1 | 38 + > > drivers/hwmon/Kconfig | 10 ++ > > drivers/hwmon/Makefile | 1 + > > drivers/hwmon/shtc1.c | 323 > > > > include/linux/platform_data/shtc1.h | 24 +++ > > 5 files changed, 396 insertions(+) > > create mode 100644 Documentation/hwmon/shtc1 > > create mode 100644 drivers/hwmon/shtc1.c > > create mode 100644 include/linux/platform_data/shtc1.h > > > > diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1 > > new file mode 100644 > > index 000..6a72ae2d > > --- /dev/null > > +++ b/Documentation/hwmon/shtc1 > > @@ -0,0 +1,38 @@ > > +Kernel driver shtc1 > > +=== > > + > > +Supported chips: > > + * Sensirion SHTC1 > > +Prefix: 'shtc1' > > +Addresses scanned: none > > +Datasheet: Publicly available at the Sensirion website > > + > > 
http://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/Humidity/Sensirion_Humidity_SHTC1_Datasheet.pdf > > Ok to add SHTW1 here if it is known to work. > Just say: > Datasheet: Not publicly available Actually, there is no way to find out, if you are speaking to SHTC1 or SHTW1. (i.e., the id is the same for both). So I will add it here and we will provide link to data-sheet later in a separate patch. > > + > > +Author: > > + Johannes Winkelmann > > + > > +Description > > +--- > > + > > +This driver implements support for the Sensirion SHTC1 chip, a humidity > > and > > Two spaces > > > +temperature sensor. Temperature is measured in degrees celsius, relative > > +humidity is expressed as a percentage. Driver can be used as well for SHTW1 > > +chip, that has the same electrical interface, but datasheet has not been > > ... for SHTW1, which has the same electrical interface. > > > +yet published. > > + > > Either add support for the second now, or don't mention it at all > (especially if the chip has a different ID and you don't want to add > that ID at this point for some reason). > > > +The device communicates with the I2C protocol. All sensors are set to the > > same > > ... are set to I2C address 0x70. > > > +I2C address 0x70, so an entry with I2C_BOARD_INFO("shtc1", 0x70) can be > > used > > +in the board setup code. See Documentation/i2c/instantiating-devices for > > +other methods to instantiate the device. > > + > I would suggest to just refer to the instantiating-devices document and drop > the I2C_BOARD_INFO example. > > > +Furthermore, there are two configuration options by means of platform_data: > > options configurable by means ... > > > +1. blocking (pull the I2C clock line down while performing the > > measurement) or > > + non-blocking, mode. Blocking mode will guarantee the fastest result, but > > non-blocking mode (no comma) > > > + the I2C bus will be busy during that time > > that time. > > > +2. high or low accuracy. 
Using high accuracy is always recommended. > > + > > +sysfs-Interface > > +--- > > + > > +temp1_input - temperature input > > +humidity1_input - humidity input > > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig > > index bc196f4..4d58149 100644 > > --- a/drivers/hwmon/Kconfig > > +++ b/drivers/hwmon/Kconfig > > @@ -1114,6 +1114,16 @@ config SENSORS_SHT21 > > This driver can also be built as a module. If so, the module > > will be called sht21. > > > > +config SENSORS_SHTC1 > > + tristate "Sensiron humidity and temperature sensors. SHTC1 and compat." > > + depends on I2C > > + help > > + If you say yes here you get support for the Sensiron SHTC1 humidity > > and SHTW1 ? I will add SHTW1 here as well > > > + and temperature sensor. > > + > > + This
Re: [PATCH 1/2] namespaces: give each namespace a serial number
Quoting Richard Guy Briggs (r...@redhat.com): > On 14/05/02, Serge E. Hallyn wrote: > > Quoting Richard Guy Briggs (r...@redhat.com): > > > > Most of this looks reasonable, but I'm curious about something, > > > > > +/** > > > + * ns_serial - compute a serial number for the namespace > > > + * > > > + * Compute a serial number for the namespace to uniquely identify it in > > > + * audit records. > > > + */ > > > +unsigned int ns_serial(void) > > > +{ > > > + static DEFINE_SPINLOCK(serial_lock); > > > + static unsigned int serial = 4; /* reserved for IPC, UTS, user, PID */ > > > + > > > + unsigned long flags; > > > + unsigned int ret; > > > + > > > + spin_lock_irqsave(&serial_lock, flags); > > > + do { > > > + ret = ++serial; > > > + } while (unlikely(!ret)); > > > > Why exactly are you doing this? Surely if serial is going to > > wrap around we've got a bigger problem than just wanting to > > bump one more time? > > Thanks for catching this. > The code was templated off audit_serial() which tries to solve a > different problem and rolling it is much more likely. I hadn't noticed > that rollover protection. However, I *had* thought of making it a long > (which would be the same size on 32-bit arches, but larger on 64-bit) > since a 64-bit system is more likely to roll it out of sheer speed and > resource availability. But perhaps a long long would be safer. Sounds good, and perhaps a BUG_ON(!serial) for good measure. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [perf] more perf_fuzzer memory corruption
On Fri, 2 May 2014, Vince Weaver wrote: > On Fri, 2 May 2014, Thomas Gleixner wrote: > > > Hmm, and where comes the WARN_ON in _free_event() from? That's not in > > Peters last patch. > > ahh, you're right :( My fault. I gave the new patch and the previous > patch similar names and applied the wrong one. > > OK the proper patch has been running the quick reproducer for a bit > without triggering the issue, I'll let it run a bit more and then upgrade > to full fuzzing. If you do that, please add the patch below. Thanks, tglx Index: linux-2.6/kernel/events/core.c === --- linux-2.6.orig/kernel/events/core.c +++ linux-2.6/kernel/events/core.c @@ -7378,7 +7378,7 @@ __perf_event_exit_task(struct perf_event struct perf_event_context *child_ctx, struct task_struct *child) { - perf_remove_from_context(child_event, !!child_event->parent); + perf_remove_from_context(child_event, true); /* * It can happen that the parent exits first, and has events -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] plist: include -DDEBUG if CONFIG_DEBUG_PI_LIST
On Fri, 2 May 2014 16:23:43 -0400 Dan Streetman wrote: > lib/plist.c uses pr_debug() in its test function, which is compiled > and run only when CONFIG_DEBUG_PI_LIST is set; however pr_debug() > is compiled out unless -DDEBUG is set for the file. > > Update lib/Makefile to add -DDEBUG to CFLAGS_plist.o if > CONFIG_DEBUG_PI_LIST is set, so that the pr_debug() output from > plist_test() is shown. Why not just use printk(KERN_DEBUG ...) then if we always want to print it? You could just place #define DEBUG in plist.c as well. Although I think just switching to printk() is better. -- Steve > > Signed-off-by: Dan Streetman > --- > lib/Makefile | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/lib/Makefile b/lib/Makefile > index 0cd7b68..fd1f4c8 100644 > --- a/lib/Makefile > +++ b/lib/Makefile > @@ -39,6 +39,10 @@ CFLAGS_kobject.o += -DDEBUG > CFLAGS_kobject_uevent.o += -DDEBUG > endif > > +ifeq ($(CONFIG_DEBUG_PI_LIST),y) > +CFLAGS_plist.o += -DDEBUG > +endif > + > obj-$(CONFIG_GENERIC_IOMAP) += iomap.o > obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o > obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] staging: bcm: fix lines over 80 characters
Signed-off-by: Greg DeAngelis --- drivers/staging/bcm/Transmit.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/staging/bcm/Transmit.c b/drivers/staging/bcm/Transmit.c index 2ed4836..4976746 100644 --- a/drivers/staging/bcm/Transmit.c +++ b/drivers/staging/bcm/Transmit.c @@ -46,12 +46,14 @@ int SendControlPacket(struct bcm_mini_adapter *Adapter, char *pControlPacket) BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_CONTROL, DBG_LVL_ALL, "Tx"); if (!pControlPacket || !Adapter) { - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_CONTROL, DBG_LVL_ALL, "Got NULL Control Packet or Adapter"); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_CONTROL, DBG_LVL_ALL, + "Got NULL Control Packet or Adapter"); return STATUS_FAILURE; } if ((atomic_read(>CurrNumFreeTxDesc) < ((PLeader->PLength-1)/MAX_DEVICE_DESC_SIZE)+1)) { - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_CONTROL, DBG_LVL_ALL, "NO FREE DESCRIPTORS TO SEND CONTROL PACKET"); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_CONTROL, DBG_LVL_ALL, + "NO FREE DESCRIPTORS TO SEND CONTROL PACKET"); return STATUS_FAILURE; } @@ -109,7 +111,8 @@ int SetupNextSend(struct bcm_mini_adapter *Adapter, struct sk_buff *Packet, USH (UINT *)>len, Adapter->PackInfo[QueueIndex].bEthCSSupport); if (status != STATUS_SUCCESS) { - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, NEXT_SEND, DBG_LVL_ALL, "PHS Transmit failed..\n"); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, NEXT_SEND, DBG_LVL_ALL, + "PHS Transmit failed..\n"); goto errExit; } @@ -217,12 +220,15 @@ int tx_pkt_handler(struct bcm_mini_adapter *Adapter /**< pointer to adapter obje Adapter->LinkStatus == SYNC_UP_REQUEST && !Adapter->bSyncUpRequestSent) { - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_PACKETS, DBG_LVL_ALL, "Calling LinkMessage"); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_PACKETS, + DBG_LVL_ALL, "Calling LinkMessage"); LinkMessage(Adapter); } if ((Adapter->IdleMode || Adapter->bShutStatus) && atomic_read(>TotalPacketCount)) { - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_PACKETS, 
DBG_LVL_ALL, "Device in Low Power mode...waking up"); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, + TX_PACKETS, DBG_LVL_ALL, + "Device in Low Power mode...waking up"); Adapter->usIdleModePattern = ABORT_IDLE_MODE; Adapter->bWakeUpDevice = TRUE; wake_up(&Adapter->process_rx_cntrlpkt); @@ -232,7 +238,8 @@ int tx_pkt_handler(struct bcm_mini_adapter *Adapter /**< pointer to adapter obje atomic_set(&Adapter->TxPktAvail, 0); } - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_PACKETS, DBG_LVL_ALL, "Exiting the tx thread..\n"); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_TX, TX_PACKETS, DBG_LVL_ALL, + "Exiting the tx thread..\n"); Adapter->transmit_packet_thread = NULL; return 0; } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] staging/comedi: fix sparse warning: shift too big
On 02/05/14 19:28, Hartley Sweeten wrote: On Friday, May 02, 2014 9:50 AM, Ian Abbott wrote: On 2014-05-02 15:57, Brilliantov Kirill Vladimirovich wrote: Signed-off-by: Brilliantov Kirill Vladimirovich --- drivers/staging/comedi/drivers/ni_stc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_stc.h b/drivers/staging/comedi/drivers/ni_stc.h index f0630b78..197b568 100644 --- a/drivers/staging/comedi/drivers/ni_stc.h +++ b/drivers/staging/comedi/drivers/ni_stc.h @@ -717,7 +717,7 @@ enum AI_AO_Select_Bits { static inline unsigned ni_stc_dma_channel_select_bitfield(unsigned channel) { if (channel < 4) - return 1 << channel; + return 1 << (channel & 0x3); if (channel == 4) return 0x3; if (channel == 5) The warning seems a bit spurious! The whole mite.c driver and its users are all a bit of a mess. I did find the information about the "AI AO Select Register" in the National Instruments PCI E Series manual: - The AI AO Select Register contains 8 bits that control the logical DMA selection for the analog input and analog output resources. The contents of this register are cleared upon power up and after a reset condition. Address: base address + 0x09 Type: write-only Word size: 8-bit Bit Name Description 7-4 Reserved Reserved - Always write 0 to these bits (for PCI-6032E and PCI-6033E only). 7-4 Output Analog Output Logical Channel D through A - These four bits select the logical channels of the MITE to be used by the analog output. You can only set one of these bits at a time (except for the PCI-6032E and PCI-6033E). 3-0 Input Analog Input Logical Channel D through A - These four bits select the logical channels to be used by the analog input. You can only set one of these bits at a time. - Based on this the ni_stc_dma_channel_select_bitfield() function appears to be incorrect. 
The "channel"s should only be in the range 0 to 3 and result in a returned bitfield of: channel bitfield Description 0 0x01 Analog Input or Output Channel A 1 0x02 Analog Input or Output Channel B 2 0x04 Analog Input or Output Channel C 3 0x08 Analog Input or Output Channel D Just my two cents... Apparently, the M-series cards have six DMA channels - two more than the E-series cards. They must have decided to keep the existing bitfield encoding for the first four channels for backwards compatibility, and use special bitfield values for the extra channels. -- -=( Ian Abbott @ MEV Ltd.E-mail: )=- -=( Tel: +44 (0)161 477 1898 FAX: +44 (0)161 718 3587 )=- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, 2 May 2014, Linus Torvalds wrote: > On Fri, May 2, 2014 at 12:31 PM, Linus Torvalds > wrote: > > > > Also, are you *really* sure that "popf" has the same one-instruction > > interrupt shadow that "sti" has? Because I'm not at all sure that is > > true, and it's not documented as far as I can tell. In contrast, the > > one-instruction shadow after "sti" very much _is_ documented. > > Yeah, I'm pretty sure about this. The only instructions with an > interrupt shadow are "sti", "mov ss" and "pop ss". > > There may be specific microarchitectures that do it for a "popf" that > enables interrupts too, but that is not documented _anywhere_ I could > find. So what about manipulating the stack so that the popf does not enable interrupts and do an explicit sti to get the benefit of the one-instruction shadow ? Thanks, tglx -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch v2 4/4] mm, thp: do not perform sync compaction on pagefault
On Fri, 2 May 2014, Mel Gorman wrote: > > The page lock I'm referring to is the lock_page() in __unmap_and_move() > > that gets called for sync compaction after the migrate_pages() iteration > > makes a few passes and unsuccessfully grabs it. This becomes a forced > > migration since __unmap_and_move() returns -EAGAIN when the trylock fails. > > > > Can that be fixed then instead of disabling it entirely? > We could return -EAGAIN when the trylock_page() fails for MIGRATE_SYNC_LIGHT. It would become a forced migration but we ignore that currently for MIGRATE_ASYNC, and I could extend it to be ignored for MIGRATE_SYNC_LIGHT as well. > > We have perf profiles from one workload in particular that shows > > contention on i_mmap_mutex (anon isn't interesting since the vast majority > > of memory on this workload [120GB on a 128GB machine] has a gup pin and > > doesn't get isolated because of 119d6d59dcc0 ("mm, compaction: avoid > > isolating pinned pages")) between cpus all doing memory compaction trying > > to fault thp memory. > > > > Abort SYNC_LIGHT compaction if the mutex is contended. > Yeah, I have patches for that as well but we're waiting to see if they are actually needed when sync compaction is disabled for thp. If we aren't actually going to disable it entirely, then I can revive those patches if the contention becomes such an issue. > > That's one example that we've seen, but the fact remains that at times > > sync compaction will iterate the entire 128GB machine and not allow an > > order-9 page to be allocated and there's nothing to preempt it like the > > need_resched() or lock contention checks that async compaction has. > > Make compact_control->sync the same enum field and check for contention > on the async/sync_light case but leave it for sync if compacting via the > proc interface? > Ok, that certainly can be done, I wasn't sure you would be happy with such a change. 
I'm not sure there's so much of a difference between the new compact_control->sync == MIGRATE_ASYNC and == MIGRATE_SYNC_LIGHT now, though. Would it make sense to remove MIGRATE_SYNC_LIGHT entirely from the page allocator, i.e. remove sync_migration entirely, and just retry with a second call to compaction before failing instead? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 2/6] x86: Move syscall and sysenter setup into kernel/cpu/common.c
On 05/02/2014 01:15 PM, Andy Lutomirski wrote: > This just moves code around. > > Signed-off-by: Andy Lutomirski One more thing: please explain *why* here. -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] plist: include -DDEBUG if CONFIG_DEBUG_PI_LIST
lib/plist.c uses pr_debug() in its test function, which is compiled and run only when CONFIG_DEBUG_PI_LIST is set; however pr_debug() is compiled out unless -DDEBUG is set for the file. Update lib/Makefile to add -DDEBUG to CFLAGS_plist.o if CONFIG_DEBUG_PI_LIST is set, so that the pr_debug() output from plist_test() is shown. Signed-off-by: Dan Streetman --- lib/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Makefile b/lib/Makefile index 0cd7b68..fd1f4c8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,6 +39,10 @@ CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG endif +ifeq ($(CONFIG_DEBUG_PI_LIST),y) +CFLAGS_plist.o += -DDEBUG +endif + obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/HACK] x86: Fast return to kernel
On Fri, 2 May 2014 12:31:42 -0700 Linus Torvalds wrote: > > And NMI not being re-enabled might just be a real advantage. Adding > Steven to the cc to make him aware of this patch. > There's not much of an advantage for NMIs, as they seldom page fault. We may get some due to vmalloc'd areas, but the whole nested NMI code that I wrote was to deal with breakpoints in NMIs. Although, this patch would have helped before my code, when doing things like dumping stacks from NMI context, as some stack dumps can trigger a page fault. In the past, I used to dump all tasks' states from NMI context to find why the system locked up hard. But due to the re-enabling of NMIs with page faults, that usually caused the system to triple fault, and made that debugging method rather useless. -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 0/4] introduce devm_mdiobus_alloc/free and clean up davinci mdio
From: Grygorii Strashko Date: Wed, 30 Apr 2014 15:23:32 +0300 > Introduce a resource managed devm_mdiobus_alloc[_size]()/devm_mdiobus_free() > to automatically clean up MDIO bus allocations made by MDIO drivers, > thus leading to simplified MDIO drivers code. > > Clean up Davinci MDIO driver and use new devm API. > > Changes in v3: > - added devm_mdiobus_alloc_size() and > devm_mdiobus_alloc() converted to be just a simple wrapper now. > > Changes in v2: > - minor comments taken into account > - additional patches added for cleaning up Davinci MDIO driver Series applied to net-next, thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 2/6] x86: Move syscall and sysenter setup into kernel/cpu/common.c
This just moves code around. Signed-off-by: Andy Lutomirski --- arch/x86/include/asm/proto.h | 2 -- arch/x86/kernel/cpu/common.c | 32 arch/x86/vdso/vdso32-setup.c | 30 -- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6fd3fd7..a90f897 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -12,8 +12,6 @@ void ia32_syscall(void); void ia32_cstar_target(void); void ia32_sysenter_target(void); -void syscall32_cpu_init(void); - void x86_configure_nx(void); void x86_report_nx(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a135239..7c65b46 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -953,6 +953,38 @@ static void vgetcpu_set_mode(void) else vgetcpu_mode = VGETCPU_LSL; } + +/* May not be __init: called during resume */ +static void syscall32_cpu_init(void) +{ + /* Load these always in case some future AMD CPU supports + SYSENTER from compat mode too. 
*/ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + + wrmsrl(MSR_CSTAR, ia32_cstar_target); +} +#endif + +#ifdef CONFIG_X86_32 +void enable_sep_cpu(void) +{ + int cpu = get_cpu(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); + return; + } + + tss->x86_tss.ss1 = __KERNEL_CS; + tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); + put_cpu(); +} #endif void __init identify_boot_cpu(void) diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 5a657d9..9c78d5b 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -75,41 +75,11 @@ static unsigned vdso32_size; #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) #define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) -/* May not be __init: called during resume */ -void syscall32_cpu_init(void) -{ - /* Load these always in case some future AMD CPU supports - SYSENTER from compat mode too. 
*/ - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); - - wrmsrl(MSR_CSTAR, ia32_cstar_target); -} - #else /* CONFIG_X86_32 */ #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) #define vdso32_syscall() (0) -void enable_sep_cpu(void) -{ - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - - if (!boot_cpu_has(X86_FEATURE_SEP)) { - put_cpu(); - return; - } - - tss->x86_tss.ss1 = __KERNEL_CS; - tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; - wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); - wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); - put_cpu(); -} - #endif /* CONFIG_X86_64 */ int __init sysenter_setup(void) -- 1.9.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 3/6] x86: Reimplement vdso.so preparation in build-time C
Currently, vdso.so files are prepared and analyzed by a combination of objcopy, nm, some linker script tricks, and some simple ELF parsers in the kernel. Replace all of that with plain C code that runs at build time. All five vdso images now generate .c files that are compiled and linked in to the kernel image. This should cause only one userspace-visible change: the loaded vDSO images are stripped more heavily than they used to be. Everything outside the loadable segment is dropped. In particular, this causes the section table and section name strings to be missing. This should be fine: real dynamic loaders don't load or inspect these tables anyway. The result is roughly equivalent to eu-strip's --strip-sections option. The purpose of this change is to enable the vvar and hpet mappings to be moved to the page following the vDSO load segment. Currently, it is possible for the section table to extend into the page after the load segment, so, if we map it, it risks overlapping the vvar or hpet page. This happens whenever the load segment is just under a multiple of PAGE_SIZE. The only real subtlety here is that the old code had a C file with inline assembler that did 'call VDSO32_vsyscall' and a linker script that defined 'VDSO32_vsyscall = __kernel_vsyscall'. This most likely worked by accident: the linker script entry defines a symbol associated with an address as opposed to an alias for the real dynamic symbol __kernel_vsyscall. That caused ld to relocate the reference at link time instead of leaving an interposable dynamic relocation. Since the VDSO32_vsyscall hack is no longer needed, I now use 'call __kernel_vsyscall', and I added -Bsymbolic to make it work. vdso2c will generate an error and abort the build if the resulting image contains any dynamic relocations, so we won't silently generate bad vdso images. (Dynamic relocations are a problem because nothing will even attempt to relocate the vdso.) 
Signed-off-by: Andy Lutomirski --- arch/x86/ia32/ia32_signal.c | 8 +-- arch/x86/include/asm/elf.h| 7 +- arch/x86/include/asm/mmu.h| 2 +- arch/x86/include/asm/vdso.h | 70 +++ arch/x86/kernel/signal.c | 6 +- arch/x86/mm/init_64.c | 3 +- arch/x86/vdso/.gitignore | 5 +- arch/x86/vdso/Makefile| 90 +--- arch/x86/vdso/vclock_gettime.c| 4 +- arch/x86/vdso/vdso.S | 3 - arch/x86/vdso/vdso2c.c| 142 ++ arch/x86/vdso/vdso2c.h| 137 arch/x86/vdso/vdso32-setup.c | 50 ++ arch/x86/vdso/vdso32.S| 9 --- arch/x86/vdso/vdso32/vdso32.lds.S | 10 --- arch/x86/vdso/vdsox32.S | 3 - arch/x86/vdso/vma.c | 100 +-- arch/x86/xen/setup.c | 11 ++- 18 files changed, 400 insertions(+), 260 deletions(-) delete mode 100644 arch/x86/vdso/vdso.S create mode 100644 arch/x86/vdso/vdso2c.c create mode 100644 arch/x86/vdso/vdso2c.h delete mode 100644 arch/x86/vdso/vdso32.S delete mode 100644 arch/x86/vdso/vdsox32.S diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 2206757..f9e181a 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -383,8 +383,8 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, } else { /* Return stub is in 32bit vsyscall page */ if (current->mm->context.vdso) - restorer = VDSO32_SYMBOL(current->mm->context.vdso, -sigreturn); + restorer = current->mm->context.vdso + + selected_vdso32->sym___kernel_sigreturn; else restorer = &frame->retcode; } @@ -462,8 +462,8 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; else - restorer = VDSO32_SYMBOL(current->mm->context.vdso, -rt_sigreturn); + restorer = current->mm->context.vdso + + selected_vdso32->sym___kernel_rt_sigreturn; put_user_ex(ptr_to_compat(restorer), &frame->pretcode); /* diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index e96df2c..65b21bc 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -299,7 +299,7 @@ do { \ do { \ if (vdso64_enabled) \ 
NEW_AUX_ENT(AT_SYSINFO_EHDR,\ -
[PATCH v3 6/6] x86: Remove vestiges of VDSO_PRELINK and some outdated comments
These definitions had no effect. Signed-off-by: Andy Lutomirski --- arch/x86/vdso/vdso.lds.S | 7 +-- arch/x86/vdso/vdso32/vdso32.lds.S | 5 + arch/x86/vdso/vdsox32.lds.S | 7 +-- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S index b96b267..75e3404 100644 --- a/arch/x86/vdso/vdso.lds.S +++ b/arch/x86/vdso/vdso.lds.S @@ -1,14 +1,11 @@ /* * Linker script for 64-bit vDSO. * We #include the file to define the layout details. - * Here we only choose the prelinked virtual address. * * This file defines the version script giving the user-exported symbols in - * the DSO. We can define local symbols here called VDSO* to make their - * values visible using the asm-x86/vdso.h macros from the kernel proper. + * the DSO. */ -#define VDSO_PRELINK 0xff70 #include "vdso-layout.lds.S" /* @@ -28,5 +25,3 @@ VERSION { local: *; }; } - -VDSO64_PRELINK = VDSO_PRELINK; diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S index f072095..31056cf 100644 --- a/arch/x86/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/vdso/vdso32/vdso32.lds.S @@ -1,17 +1,14 @@ /* * Linker script for 32-bit vDSO. * We #include the file to define the layout details. - * Here we only choose the prelinked virtual address. * * This file defines the version script giving the user-exported symbols in - * the DSO. We can define local symbols here called VDSO* to make their - * values visible using the asm-x86/vdso.h macros from the kernel proper. + * the DSO. */ #include #define BUILD_VDSO32 -#define VDSO_PRELINK 0 #include "../vdso-layout.lds.S" diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S index 62272aa..46b991b 100644 --- a/arch/x86/vdso/vdsox32.lds.S +++ b/arch/x86/vdso/vdsox32.lds.S @@ -1,14 +1,11 @@ /* * Linker script for x32 vDSO. * We #include the file to define the layout details. - * Here we only choose the prelinked virtual address. 
* * This file defines the version script giving the user-exported symbols in - * the DSO. We can define local symbols here called VDSO* to make their - * values visible using the asm-x86/vdso.h macros from the kernel proper. + * the DSO. */ -#define VDSO_PRELINK 0 #include "vdso-layout.lds.S" /* @@ -24,5 +21,3 @@ VERSION { local: *; }; } - -VDSOX32_PRELINK = VDSO_PRELINK; -- 1.9.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/