[PATCH] PCI: iproc: Remove PAXC slot check to allow VF support
From: Jitendra Bhivare Fix previous incorrect logic that limits PAXC slot number to zero only. In order for SRIOV/VF to work, we need to allow the slot number to be greater than zero. Fixes: 46560388c476c ("PCI: iproc: Allow multiple devices except on PAXC") Signed-off-by: Jitendra Bhivare Signed-off-by: Ray Jui Reviewed-by: Andy Gospodarek --- drivers/pci/controller/pcie-iproc.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 3160e9342a2f..c20fd6bd68fd 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -630,14 +630,6 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie, return (pcie->base + offset); } - /* -* PAXC is connected to an internally emulated EP within the SoC. It -* allows only one device. -*/ - if (pcie->ep_is_internal) - if (slot > 0) - return NULL; - return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where); } -- 2.17.1
[PATCH] PCI: iproc: Remove PAXC slot check to allow VF support
From: Jitendra Bhivare Fix previous incorrect logic that limits PAXC slot number to zero only. In order for SRIOV/VF to work, we need to allow the slot number to be greater than zero. Fixes: 46560388c476c ("PCI: iproc: Allow multiple devices except on PAXC") Signed-off-by: Jitendra Bhivare Signed-off-by: Ray Jui Reviewed-by: Andy Gospodarek --- drivers/pci/controller/pcie-iproc.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 3160e9342a2f..c20fd6bd68fd 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -630,14 +630,6 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie, return (pcie->base + offset); } - /* -* PAXC is connected to an internally emulated EP within the SoC. It -* allows only one device. -*/ - if (pcie->ep_is_internal) - if (slot > 0) - return NULL; - return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where); } -- 2.17.1
Re: [PATCH 1/2] x86/mm: add .data..decrypted section to hold shared variables
On 08/27/2018 05:11 PM, Tom Lendacky wrote: On 08/27/2018 06:24 AM, Brijesh Singh wrote: kvmclock defines few static variables which are shared with hypervisor during the kvmclock initialization. When SEV is active, memory is encrypted with a guest-specific key, and if guest OS wants to share the memory region with hypervisor then it must clear the C-bit before sharing it. The '__decrypted' can be used to define a shared variables; the variables will be put in the .data.decryption section. This section is mapped with C=0 early in the boot, we also ensure that the initialized values are updated to match with C=0 (i.e peform an in-place decryption). The .data..decrypted section is PMD aligned and sized so that we avoid the need for spliting the pages when map with C=0. This should probably be broken into a few smaller patches. Maybe a patch that adds the section and the attribute, a patch that re-arranges the mapping setup and then the in-place decryption and clearing of the encryption bit for the area. OK, I will break the patch. Probably will create a separate patch which just re-arranges the mapping setup without making any logical changes. Signed-off-by: Brijesh Singh Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency") Cc: sta...@vger.kernel.org Cc: Tom Lendacky Cc: k...@vger.kernel.org Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: linux-kernel@vger.kernel.org Cc: Paolo Bonzini Cc: Sean Christopherson Cc: "Radim Krčmář" --- arch/x86/include/asm/mem_encrypt.h | 4 + arch/x86/kernel/head64.c | 12 ++ arch/x86/kernel/vmlinux.lds.S | 18 +++ arch/x86/mm/mem_encrypt_identity.c | 220 +++-- 4 files changed, 197 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c064383..3f7d9d3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -52,6 +52,8 @@ void __init mem_encrypt_init(void); bool sme_active(void); bool sev_active(void); +#define __decrypted __attribute__((__section__(".data..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __decrypted + #endif/* CONFIG_AMD_MEM_ENCRYPT */ /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379..6a18297 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -43,6 +43,9 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); +/* To clear memory encryption mask from the decrypted section */ +extern char __start_data_decrypted[], __end_data_decrypted[]; + Should find a header for these rather than defining them here. OK, will move then in mem_encrypt.h. Will that work ? #ifdef CONFIG_X86_5LEVEL unsigned int __pgtable_l5_enabled __ro_after_init; unsigned int pgdir_shift __ro_after_init = 39; @@ -112,6 +115,7 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { + unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -234,6 +238,14 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Encrypt the kernel and related (if SME is active) */ sme_encrypt_kernel(bp); + /* Clear the memory encryption mask from the decrypted section */ + vaddr = (unsigned long)__start_data_decrypted; + vaddr_end = (unsigned long)__end_data_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + /* * Return the SME encryption mask (if SME is active) to be used as a * modifier for the initial pgdir entry programmed into CR3. diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a4..511b875 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -89,6 +89,22 @@ PHDRS { note PT_NOTE FLAGS(0); /* ___ */ } +/* + * This section contains data which will be mapped as decrypted. Memory + * encryption operates on a page basis. But we make this section a pmd + * aligned to avoid spliting the pages while mapping the section early. + * + * Note: We use a separate section so that only this section gets + * decrypted to avoid exposing more than we wish. + */ +#define DATA_DECRYPTED_SECTION \ + . =
Re: [PATCH 1/2] x86/mm: add .data..decrypted section to hold shared variables
On 08/27/2018 05:11 PM, Tom Lendacky wrote: On 08/27/2018 06:24 AM, Brijesh Singh wrote: kvmclock defines few static variables which are shared with hypervisor during the kvmclock initialization. When SEV is active, memory is encrypted with a guest-specific key, and if guest OS wants to share the memory region with hypervisor then it must clear the C-bit before sharing it. The '__decrypted' can be used to define a shared variables; the variables will be put in the .data.decryption section. This section is mapped with C=0 early in the boot, we also ensure that the initialized values are updated to match with C=0 (i.e peform an in-place decryption). The .data..decrypted section is PMD aligned and sized so that we avoid the need for spliting the pages when map with C=0. This should probably be broken into a few smaller patches. Maybe a patch that adds the section and the attribute, a patch that re-arranges the mapping setup and then the in-place decryption and clearing of the encryption bit for the area. OK, I will break the patch. Probably will create a separate patch which just re-arranges the mapping setup without making any logical changes. Signed-off-by: Brijesh Singh Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency") Cc: sta...@vger.kernel.org Cc: Tom Lendacky Cc: k...@vger.kernel.org Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: linux-kernel@vger.kernel.org Cc: Paolo Bonzini Cc: Sean Christopherson Cc: "Radim Krčmář" --- arch/x86/include/asm/mem_encrypt.h | 4 + arch/x86/kernel/head64.c | 12 ++ arch/x86/kernel/vmlinux.lds.S | 18 +++ arch/x86/mm/mem_encrypt_identity.c | 220 +++-- 4 files changed, 197 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c064383..3f7d9d3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -52,6 +52,8 @@ void __init mem_encrypt_init(void); bool sme_active(void); bool sev_active(void); +#define __decrypted __attribute__((__section__(".data..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __decrypted + #endif/* CONFIG_AMD_MEM_ENCRYPT */ /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379..6a18297 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -43,6 +43,9 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); +/* To clear memory encryption mask from the decrypted section */ +extern char __start_data_decrypted[], __end_data_decrypted[]; + Should find a header for these rather than defining them here. OK, will move then in mem_encrypt.h. Will that work ? #ifdef CONFIG_X86_5LEVEL unsigned int __pgtable_l5_enabled __ro_after_init; unsigned int pgdir_shift __ro_after_init = 39; @@ -112,6 +115,7 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { + unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -234,6 +238,14 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Encrypt the kernel and related (if SME is active) */ sme_encrypt_kernel(bp); + /* Clear the memory encryption mask from the decrypted section */ + vaddr = (unsigned long)__start_data_decrypted; + vaddr_end = (unsigned long)__end_data_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + /* * Return the SME encryption mask (if SME is active) to be used as a * modifier for the initial pgdir entry programmed into CR3. diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a4..511b875 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -89,6 +89,22 @@ PHDRS { note PT_NOTE FLAGS(0); /* ___ */ } +/* + * This section contains data which will be mapped as decrypted. Memory + * encryption operates on a page basis. But we make this section a pmd + * aligned to avoid spliting the pages while mapping the section early. + * + * Note: We use a separate section so that only this section gets + * decrypted to avoid exposing more than we wish. + */ +#define DATA_DECRYPTED_SECTION \ + . =
Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM
Hi Tyler, On 24/08/18 16:14, Tyler Baicar wrote: > On Fri, Aug 24, 2018 at 5:48 AM, James Morse wrote: >> On 23/08/18 16:46, Tyler Baicar wrote: >>> On Thu, Aug 23, 2018 at 5:29 AM James Morse wrote: On 19/07/18 19:36, Tyler Baicar wrote: > This seems pretty hacky to me, so if anyone has other suggestions please > share > them. CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE should provide the information necessary to identify the failing FRU". As EDAC has three 'levels', these are what they should correspond to for ghes-edac. I assume NODE means rack/chassis in some distributed system. Lets ignore it as it doesn't seem to map to anything in the SMBIOS table. >>> >>> I believe NODE should map to socket number for multi-socket systems. >> >> Isn't the Memory Array Structure still unique in a multi-socket system? If so >> the node isn't telling us anything new. > Yes, the Memory Array structure in SMBIOS is still unique, but the NODE value > is needed in NODE, CARD, MODULE because the CARD number here typically > maps to channel number which each socket has their own channel numbers. /me flinches at 'typically'. Okay, so the hierarchy applies to the numbers, not the handles. How come there isn't a node handle? I think we should ignore the extra hierarchy. The module/devices/dimms are the only replaceable part, and we don't know if the firmware will provide the information. >> Do sockets show up in the SMBIOS table? We would need to know how many there >> are >> in advance. For arm systems the cpu topology from PPTT is the best bet for >> this >> information, but what do we do if that table is missing? (also, does firmware >> count from 1 or 0?) I suspect we can't use this field unless we know what the >> range of values is going to be in advance. > > An Fan mentioned in his response, what the customers really care about > is mapping to > a particular DIMM since that is what they can replace. To do this, the > Memory Device > handle should be enough since those are all unique regardless of > Memory Array handle > and which socket the DIMM is on. The Firmware I've worked with counts > from 0, but I'm > not sure if that is required. If the spec doesn't say, its difficult to know the range of values until we've seen one of the limits. > That won't matter if we just use the Memory Device handle. I agree. >>> I think the proper way to get this working would be to use these handles. >>> We can >>> avoid populating this layer information and instead have a mapping of type >>> 17 >>> index number (how edac is numbering the DIMMs today) to the handle number. >> >> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what >> EDAC_MC_LAYER_SLOT is for? > > The problem with the layer reporting is that you need to know all the > layer information as Fan mentioned. I haven't spotted what requires this, there seems to be a bit of a mix of numbers-of-layers and what order they go in. thunderx_edac.c just uses SLOT. I think we can get away with using EDAC_MC_LAYER_ALL_MEM, sized by num_dimms (which ghes-edac already does). Providing extra topology may not be useful unless the firmware populates this information. What do we do if we export card+module, but then firmware only populates the module-handle? > SoCs can support multiple board combinations (ie > 1DPC vs. 2DPC) > and there is no standardized way of knowing whether you are booted on a 1DPC > or > 2DPC board. > >>> Then we will need a new function to increment the counter based on the >>> handle >>> number rather than this layer information. Is that how you are envisioning >>> it? >> >> I'm not familiar with edac's internals, so I didn't have any particular >> vision! >> >> Isn't the problem that ghes_edac_report_mem_error() does this: >> | e->top_layer = -1; >> | e->mid_layer = -1; >> | e->low_layer = -1; > > The other problem is that the sysfs nodes are all setup with a single > layer representing > all of the memory on the board. > > https://elixir.bootlin.com/linux/latest/source/drivers/edac/ghes_edac.c#L469 > > So the DIMM counters exposed in sysfs are all under a single memory > controller and just > numbered from 0 to n-1 based on the order in which the type 17 SMBIOS > entries show up > in the DMI walk. User-space depending on the dmi walk order makes me nervous. Doing this gives you per-dimm counters, if user-space needs to know which physical-dimm/slot that is, we'd need a way of matching it with one of the smbios location strings. >> so edac_raw_mc_handle_error() has no clue where the error happened. (I >> haven't >> read what it does with this information yet). >> >> ghes_edac_report_mem_error() does check CPER_MEM_VALID_MODULE_HANDLE, and if >> its >> set, it uses the handle to find the bank/device strings and prints them out. > > Yes, I think this is where we need to add support to increment the > count
Re: [PATCH v2 1/6] dt-bindings: reset: Add PDC Global binding for SDM845 SoCs
On Tue, Aug 28, 2018 at 11:38:26AM +0530, si...@codeaurora.org wrote: > Hi Matthias, > Thanks for the review > > On 2018-08-28 06:08, Matthias Kaehlcke wrote: > > Hi Sibi, > > > > On Fri, Aug 24, 2018 at 06:48:55PM +0530, Sibi Sankar wrote: > > > Add PDC Global(Power Domain Controller) binding for SDM845 SoCs. > > > > nit: missing blank before the opening parenthesis. > > > > Will fix it > > > > > > > Signed-off-by: Sibi Sankar > > > --- > > > .../bindings/reset/qcom,pdc-global.txt| 52 > > > +++ > > > include/dt-bindings/reset/qcom,sdm845-pdc.h | 20 +++ > > > 2 files changed, 72 insertions(+) > > > create mode 100644 > > > Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > create mode 100644 include/dt-bindings/reset/qcom,sdm845-pdc.h > > > > > > diff --git > > > a/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > new file mode 100644 > > > index ..69f9edca9503 > > > --- /dev/null > > > +++ b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > @@ -0,0 +1,52 @@ > > > +PDC Global > > > +== > > > + > > > +This binding describes a reset-controller found on PDC-Global(Power > > > Domain > > > +Controller) block for Qualcomm Technologies Inc SDM845 SoCs. > > > > Are there other PDC reset controllers that aren't 'global'? Otherwise > > I'd suggest to use 'pdc-reset' instead of 'pdc-global', which is more > > specific and in line with the name of the driver added by this series. > > Or something like 'pdc-reset-global/main' if there are other > > controllers? > > > > These are the only reset lines found in the pdc-global register space. But > as > explained by Bjorn, wouldn't it be better to leave it as such since > pdc-global > best describes the hardware without being limited by the current > functionality > it is being used for? If I understand Bjorn's reply on https://patchwork.kernel.org/patch/10575335/ correctly it is planned to use the 'pdc-global' compatible string in the future in some sort of MFD driver for the 'PDC Global' IP block, which then instantiates other drivers like the reset controller. In this case I agree that 'pdc-global' seems a reasonable choice.
Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM
Hi Tyler, On 24/08/18 16:14, Tyler Baicar wrote: > On Fri, Aug 24, 2018 at 5:48 AM, James Morse wrote: >> On 23/08/18 16:46, Tyler Baicar wrote: >>> On Thu, Aug 23, 2018 at 5:29 AM James Morse wrote: On 19/07/18 19:36, Tyler Baicar wrote: > This seems pretty hacky to me, so if anyone has other suggestions please > share > them. CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE should provide the information necessary to identify the failing FRU". As EDAC has three 'levels', these are what they should correspond to for ghes-edac. I assume NODE means rack/chassis in some distributed system. Lets ignore it as it doesn't seem to map to anything in the SMBIOS table. >>> >>> I believe NODE should map to socket number for multi-socket systems. >> >> Isn't the Memory Array Structure still unique in a multi-socket system? If so >> the node isn't telling us anything new. > Yes, the Memory Array structure in SMBIOS is still unique, but the NODE value > is needed in NODE, CARD, MODULE because the CARD number here typically > maps to channel number which each socket has their own channel numbers. /me flinches at 'typically'. Okay, so the hierarchy applies to the numbers, not the handles. How come there isn't a node handle? I think we should ignore the extra hierarchy. The module/devices/dimms are the only replaceable part, and we don't know if the firmware will provide the information. >> Do sockets show up in the SMBIOS table? We would need to know how many there >> are >> in advance. For arm systems the cpu topology from PPTT is the best bet for >> this >> information, but what do we do if that table is missing? (also, does firmware >> count from 1 or 0?) I suspect we can't use this field unless we know what the >> range of values is going to be in advance. > > An Fan mentioned in his response, what the customers really care about > is mapping to > a particular DIMM since that is what they can replace. To do this, the > Memory Device > handle should be enough since those are all unique regardless of > Memory Array handle > and which socket the DIMM is on. The Firmware I've worked with counts > from 0, but I'm > not sure if that is required. If the spec doesn't say, its difficult to know the range of values until we've seen one of the limits. > That won't matter if we just use the Memory Device handle. I agree. >>> I think the proper way to get this working would be to use these handles. >>> We can >>> avoid populating this layer information and instead have a mapping of type >>> 17 >>> index number (how edac is numbering the DIMMs today) to the handle number. >> >> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what >> EDAC_MC_LAYER_SLOT is for? > > The problem with the layer reporting is that you need to know all the > layer information as Fan mentioned. I haven't spotted what requires this, there seems to be a bit of a mix of numbers-of-layers and what order they go in. thunderx_edac.c just uses SLOT. I think we can get away with using EDAC_MC_LAYER_ALL_MEM, sized by num_dimms (which ghes-edac already does). Providing extra topology may not be useful unless the firmware populates this information. What do we do if we export card+module, but then firmware only populates the module-handle? > SoCs can support multiple board combinations (ie > 1DPC vs. 2DPC) > and there is no standardized way of knowing whether you are booted on a 1DPC > or > 2DPC board. > >>> Then we will need a new function to increment the counter based on the >>> handle >>> number rather than this layer information. Is that how you are envisioning >>> it? >> >> I'm not familiar with edac's internals, so I didn't have any particular >> vision! >> >> Isn't the problem that ghes_edac_report_mem_error() does this: >> | e->top_layer = -1; >> | e->mid_layer = -1; >> | e->low_layer = -1; > > The other problem is that the sysfs nodes are all setup with a single > layer representing > all of the memory on the board. > > https://elixir.bootlin.com/linux/latest/source/drivers/edac/ghes_edac.c#L469 > > So the DIMM counters exposed in sysfs are all under a single memory > controller and just > numbered from 0 to n-1 based on the order in which the type 17 SMBIOS > entries show up > in the DMI walk. User-space depending on the dmi walk order makes me nervous. Doing this gives you per-dimm counters, if user-space needs to know which physical-dimm/slot that is, we'd need a way of matching it with one of the smbios location strings. >> so edac_raw_mc_handle_error() has no clue where the error happened. (I >> haven't >> read what it does with this information yet). >> >> ghes_edac_report_mem_error() does check CPER_MEM_VALID_MODULE_HANDLE, and if >> its >> set, it uses the handle to find the bank/device strings and prints them out. > > Yes, I think this is where we need to add support to increment the > count
Re: [PATCH v2 1/6] dt-bindings: reset: Add PDC Global binding for SDM845 SoCs
On Tue, Aug 28, 2018 at 11:38:26AM +0530, si...@codeaurora.org wrote: > Hi Matthias, > Thanks for the review > > On 2018-08-28 06:08, Matthias Kaehlcke wrote: > > Hi Sibi, > > > > On Fri, Aug 24, 2018 at 06:48:55PM +0530, Sibi Sankar wrote: > > > Add PDC Global(Power Domain Controller) binding for SDM845 SoCs. > > > > nit: missing blank before the opening parenthesis. > > > > Will fix it > > > > > > > Signed-off-by: Sibi Sankar > > > --- > > > .../bindings/reset/qcom,pdc-global.txt| 52 > > > +++ > > > include/dt-bindings/reset/qcom,sdm845-pdc.h | 20 +++ > > > 2 files changed, 72 insertions(+) > > > create mode 100644 > > > Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > create mode 100644 include/dt-bindings/reset/qcom,sdm845-pdc.h > > > > > > diff --git > > > a/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > new file mode 100644 > > > index ..69f9edca9503 > > > --- /dev/null > > > +++ b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt > > > @@ -0,0 +1,52 @@ > > > +PDC Global > > > +== > > > + > > > +This binding describes a reset-controller found on PDC-Global(Power > > > Domain > > > +Controller) block for Qualcomm Technologies Inc SDM845 SoCs. > > > > Are there other PDC reset controllers that aren't 'global'? Otherwise > > I'd suggest to use 'pdc-reset' instead of 'pdc-global', which is more > > specific and in line with the name of the driver added by this series. > > Or something like 'pdc-reset-global/main' if there are other > > controllers? > > > > These are the only reset lines found in the pdc-global register space. But > as > explained by Bjorn, wouldn't it be better to leave it as such since > pdc-global > best describes the hardware without being limited by the current > functionality > it is being used for? If I understand Bjorn's reply on https://patchwork.kernel.org/patch/10575335/ correctly it is planned to use the 'pdc-global' compatible string in the future in some sort of MFD driver for the 'PDC Global' IP block, which then instantiates other drivers like the reset controller. In this case I agree that 'pdc-global' seems a reasonable choice.
Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM
Hi Fan, On 24/08/18 15:30, wufan wrote: >> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what >> EDAC_MC_LAYER_SLOT is for? > > Borislav has explained it in his response. Here let me elaborate a little > more. > To use the layer information you need an accurate way to pinpoint each > component > in the layer and the parent components in the layers above. For example, to > use > EDAC_MC_LAYER_SLOT you also need information for the parent layer say > EDAC_MC_LAYER_CHANNEL, or another layer on top say EDAC_MC_LAYER_BRANCH. I haven't spotted anything that forces a particular meaning/topology on these types. (there are four of them, but only three 'levels') i3000_edac.c has EDAC_MC_LAYER_CHIP_SELECT then EDAC_MC_LAYER_CHANNEL, but i5400_edac.c has EDAC_MC_LAYER_BRANCH then EDAC_MC_LAYER_CHANNEL. pnd2_edac.c agrees that channel comes before slot, but thunderx_edac.c just uses slot directly ghes-edac already describes memory as 'EDAC_MC_LAYER_ALL_MEM', with num_dimms, I think we just need to get 'the' dimm number. Using the cper-module-handle means we don't have to worry about firmware's count of dimms being different, as we both agree its smbios-type-17 we're talking about. > There > are no clear ways to get the information from SMBIOS table. In the case of > "memory > channel" we looked at type 37 which has the exact spelling but it was > introduced > to support RamBus and Synclink. Not sure we can readily use it for modern > architecture concept of "channel/slot". I think we should avoid this 'channel' thing as it means different things to different people! We can use ghes:card smbios:physical-memory-array as the UEFI spec tells us they are the same, and we don't actually need to know what they mean. > I think it is good enough if we can pin each error to the corresponding DIMM. > At the end of the day DIMMs are what customer can replace in the memory system > and that's all that they care about. For the manufacturers of the board/chips > they have the knowledge to map the specific DIMMs to the upper layer > components, > so they can easily collect error counter data for upper layers. I agree. >> CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE >> should provide the information necessary to identify the failing FRU". As >> EDAC has three 'levels', these are what they should correspond to for ghes- >> edac. >> >> I assume NODE means rack/chassis in some distributed system. Lets ignore it >> as it doesn't seem to map to anything in the SMBIOS table. > > How about type 4 "Processor Information"? As the spec doesn't tell us what the field means, we can't really do anything other than print the value out. >> 'Card' doesn't mean much to me, but it maps to SMBIOS:17 "Memory Array >> Structure", which the Memory Device structure also points to. >> Card then must mean "a collection of memory devices (DIMMs) that operate >> together to form an address space". >> >> This might be what I think of as a memory-controller, or it might be >> something more complicated. Regardless, the CPER records think its relevant. > > Originally I thought "Card" were memory channel. But looking at the definition > of "Card Handle" in CPER: "... this field contains the SMBIOS handle for the > Type 16 Memory Array Structure that represents the memory card". So Card is > memory controller or something similar to that. > Right now ghes-edac assumes > one mc. We probably need to map mc(s) to the type 16 instances in SMBIOS > table. I think we should ignore 'mc's, and just report the dimm numbers. ghes-edac only cares about the number of dimms today, and this would work on systems that only describe the dimms in the smbios table. Thanks, James
Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM
Hi Fan, On 24/08/18 15:30, wufan wrote: >> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what >> EDAC_MC_LAYER_SLOT is for? > > Borislav has explained it in his response. Here let me elaborate a little > more. > To use the layer information you need an accurate way to pinpoint each > component > in the layer and the parent components in the layers above. For example, to > use > EDAC_MC_LAYER_SLOT you also need information for the parent layer say > EDAC_MC_LAYER_CHANNEL, or another layer on top say EDAC_MC_LAYER_BRANCH. I haven't spotted anything that forces a particular meaning/topology on these types. (there are four of them, but only three 'levels') i3000_edac.c has EDAC_MC_LAYER_CHIP_SELECT then EDAC_MC_LAYER_CHANNEL, but i5400_edac.c has EDAC_MC_LAYER_BRANCH then EDAC_MC_LAYER_CHANNEL. pnd2_edac.c agrees that channel comes before slot, but thunderx_edac.c just uses slot directly ghes-edac already describes memory as 'EDAC_MC_LAYER_ALL_MEM', with num_dimms, I think we just need to get 'the' dimm number. Using the cper-module-handle means we don't have to worry about firmware's count of dimms being different, as we both agree its smbios-type-17 we're talking about. > There > are no clear ways to get the information from SMBIOS table. In the case of > "memory > channel" we looked at type 37 which has the exact spelling but it was > introduced > to support RamBus and Synclink. Not sure we can readily use it for modern > architecture concept of "channel/slot". I think we should avoid this 'channel' thing as it means different things to different people! We can use ghes:card smbios:physical-memory-array as the UEFI spec tells us they are the same, and we don't actually need to know what they mean. > I think it is good enough if we can pin each error to the corresponding DIMM. > At the end of the day DIMMs are what customer can replace in the memory system > and that's all that they care about. For the manufacturers of the board/chips > they have the knowledge to map the specific DIMMs to the upper layer > components, > so they can easily collect error counter data for upper layers. I agree. >> CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE >> should provide the information necessary to identify the failing FRU". As >> EDAC has three 'levels', these are what they should correspond to for ghes- >> edac. >> >> I assume NODE means rack/chassis in some distributed system. Lets ignore it >> as it doesn't seem to map to anything in the SMBIOS table. > > How about type 4 "Processor Information"? As the spec doesn't tell us what the field means, we can't really do anything other than print the value out. >> 'Card' doesn't mean much to me, but it maps to SMBIOS:17 "Memory Array >> Structure", which the Memory Device structure also points to. >> Card then must mean "a collection of memory devices (DIMMs) that operate >> together to form an address space". >> >> This might be what I think of as a memory-controller, or it might be >> something more complicated. Regardless, the CPER records think its relevant. > > Originally I thought "Card" were memory channel. But looking at the definition > of "Card Handle" in CPER: "... this field contains the SMBIOS handle for the > Type 16 Memory Array Structure that represents the memory card". So Card is > memory controller or something similar to that. > Right now ghes-edac assumes > one mc. We probably need to map mc(s) to the type 16 instances in SMBIOS > table. I think we should ignore 'mc's, and just report the dimm numbers. ghes-edac only cares about the number of dimms today, and this would work on systems that only describe the dimms in the smbios table. Thanks, James
Re: [PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier
On Tue, Aug 28, 2018 at 2:06 PM, Pierre-Jean Texier wrote: > Adopt the SPDX license identifier headers to ease > license compliance management. > > Signed-off-by: Pierre-Jean Texier Reviewed-by: Fabio Estevam
Re: [PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier
On Tue, Aug 28, 2018 at 2:06 PM, Pierre-Jean Texier wrote: > Adopt the SPDX license identifier headers to ease > license compliance management. > > Signed-off-by: Pierre-Jean Texier Reviewed-by: Fabio Estevam
Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM
Hi Boris, On 24/08/18 13:01, Borislav Petkov wrote: > On Fri, Aug 24, 2018 at 10:48:24AM +0100, James Morse wrote: >> so edac_raw_mc_handle_error() has no clue where the error happened. (I >> haven't >> read what it does with this information yet). > > See edac_inc_ce_error(), for example - it uses the layers which are not > negative (-1) to increment the error counts of the respective layer. It > all depends on what granularity of the hardware part you're reporting > the error for: is it a DIMM rank, a whole DIMM or for a channel which > can span multiple DIMM ranks. And so on... > > Look at some of the drivers and how they're doing that layering. It all > depends on whether you can get the precise info from the hw. Hmmm, in this example we need the information from firmware, as that is where ghes-edac gets its information from. We already count the module/device/dimms in the smbios table, memory is described as 'EDAC_MC_LAYER_ALL_MEM' with num_dimms. I think all we're missing is which dimm in ghes_edac_report_mem_error(). We have the handle, we just need a number between 1 and num_dimms. If it turns out firmware doesn't populate the handles in its cper records, then we can keep e->enable_per_layer_report false when calling edac_raw_mc_handle_error(). (I suggest we ignore 'card', and just do this for the device/dimms). >> Naively I thought we could generate some index during >> ghes_edac_count_dimms(), >> and use this as e->${whichever}_layer. I hoped there would be something we >> could >> already use as the index, but I can't spot it, so this will be more than the >> one-liner I was hoping for! > > If you can get that info from the hardware and injecting an error into > a DIMM gives you the correct DIMM number so that we can increment the > proper counter, then you're golden. I don't think that works reliably on > x86, though, therefore the lumping together. ... 'correct DIMM number' ... Does x86 have another source of memory-topology information it needs to correlate smbios with? For arm there is nothing else describing the memory-topology, so as long as we can correlate the smbios table and ghes:cper records through the handles, we can get this working for all systems. Thanks, James
Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM
Hi Boris, On 24/08/18 13:01, Borislav Petkov wrote: > On Fri, Aug 24, 2018 at 10:48:24AM +0100, James Morse wrote: >> so edac_raw_mc_handle_error() has no clue where the error happened. (I >> haven't >> read what it does with this information yet). > > See edac_inc_ce_error(), for example - it uses the layers which are not > negative (-1) to increment the error counts of the respective layer. It > all depends on what granularity of the hardware part you're reporting > the error for: is it a DIMM rank, a whole DIMM or for a channel which > can span multiple DIMM ranks. And so on... > > Look at some of the drivers and how they're doing that layering. It all > depends on whether you can get the precise info from the hw. Hmmm, in this example we need the information from firmware, as that is where ghes-edac gets its information from. We already count the module/device/dimms in the smbios table, memory is described as 'EDAC_MC_LAYER_ALL_MEM' with num_dimms. I think all we're missing is which dimm in ghes_edac_report_mem_error(). We have the handle, we just need a number between 1 and num_dimms. If it turns out firmware doesn't populate the handles in its cper records, then we can keep e->enable_per_layer_report false when calling edac_raw_mc_handle_error(). (I suggest we ignore 'card', and just do this for the device/dimms). >> Naively I thought we could generate some index during >> ghes_edac_count_dimms(), >> and use this as e->${whichever}_layer. I hoped there would be something we >> could >> already use as the index, but I can't spot it, so this will be more than the >> one-liner I was hoping for! > > If you can get that info from the hardware and injecting an error into > a DIMM gives you the correct DIMM number so that we can increment the > proper counter, then you're golden. I don't think that works reliably on > x86, though, therefore the lumping together. ... 'correct DIMM number' ... Does x86 have another source of memory-topology information it needs to correlate smbios with? For arm there is nothing else describing the memory-topology, so as long as we can correlate the smbios table and ghes:cper records through the handles, we can get this working for all systems. Thanks, James
[PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier
Adopt the SPDX license identifier headers to ease license compliance management. Signed-off-by: Pierre-Jean Texier --- arch/arm/boot/dts/imx7s-warp.dts | 39 +-- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index fa390da..bf33c74 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -1,44 +1,7 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) /* * Copyright (C) 2016 NXP Semiconductors. * Author: Fabio Estevam - * - * This file is dual-licensed: you can use it either under the terms - * of the GPL or the X11 license, at your option. Note that this dual - * licensing only applies to this file, and not this project as a - * whole. - * - * a) This file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This file is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Or, alternatively, - * - * b) Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. */ /dts-v1/; -- 2.7.4
[PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier
Adopt the SPDX license identifier headers to ease license compliance management. Signed-off-by: Pierre-Jean Texier --- arch/arm/boot/dts/imx7s-warp.dts | 39 +-- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index fa390da..bf33c74 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -1,44 +1,7 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) /* * Copyright (C) 2016 NXP Semiconductors. * Author: Fabio Estevam - * - * This file is dual-licensed: you can use it either under the terms - * of the GPL or the X11 license, at your option. Note that this dual - * licensing only applies to this file, and not this project as a - * whole. - * - * a) This file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This file is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Or, alternatively, - * - * b) Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. */ /dts-v1/; -- 2.7.4
Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.
On Tue, 28 Aug 2018 18:34:15 +0200, Anshuman Gupta wrote: > > Is this patch not in consideration, there are no review comment for it. > this patch fixes an issue with hdac hdmi codec driver. > > On one of our platform HD audio controller takes arounf 300ms. > Below are the snippet of dmesg log. > > [ 3778.461888] calling :00:0e.0+ @ 3667, parent: pci:00, cb: > pci_pm_resume > [ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs > > When HD audio controller is in runtime suspend state, > with direct complete, we can improve overall system wide resume latency. Actually, *this* should have been mentioned in the changelog, and the subject would be better phrased to reflect it. After all, you're trying to reduce / optimize the runtime PM latency. thanks, Takashi > On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote: > > Keep hdac hdmi codec to be in runtime suspended while entering to > > system wide suspend. Currently hdac hdmi codec driver using its > > suspend and resume operation in prepare and complete PM callbacks, > > and it resumes the hd audio controller (parent of self) from runtime > > suspend and blocks the direct complete for hd audio controller. > > > > If hdac-codec is already in runtime suspend state skip its power down > > sequence in prepare, power up sequence in complete phase. It enables > > direct complete path for hdac-codec and hd audio controller > > PCI device. > > > > Signed-off-by: Anshuman Gupta > > --- > > Changes in v2 > > - Changed the commit message. > > - Using pm_runtime_suspended instead of pm_runtime_status_suspended > > in order to handle any race condition. > > > > sound/soc/codecs/hdac_hdmi.c | 6 ++ > > 1 file changed, 6 insertions(+) > > > > diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c > > index 84f7a7a..e965338 100644 > > --- a/sound/soc/codecs/hdac_hdmi.c > > +++ b/sound/soc/codecs/hdac_hdmi.c > > @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev) > > struct hdac_ext_device *edev = to_hda_ext_device(dev); > > struct hdac_device *hdev = >hdev; > > > > + if (pm_runtime_suspended(dev)) > > + return 1; > > + > > pm_runtime_get_sync(>hdev.dev); > > > > /* > > @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev) > > struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev); > > struct hdac_device *hdev = >hdev; > > > > + if (pm_runtime_suspended(dev)) > > + return; > > + > > /* Power up afg */ > > snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, > > AC_PWRST_D0); > > -- > > 2.7.4 > > > > -- > Thanks, > Anshuman. > >
Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.
On Tue, 28 Aug 2018 18:34:15 +0200, Anshuman Gupta wrote: > > Is this patch not in consideration, there are no review comment for it. > this patch fixes an issue with hdac hdmi codec driver. > > On one of our platform HD audio controller takes arounf 300ms. > Below are the snippet of dmesg log. > > [ 3778.461888] calling :00:0e.0+ @ 3667, parent: pci:00, cb: > pci_pm_resume > [ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs > > When HD audio controller is in runtime suspend state, > with direct complete, we can improve overall system wide resume latency. Actually, *this* should have been mentioned in the changelog, and the subject would be better phrased to reflect it. After all, you're trying to reduce / optimize the runtime PM latency. thanks, Takashi > On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote: > > Keep hdac hdmi codec to be in runtime suspended while entering to > > system wide suspend. Currently hdac hdmi codec driver using its > > suspend and resume operation in prepare and complete PM callbacks, > > and it resumes the hd audio controller (parent of self) from runtime > > suspend and blocks the direct complete for hd audio controller. > > > > If hdac-codec is already in runtime suspend state skip its power down > > sequence in prepare, power up sequence in complete phase. It enables > > direct complete path for hdac-codec and hd audio controller > > PCI device. > > > > Signed-off-by: Anshuman Gupta > > --- > > Changes in v2 > > - Changed the commit message. > > - Using pm_runtime_suspended instead of pm_runtime_status_suspended > > in order to handle any race condition. > > > > sound/soc/codecs/hdac_hdmi.c | 6 ++ > > 1 file changed, 6 insertions(+) > > > > diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c > > index 84f7a7a..e965338 100644 > > --- a/sound/soc/codecs/hdac_hdmi.c > > +++ b/sound/soc/codecs/hdac_hdmi.c > > @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev) > > struct hdac_ext_device *edev = to_hda_ext_device(dev); > > struct hdac_device *hdev = >hdev; > > > > + if (pm_runtime_suspended(dev)) > > + return 1; > > + > > pm_runtime_get_sync(>hdev.dev); > > > > /* > > @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev) > > struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev); > > struct hdac_device *hdev = >hdev; > > > > + if (pm_runtime_suspended(dev)) > > + return; > > + > > /* Power up afg */ > > snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, > > AC_PWRST_D0); > > -- > > 2.7.4 > > > > -- > Thanks, > Anshuman. > >
Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes
On Tue, Aug 28, 2018 at 8:10 AM Miguel Ojeda wrote: > > Hi Nick, > > On Mon, Aug 27, 2018 at 7:48 PM, Nick Desaulniers > wrote: > > On Mon, Aug 27, 2018 at 10:43 AM Nick Desaulniers > >> > + > >> > +/* > >> > + * Optional attributes: your compiler may or may not support them. > >> > + * > >> > + * To check for them, we use __has_attribute, which is supported on gcc > >> > >= 5, > >> > + * clang >= 2.9 and icc >= 17. In the meantime, to support 4.6 <= gcc < > >> > 5, > >> > + * we implement it by hand. > >> > + */ > >> > +#ifndef __has_attribute > >> > +#define __has_attribute(x) __GCC46_has_attribute_##x > >> > +#define __GCC46_has_attribute_assume_aligned 0 > >> > +#define __GCC46_has_attribute_designated_init 0 > >> > +#define __GCC46_has_attribute_externally_visible 1 > >> > +#define __GCC46_has_attribute_noclone 1 > >> > +#define __GCC46_has_attribute_optimize 1 > >> > +#define __GCC46_has_attribute_no_sanitize_address 0 > >> > +#endif > > > > And a follow up; I'm trying to understand what will happen in the case > > of say gcc 4.9 here. Were any of these supported between gcc 4.6 and > > 5.0? If so, then this code will not use them. It's simpler than > > explicit version checks, but it won't use features that are supported. > > > > I addressed that in the email I sent afterwards: > > """ > Note that: > - assume_aligned came with gcc 4.9 > - no_sanitize_address came with gcc 4.8 > > So if we feel it is important to have them there (before gcc 5), we > would need here a quick version check here. > """ > > The idea is that, in the future, whenever gcc 5 or later is the > minimum version, we just get rid of the #ifdef block without touching > the rest of the code :-) So if __has_attribute came with gcc 5, then that means that this patch will break assume_aligned for gcc-4.9 users and no_sanitize_address for gcc-4.8 and gcc-4.9 users? The slab allocator uses assume_aligned, and no_sanitize_address for CONFIG_KASAN. Should this patch ever come back through stable, Android and ChromeOS gcc-4.9/KASAN builds will break. I don't think we should leave that for a follow up; I would like to see it as part of this patch. It's ok to have explicit version checks for those 2 attributes since it's not possible to feature detect them for the versions of gcc that we support in this code base. I think you should add them in a v2 of this patch. Then we can point to this commit as the *shining example* of how to do proper feature detection, falling back to version checks only as a last resort. -- Thanks, ~Nick Desaulniers
Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes
On Tue, Aug 28, 2018 at 8:10 AM Miguel Ojeda wrote: > > Hi Nick, > > On Mon, Aug 27, 2018 at 7:48 PM, Nick Desaulniers > wrote: > > On Mon, Aug 27, 2018 at 10:43 AM Nick Desaulniers > >> > + > >> > +/* > >> > + * Optional attributes: your compiler may or may not support them. > >> > + * > >> > + * To check for them, we use __has_attribute, which is supported on gcc > >> > >= 5, > >> > + * clang >= 2.9 and icc >= 17. In the meantime, to support 4.6 <= gcc < > >> > 5, > >> > + * we implement it by hand. > >> > + */ > >> > +#ifndef __has_attribute > >> > +#define __has_attribute(x) __GCC46_has_attribute_##x > >> > +#define __GCC46_has_attribute_assume_aligned 0 > >> > +#define __GCC46_has_attribute_designated_init 0 > >> > +#define __GCC46_has_attribute_externally_visible 1 > >> > +#define __GCC46_has_attribute_noclone 1 > >> > +#define __GCC46_has_attribute_optimize 1 > >> > +#define __GCC46_has_attribute_no_sanitize_address 0 > >> > +#endif > > > > And a follow up; I'm trying to understand what will happen in the case > > of say gcc 4.9 here. Were any of these supported between gcc 4.6 and > > 5.0? If so, then this code will not use them. It's simpler than > > explicit version checks, but it won't use features that are supported. > > > > I addressed that in the email I sent afterwards: > > """ > Note that: > - assume_aligned came with gcc 4.9 > - no_sanitize_address came with gcc 4.8 > > So if we feel it is important to have them there (before gcc 5), we > would need here a quick version check here. > """ > > The idea is that, in the future, whenever gcc 5 or later is the > minimum version, we just get rid of the #ifdef block without touching > the rest of the code :-) So if __has_attribute came with gcc 5, then that means that this patch will break assume_aligned for gcc-4.9 users and no_sanitize_address for gcc-4.8 and gcc-4.9 users? The slab allocator uses assume_aligned, and no_sanitize_address for CONFIG_KASAN. Should this patch ever come back through stable, Android and ChromeOS gcc-4.9/KASAN builds will break. I don't think we should leave that for a follow up; I would like to see it as part of this patch. It's ok to have explicit version checks for those 2 attributes since it's not possible to feature detect them for the versions of gcc that we support in this code base. I think you should add them in a v2 of this patch. Then we can point to this commit as the *shining example* of how to do proper feature detection, falling back to version checks only as a last resort. -- Thanks, ~Nick Desaulniers
Re: [PATCH] perf: Convert to using %pOFn instead of device_node.name
On Mon, Aug 27, 2018 at 08:52:39PM -0500, Rob Herring wrote: > In preparation to remove the node name pointer from struct device_node, > convert printf users to use the %pOFn format specifier. > > Cc: Will Deacon > Cc: Mark Rutland > Cc: linux-arm-ker...@lists.infradead.org > Signed-off-by: Rob Herring > --- > drivers/perf/arm_pmu_platform.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) Cheers, I can queue this up for 4.20. Let me know if you'd rather take it along with the name pointer removal instead. Will > diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c > index 96075cecb0ae..933bd8410fc2 100644 > --- a/drivers/perf/arm_pmu_platform.c > +++ b/drivers/perf/arm_pmu_platform.c > @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node > *node, int i) > > dn = of_parse_phandle(node, "interrupt-affinity", i); > if (!dn) { > - pr_warn("failed to parse interrupt-affinity[%d] for %s\n", > - i, node->name); > + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n", > + i, node); > return -EINVAL; > } > > cpu = of_cpu_node_to_id(dn); > if (cpu < 0) { > - pr_warn("failed to find logical CPU for %s\n", dn->name); > + pr_warn("failed to find logical CPU for %pOFn\n", dn); > cpu = nr_cpu_ids; > } > > -- > 2.17.1 >
Re: [PATCH] perf: Convert to using %pOFn instead of device_node.name
On Mon, Aug 27, 2018 at 08:52:39PM -0500, Rob Herring wrote: > In preparation to remove the node name pointer from struct device_node, > convert printf users to use the %pOFn format specifier. > > Cc: Will Deacon > Cc: Mark Rutland > Cc: linux-arm-ker...@lists.infradead.org > Signed-off-by: Rob Herring > --- > drivers/perf/arm_pmu_platform.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) Cheers, I can queue this up for 4.20. Let me know if you'd rather take it along with the name pointer removal instead. Will > diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c > index 96075cecb0ae..933bd8410fc2 100644 > --- a/drivers/perf/arm_pmu_platform.c > +++ b/drivers/perf/arm_pmu_platform.c > @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node > *node, int i) > > dn = of_parse_phandle(node, "interrupt-affinity", i); > if (!dn) { > - pr_warn("failed to parse interrupt-affinity[%d] for %s\n", > - i, node->name); > + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n", > + i, node); > return -EINVAL; > } > > cpu = of_cpu_node_to_id(dn); > if (cpu < 0) { > - pr_warn("failed to find logical CPU for %s\n", dn->name); > + pr_warn("failed to find logical CPU for %pOFn\n", dn); > cpu = nr_cpu_ids; > } > > -- > 2.17.1 >
Re: [PATCH v2] objtool: Support multiple rodata sections.
On Fri, Aug 03, 2018 at 07:40:40PM +0100, Allan Xavier wrote: > +static void mark_rodata(struct objtool_file *file) > +{ > + struct section *sec; > + bool found = false; > + static const char *str1 = ".str1."; > + const int str1len = strlen(str1) + 1; > + A comment here would help, explaining that this looks for both .rodata and .rodata.func_name sections (when using -fdata-sections). > + for_each_sec(file, sec) { > + if (strstr(sec->name, ".rodata") == sec->name) { > + char *str1pos = sec->name + strlen(sec->name) - str1len; > + > + /* Skips over .rodata.str1.1 and .rodata.str.1.8 */ > + if (strstr(sec->name, str1) != str1pos) { > + sec->rodata = true; > + found = true; > + } > + } > + } This could be simplified and made more readable with something like: for_each_sec(file, sec) { if (!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.") { sec->rodata = true; found = true; } } -- Josh
Re: [PATCH v2] objtool: Support multiple rodata sections.
On Fri, Aug 03, 2018 at 07:40:40PM +0100, Allan Xavier wrote: > +static void mark_rodata(struct objtool_file *file) > +{ > + struct section *sec; > + bool found = false; > + static const char *str1 = ".str1."; > + const int str1len = strlen(str1) + 1; > + A comment here would help, explaining that this looks for both .rodata and .rodata.func_name sections (when using -fdata-sections). > + for_each_sec(file, sec) { > + if (strstr(sec->name, ".rodata") == sec->name) { > + char *str1pos = sec->name + strlen(sec->name) - str1len; > + > + /* Skips over .rodata.str1.1 and .rodata.str.1.8 */ > + if (strstr(sec->name, str1) != str1pos) { > + sec->rodata = true; > + found = true; > + } > + } > + } This could be simplified and made more readable with something like: for_each_sec(file, sec) { if (!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.") { sec->rodata = true; found = true; } } -- Josh
Re: [PATCH v2 2/6] reset: qcom: PDC Global (Power Domain Controller) reset controller
On Mon, Aug 27, 2018 at 08:02:53PM -0700, Bjorn Andersson wrote: > On Mon 27 Aug 17:22 PDT 2018, Matthias Kaehlcke wrote: > > On Fri, Aug 24, 2018 at 06:48:56PM +0530, Sibi Sankar wrote: > > > diff --git a/drivers/reset/reset-qcom-pdc.c > > > b/drivers/reset/reset-qcom-pdc.c > [..] > > > +struct qcom_pdc_desc { > > > + const struct regmap_config *config; > > > + const struct qcom_pdc_reset_map *resets; > > > + size_t num_resets; > > > +}; > > > > Not sure if this structure adds much value or just a layer of > > indirection: > > > > - .config is only accessed in _probe(), sdm845_pdc_regmap_config could > > be used directly > > - .resets is used in _(de)assert(), sdm845_pdc_resets could be used > > directly > > - .num_resets is only accessed in _probe(), > > ARRAY_SIZE(sdm845_pdc_resets) could be used instead > > > > It probably makes sense if it is planned to support reset controllers > > of other SoCs with this driver. > > > > I like this suggestion, once we need the configurability we can add the > type for it. It also shows that only .resets would need to be referenced > by qcom_pdc_reset_data. > > > > +struct qcom_pdc_reset_data { > > > + struct reset_controller_dev rcdev; > > > + struct regmap *regmap; > > > + const struct qcom_pdc_desc *desc; > > > +}; > [..] > > > +static int qcom_pdc_reset_probe(struct platform_device *pdev) > > > +{ > [..] > > > + data->regmap = devm_regmap_init_mmio(dev, base, desc->config); > > > + if (IS_ERR(data->regmap)) { > > > + dev_err(dev, "Unable to get pdc-global regmap"); > > > > Add missing '\n' > > > > Say 'pdc-reset' instead of 'pdc-global'? (see also my other comment > > below). > > > > This regmap is created out of the single anonymous "reg", so I think the > error should be reduced to "Unable to initialize regmap\n". > > [..] > > > +static const struct of_device_id qcom_pdc_reset_of_match[] = { > > > + { .compatible = "qcom,sdm845-pdc-global", .data = _pdc_desc }, > > > > Should this be 'qcom,sdm845-pdc-reset' which is more specific than > > 'global' and in line with the name and purpose of the driver? > > Obviously this would require to adjust the bindings doc too. > > > > No, the binding describes the hardware block named "PDC Global", > currently implemented as a reset controller. The reason for doing this > is so that we one day can expose additional interfaces in a backwards > compatible fashion. Sounds reasonable, thanks for the clarification.
Re: [PATCH v2 2/6] reset: qcom: PDC Global (Power Domain Controller) reset controller
On Mon, Aug 27, 2018 at 08:02:53PM -0700, Bjorn Andersson wrote: > On Mon 27 Aug 17:22 PDT 2018, Matthias Kaehlcke wrote: > > On Fri, Aug 24, 2018 at 06:48:56PM +0530, Sibi Sankar wrote: > > > diff --git a/drivers/reset/reset-qcom-pdc.c > > > b/drivers/reset/reset-qcom-pdc.c > [..] > > > +struct qcom_pdc_desc { > > > + const struct regmap_config *config; > > > + const struct qcom_pdc_reset_map *resets; > > > + size_t num_resets; > > > +}; > > > > Not sure if this structure adds much value or just a layer of > > indirection: > > > > - .config is only accessed in _probe(), sdm845_pdc_regmap_config could > > be used directly > > - .resets is used in _(de)assert(), sdm845_pdc_resets could be used > > directly > > - .num_resets is only accessed in _probe(), > > ARRAY_SIZE(sdm845_pdc_resets) could be used instead > > > > It probably makes sense if it is planned to support reset controllers > > of other SoCs with this driver. > > > > I like this suggestion, once we need the configurability we can add the > type for it. It also shows that only .resets would need to be referenced > by qcom_pdc_reset_data. > > > > +struct qcom_pdc_reset_data { > > > + struct reset_controller_dev rcdev; > > > + struct regmap *regmap; > > > + const struct qcom_pdc_desc *desc; > > > +}; > [..] > > > +static int qcom_pdc_reset_probe(struct platform_device *pdev) > > > +{ > [..] > > > + data->regmap = devm_regmap_init_mmio(dev, base, desc->config); > > > + if (IS_ERR(data->regmap)) { > > > + dev_err(dev, "Unable to get pdc-global regmap"); > > > > Add missing '\n' > > > > Say 'pdc-reset' instead of 'pdc-global'? (see also my other comment > > below). > > > > This regmap is created out of the single anonymous "reg", so I think the > error should be reduced to "Unable to initialize regmap\n". > > [..] > > > +static const struct of_device_id qcom_pdc_reset_of_match[] = { > > > + { .compatible = "qcom,sdm845-pdc-global", .data = _pdc_desc }, > > > > Should this be 'qcom,sdm845-pdc-reset' which is more specific than > > 'global' and in line with the name and purpose of the driver? > > Obviously this would require to adjust the bindings doc too. > > > > No, the binding describes the hardware block named "PDC Global", > currently implemented as a reset controller. The reason for doing this > is so that we one day can expose additional interfaces in a backwards > compatible fashion. Sounds reasonable, thanks for the clarification.
Re: [PATCH] MIPS: Convert to using %pOFn instead of device_node.name
Hi Rob, On Mon, Aug 27, 2018 at 08:52:05PM -0500, Rob Herring wrote: > In preparation to remove the node name pointer from struct device_node, > convert printf users to use the %pOFn format specifier. > > Cc: Ralf Baechle > Cc: Paul Burton > Cc: James Hogan > Cc: John Crispin > Cc: linux-m...@linux-mips.org > Signed-off-by: Rob Herring > --- > arch/mips/cavium-octeon/octeon-irq.c | 16 > arch/mips/netlogic/common/irq.c | 14 +++--- > arch/mips/ralink/cevt-rt3352.c | 6 +++--- > arch/mips/ralink/ill_acc.c | 2 +- > 4 files changed, 19 insertions(+), 19 deletions(-) Thanks - applied to mips-next for 4.20. Paul
Re: [PATCH] MIPS: Convert to using %pOFn instead of device_node.name
Hi Rob, On Mon, Aug 27, 2018 at 08:52:05PM -0500, Rob Herring wrote: > In preparation to remove the node name pointer from struct device_node, > convert printf users to use the %pOFn format specifier. > > Cc: Ralf Baechle > Cc: Paul Burton > Cc: James Hogan > Cc: John Crispin > Cc: linux-m...@linux-mips.org > Signed-off-by: Rob Herring > --- > arch/mips/cavium-octeon/octeon-irq.c | 16 > arch/mips/netlogic/common/irq.c | 14 +++--- > arch/mips/ralink/cevt-rt3352.c | 6 +++--- > arch/mips/ralink/ill_acc.c | 2 +- > 4 files changed, 19 insertions(+), 19 deletions(-) Thanks - applied to mips-next for 4.20. Paul
Re: [PATCH v13 07/13] x86/sgx: Add data structures for tracking the EPC pages
>>> extern bool sgx_enabled; >>> extern bool sgx_lc_enabled; >>> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; >>> + >>> +/* >>> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc >> >> Why are you bothering packing these bits? This seems a rather >> convoluted way to store two integers. > > To keep struct sgx_epc_page 64 bytes. It's a list_head and a ulong now. That doesn't add up to 64. If you properly describe the bounds and limits of banks we can possibly help you find a nice solution. As it stands, they are totally opaque and we have no idea what is going on. >>> +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long >>> index, >>> + struct sgx_epc_bank *bank) >>> +{ >>> + unsigned long nr_pages = size >> PAGE_SHIFT; >>> + struct sgx_epc_page *pages_data; >>> + unsigned long i; >>> + void *va; >>> + >>> + va = ioremap_cache(addr, size); >>> + if (!va) >>> + return -ENOMEM; >>> + >>> + pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL); >>> + if (!pages_data) >>> + goto out_iomap; >> >> This looks like you're roughly limited by the page allocator to a bank >> size of ~1.4GB which seems kinda small. Is this really OK? > > Where does this limitation come from? The page allocator can only do 4MB at a time. Using your 64 byte numbers: 4MB/64 = 64k sgx_epc_pages. 64k*PAGE_SIZE = 256MB. So you can only handle 256MB banks with this code. BTW, if you only have 64k worth of pages, you can use a u16 for the index. >>> + u32 eax, ebx, ecx, edx; >>> + u64 pa, size; >>> + int ret; >>> + int i; >>> + >>> + for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { >>> + cpuid_count(SGX_CPUID, 2 + i, , , , ); >>> + if (!(eax & 0xF)) >>> + break; >> >> So, we have random data coming out of a random CPUID leaf being called >> 'eax' and then being tested against a random hard-coded mask. This >> seems rather unfortunate for someone trying to understand the code. Can >> we do better? > > Should probably do something along the lines: > > #define SGX_CPUID_SECTION(i) (2 + (i)) > > enum sgx_section { > SGX_CPUID_SECTION_INVALID = 0x00, > SGX_CPUID_SECTION_VALID = 0x1B, > SGX_CPUID_SECTION_MASK = 0xFF, > }; Plus comments, that would be nice. >>> + sgx_nr_epc_banks++; >>> + } >>> + >>> + if (!sgx_nr_epc_banks) { >>> + pr_err("There are zero EPC banks.\n"); >>> + return -ENODEV; >>> + } >>> + >>> + return 0; >>> +} >> >> Does this support hot-addition of a bank? If not, why not? ... > I'm not aware that we would have an ACPI specification for SGX so this > is all I have at the moment (does not show any ACPI event for > hotplugging). So you're saying the one platform you looked at don't support hotplug. I was looking for a more broad statement about SGX.
[PATCH v5 2/3] overlayfs: check CAP_MKNOD before issuing vfs_whiteout
Assumption never checked, should fail if the mounter creds are not sufficient. Signed-off-by: Mark Salyzyn Cc: Miklos Szeredi Cc: Jonathan Corbet Cc: Vivek Goyal Cc: Eric W. Biederman Cc: Amir Goldstein Cc: Randy Dunlap Cc: Stephen Smalley Cc: linux-unio...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-kernel@vger.kernel.org v5 - dependency of "overlayfs: override_creds=off option bypass creator_cred" --- fs/overlayfs/overlayfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 7538b9b56237..bf3a80157d42 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -176,7 +176,7 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) { - int err = vfs_whiteout(dir, dentry); + int err = capable(CAP_MKNOD) ? vfs_whiteout(dir, dentry) : -EPERM; pr_debug("whiteout(%pd2) = %i\n", dentry, err); return err; } -- 2.19.0.rc0.228.g281dcd1b4d0-goog
Re: [PATCH v13 07/13] x86/sgx: Add data structures for tracking the EPC pages
>>> extern bool sgx_enabled; >>> extern bool sgx_lc_enabled; >>> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; >>> + >>> +/* >>> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc >> >> Why are you bothering packing these bits? This seems a rather >> convoluted way to store two integers. > > To keep struct sgx_epc_page 64 bytes. It's a list_head and a ulong now. That doesn't add up to 64. If you properly describe the bounds and limits of banks we can possibly help you find a nice solution. As it stands, they are totally opaque and we have no idea what is going on. >>> +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long >>> index, >>> + struct sgx_epc_bank *bank) >>> +{ >>> + unsigned long nr_pages = size >> PAGE_SHIFT; >>> + struct sgx_epc_page *pages_data; >>> + unsigned long i; >>> + void *va; >>> + >>> + va = ioremap_cache(addr, size); >>> + if (!va) >>> + return -ENOMEM; >>> + >>> + pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL); >>> + if (!pages_data) >>> + goto out_iomap; >> >> This looks like you're roughly limited by the page allocator to a bank >> size of ~1.4GB which seems kinda small. Is this really OK? > > Where does this limitation come from? The page allocator can only do 4MB at a time. Using your 64 byte numbers: 4MB/64 = 64k sgx_epc_pages. 64k*PAGE_SIZE = 256MB. So you can only handle 256MB banks with this code. BTW, if you only have 64k worth of pages, you can use a u16 for the index. >>> + u32 eax, ebx, ecx, edx; >>> + u64 pa, size; >>> + int ret; >>> + int i; >>> + >>> + for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { >>> + cpuid_count(SGX_CPUID, 2 + i, , , , ); >>> + if (!(eax & 0xF)) >>> + break; >> >> So, we have random data coming out of a random CPUID leaf being called >> 'eax' and then being tested against a random hard-coded mask. This >> seems rather unfortunate for someone trying to understand the code. Can >> we do better? > > Should probably do something along the lines: > > #define SGX_CPUID_SECTION(i) (2 + (i)) > > enum sgx_section { > SGX_CPUID_SECTION_INVALID = 0x00, > SGX_CPUID_SECTION_VALID = 0x1B, > SGX_CPUID_SECTION_MASK = 0xFF, > }; Plus comments, that would be nice. >>> + sgx_nr_epc_banks++; >>> + } >>> + >>> + if (!sgx_nr_epc_banks) { >>> + pr_err("There are zero EPC banks.\n"); >>> + return -ENODEV; >>> + } >>> + >>> + return 0; >>> +} >> >> Does this support hot-addition of a bank? If not, why not? ... > I'm not aware that we would have an ACPI specification for SGX so this > is all I have at the moment (does not show any ACPI event for > hotplugging). So you're saying the one platform you looked at don't support hotplug. I was looking for a more broad statement about SGX.
[PATCH v5 2/3] overlayfs: check CAP_MKNOD before issuing vfs_whiteout
Assumption never checked, should fail if the mounter creds are not sufficient. Signed-off-by: Mark Salyzyn Cc: Miklos Szeredi Cc: Jonathan Corbet Cc: Vivek Goyal Cc: Eric W. Biederman Cc: Amir Goldstein Cc: Randy Dunlap Cc: Stephen Smalley Cc: linux-unio...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-kernel@vger.kernel.org v5 - dependency of "overlayfs: override_creds=off option bypass creator_cred" --- fs/overlayfs/overlayfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 7538b9b56237..bf3a80157d42 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -176,7 +176,7 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) { - int err = vfs_whiteout(dir, dentry); + int err = capable(CAP_MKNOD) ? vfs_whiteout(dir, dentry) : -EPERM; pr_debug("whiteout(%pd2) = %i\n", dentry, err); return err; } -- 2.19.0.rc0.228.g281dcd1b4d0-goog
Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes
On Tue, Aug 28, 2018 at 8:04 AM Miguel Ojeda wrote: > > Hi Nick, > > On Mon, Aug 27, 2018 at 7:43 PM, Nick Desaulniers > wrote: > > On Sun, Aug 26, 2018 at 10:58 AM Miguel Ojeda > > wrote: > >> > >> Instead of using version checks per-compiler to define (or not) each > >> attribute, > >> use __has_attribute to test for them, following the cleanup started with > >> commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h > >> mutually exclusive"). > >> > >> All the attributes that are fairly common/standard (i.e. those that do not > >> require extra logic to define them) have been moved to a new file > >> include/linux/compiler_attributes.h. The attributes have been sorted > >> and divided between "required" and "optional". > > > > Nice! Thanks Miguel. Regarding sorting, I'm happy with that. In > > fact, some of the comments can be removed IMO, as the attributes have > > common definitions in the docs (maybe an added link to the gcc and > > clang attribute docs at the top of the file rather than per attribute > > comments). > > Thanks for the review! > > I thought about that, although there isn't a single page with them in > GCC (we could group them by type though: function ones, variable > ones... and then link to those). > On the other hand, maybe writing a > Doc/ file is better and allows us to write as much as one would like > about each of them (and a link to each page compiler's page about it, > etc.). I think in the end the Doc/ file might be the best, in order > not to crowd the header. A comment is closer to the source, but I guess that's bytes for each inclusion for every file. I don't feel passionate about this point one way or the other. > > > > >> > >> Further, attributes that are already supported in gcc >= 4.6 and recent > >> clang > >> were simply made to be required (instead of testing for them): > >> * always_inline > >> * const (pure was already "required", by the way) > >> * gnu_inline > > > > There's an important test for gnu_inline that isn't checking that it's > > supported, but rather what the implicit behavior is depending on which > > C standard is being used. It's important not to remove that. > > Hm... I actually thought it was not available at some point before 4.6 > and removed the #ifdef. The comment even says it is featuring > detecting it so that the old GCC inlining is used; but it shouldn't > matter if you always use it, no? Good point. Rather than defining it only if GNU inline is not the current behavior is a bit more verbose than just always defining it. This seems to confirm that that should work: https://godbolt.org/z/igwh32. > > I just went looking for more info in d03db2bc2 ("compiler-gcc.h: Add > __attribute__((gnu_inline)) to all inline declarations") and if I > understood the commit message, the problem is compiling with implicit > new standard in newer compilers which trigger the C90 behavior, while > we need the old one --- but if we use gnu_inline, we are getting it > regardless. > > I am sure I am missing something, but I think a clarification is > needed (and in the code comment as well) -- a bit off-topic, anyway. > > [Also, I wouldn't define an attribute or not depending on some other > condition. I would, instead, define something some other symbol with > that logic (i.e. instead of using "__gnu_inline", because that is > lying -- it is not using the attribute even if the compiler supports > it).] > > > > >> > >> Finally, some other bits were cleaned up in the process: > >> * __optimize: removed (unused in the whole kernel tree) > > > > A+ for removing dead code. I also don't see it used anywhere. > > > >> * __must_be_array: removed from -gcc and -clang (identical), moved to > >> _types > > > > Yep, uses a builtin (we should add guards for that, later, in a > > similar style change that guards the use of builtins). Looks good. > > > >> (it depends on the unconditionally used __builtin_types_compatible_p > >> * Removes unneeded underscores on the attributes' names > > > > That doesn't sound right, but lets see what you mean by that. > > Some attributes used the __name__ syntax (i.e. inside the double > parenthesis), others didn't. I simplified by removing all the extra > underscores. A+ > > > > >> > >> There are some things that can be further cleaned up afterwards: > >> * __attribute_const__: rename to __const > > > > This doesn't look correct to me; the kernel is full of call sites for > > __attribute_const__. You can't rename the definition without renaming > > Of course it is full of use sites! That is why I said it is a possible > cleanup for *afterwards* this patch :-) > > > all of the call sites (and that would be too big a change for this > > patch, IMO). Skip the rename, and it also looks like you just removed > > it outright (Oops). > > Not sure what you mean by this (?). The attribute is still there unchanged. Nevermind, I misinterpretered this part of the patch. > > > > >> * __noretpoline:
Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes
On Tue, Aug 28, 2018 at 8:04 AM Miguel Ojeda wrote: > > Hi Nick, > > On Mon, Aug 27, 2018 at 7:43 PM, Nick Desaulniers > wrote: > > On Sun, Aug 26, 2018 at 10:58 AM Miguel Ojeda > > wrote: > >> > >> Instead of using version checks per-compiler to define (or not) each > >> attribute, > >> use __has_attribute to test for them, following the cleanup started with > >> commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h > >> mutually exclusive"). > >> > >> All the attributes that are fairly common/standard (i.e. those that do not > >> require extra logic to define them) have been moved to a new file > >> include/linux/compiler_attributes.h. The attributes have been sorted > >> and divided between "required" and "optional". > > > > Nice! Thanks Miguel. Regarding sorting, I'm happy with that. In > > fact, some of the comments can be removed IMO, as the attributes have > > common definitions in the docs (maybe an added link to the gcc and > > clang attribute docs at the top of the file rather than per attribute > > comments). > > Thanks for the review! > > I thought about that, although there isn't a single page with them in > GCC (we could group them by type though: function ones, variable > ones... and then link to those). > On the other hand, maybe writing a > Doc/ file is better and allows us to write as much as one would like > about each of them (and a link to each page compiler's page about it, > etc.). I think in the end the Doc/ file might be the best, in order > not to crowd the header. A comment is closer to the source, but I guess that's bytes for each inclusion for every file. I don't feel passionate about this point one way or the other. > > > > >> > >> Further, attributes that are already supported in gcc >= 4.6 and recent > >> clang > >> were simply made to be required (instead of testing for them): > >> * always_inline > >> * const (pure was already "required", by the way) > >> * gnu_inline > > > > There's an important test for gnu_inline that isn't checking that it's > > supported, but rather what the implicit behavior is depending on which > > C standard is being used. It's important not to remove that. > > Hm... I actually thought it was not available at some point before 4.6 > and removed the #ifdef. The comment even says it is featuring > detecting it so that the old GCC inlining is used; but it shouldn't > matter if you always use it, no? Good point. Rather than defining it only if GNU inline is not the current behavior is a bit more verbose than just always defining it. This seems to confirm that that should work: https://godbolt.org/z/igwh32. > > I just went looking for more info in d03db2bc2 ("compiler-gcc.h: Add > __attribute__((gnu_inline)) to all inline declarations") and if I > understood the commit message, the problem is compiling with implicit > new standard in newer compilers which trigger the C90 behavior, while > we need the old one --- but if we use gnu_inline, we are getting it > regardless. > > I am sure I am missing something, but I think a clarification is > needed (and in the code comment as well) -- a bit off-topic, anyway. > > [Also, I wouldn't define an attribute or not depending on some other > condition. I would, instead, define something some other symbol with > that logic (i.e. instead of using "__gnu_inline", because that is > lying -- it is not using the attribute even if the compiler supports > it).] > > > > >> > >> Finally, some other bits were cleaned up in the process: > >> * __optimize: removed (unused in the whole kernel tree) > > > > A+ for removing dead code. I also don't see it used anywhere. > > > >> * __must_be_array: removed from -gcc and -clang (identical), moved to > >> _types > > > > Yep, uses a builtin (we should add guards for that, later, in a > > similar style change that guards the use of builtins). Looks good. > > > >> (it depends on the unconditionally used __builtin_types_compatible_p > >> * Removes unneeded underscores on the attributes' names > > > > That doesn't sound right, but lets see what you mean by that. > > Some attributes used the __name__ syntax (i.e. inside the double > parenthesis), others didn't. I simplified by removing all the extra > underscores. A+ > > > > >> > >> There are some things that can be further cleaned up afterwards: > >> * __attribute_const__: rename to __const > > > > This doesn't look correct to me; the kernel is full of call sites for > > __attribute_const__. You can't rename the definition without renaming > > Of course it is full of use sites! That is why I said it is a possible > cleanup for *afterwards* this patch :-) > > > all of the call sites (and that would be too big a change for this > > patch, IMO). Skip the rename, and it also looks like you just removed > > it outright (Oops). > > Not sure what you mean by this (?). The attribute is still there unchanged. Nevermind, I misinterpretered this part of the patch. > > > > >> * __noretpoline:
[PATCH v5 0/3]
overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh overlayfs: check CAP_MKNOD before issuing vfs_whiteout Assumption never checked, should fail if the mounter creds are not sufficient. overlayfs: override_creds=off option bypass creator_cred By default, all access to the upper, lower and work directories is the recorded mounter's MAC and DAC credentials. The incoming accesses are checked against the caller's credentials. If the principles of least privilege are applied, the mounter's credentials might not overlap the credentials of the caller's when accessing the overlayfs filesystem. For example, a file that a lower DAC privileged caller can execute, is MAC denied to the generally higher DAC privileged mounter, to prevent an attack vector. We add the option to turn off override_creds in the mount options; all subsequent operations after mount on the filesystem will be only the caller's credentials. This option default is set in the CONFIG OVERLAY_FS_OVERRIDE_CREDS or in the module option override_creds. The module boolean parameter and mount option override_creds is also added as a presence check for this "feature" by checking existence of /sys/module/overlay/parameters/overlay_creds. This will allow user space to determine if the option can be supplied successfully to the mount(2) operation. Signed-off-by: Mark Salyzyn Cc: Miklos Szeredi Cc: Jonathan Corbet Cc: Vivek Goyal Cc: Eric W. Biederman Cc: Amir Goldstein Cc: Randy Dunlap Cc: Stephen Smalley Cc: linux-unio...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-kernel@vger.kernel.org v2: - Forward port changed attr to stat, resulting in a build error. - altered commit message. v3: - Change name from caller_credentials / creator_credentials to the boolean override_creds. - Changed from creator to mounter credentials. - Updated and fortified the documentation. - Added CONFIG_OVERLAY_FS_OVERRIDE_CREDS v4: - spelling and grammar errors in text v5: - beefed up the caveats in the Documentation - Is dependent on "overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh" "overlayfs: check CAP_MKNOD before issuing vfs_whiteout" - Added prwarn when override_creds=off
Re: [PATCH V2] xen: export device state to sysfs
On 08/28/2018 10:56 AM, Joe Jin wrote: > Export device state to sysfs to allow for easier get device state. > > Signed-off-by: Joe Jin > Cc: Boris Ostrovsky > Cc: Juergen Gross > Cc: Konrad Rzeszutek Wilk > --- > Documentation/ABI/stable/sysfs-bus-xen-backend | 9 + > drivers/xen/xenbus/xenbus_probe.c | 9 + > 2 files changed, 18 insertions(+) > > diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend > b/Documentation/ABI/stable/sysfs-bus-xen-backend > index 3d5951c8bf5f..e8b60bd766f7 100644 > --- a/Documentation/ABI/stable/sysfs-bus-xen-backend Won't this show up in the frontend as well? -boris > +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend > @@ -73,3 +73,12 @@ KernelVersion: 3.0 > Contact: Konrad Rzeszutek Wilk > Description: > Number of sectors written by the frontend. > + > +What:/sys/bus/xen-backend/devices/*/state > +Date:August 2018 > +KernelVersion: 4.19 > +Contact: Joe Jin > +Description: > +The state of the device. One of: 'Unknown', > +'Initialising', 'Initialised', 'Connected', 'Closing', > +'Closed', 'Reconfiguring', 'Reconfigured'. > diff --git a/drivers/xen/xenbus/xenbus_probe.c > b/drivers/xen/xenbus/xenbus_probe.c > index f2088838f690..5b471889d723 100644 > --- a/drivers/xen/xenbus/xenbus_probe.c > +++ b/drivers/xen/xenbus/xenbus_probe.c > @@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev, > } > static DEVICE_ATTR_RO(modalias); > > +static ssize_t state_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + return sprintf(buf, "%s\n", > + xenbus_strstate(to_xenbus_device(dev)->state)); > +} > +static DEVICE_ATTR_RO(state); > + > static struct attribute *xenbus_dev_attrs[] = { > _attr_nodename.attr, > _attr_devtype.attr, > _attr_modalias.attr, > + _attr_state.attr, > NULL, > }; >
[PATCH v5 0/3]
overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh overlayfs: check CAP_MKNOD before issuing vfs_whiteout Assumption never checked, should fail if the mounter creds are not sufficient. overlayfs: override_creds=off option bypass creator_cred By default, all access to the upper, lower and work directories is the recorded mounter's MAC and DAC credentials. The incoming accesses are checked against the caller's credentials. If the principles of least privilege are applied, the mounter's credentials might not overlap the credentials of the caller's when accessing the overlayfs filesystem. For example, a file that a lower DAC privileged caller can execute, is MAC denied to the generally higher DAC privileged mounter, to prevent an attack vector. We add the option to turn off override_creds in the mount options; all subsequent operations after mount on the filesystem will be only the caller's credentials. This option default is set in the CONFIG OVERLAY_FS_OVERRIDE_CREDS or in the module option override_creds. The module boolean parameter and mount option override_creds is also added as a presence check for this "feature" by checking existence of /sys/module/overlay/parameters/overlay_creds. This will allow user space to determine if the option can be supplied successfully to the mount(2) operation. Signed-off-by: Mark Salyzyn Cc: Miklos Szeredi Cc: Jonathan Corbet Cc: Vivek Goyal Cc: Eric W. Biederman Cc: Amir Goldstein Cc: Randy Dunlap Cc: Stephen Smalley Cc: linux-unio...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-kernel@vger.kernel.org v2: - Forward port changed attr to stat, resulting in a build error. - altered commit message. v3: - Change name from caller_credentials / creator_credentials to the boolean override_creds. - Changed from creator to mounter credentials. - Updated and fortified the documentation. - Added CONFIG_OVERLAY_FS_OVERRIDE_CREDS v4: - spelling and grammar errors in text v5: - beefed up the caveats in the Documentation - Is dependent on "overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh" "overlayfs: check CAP_MKNOD before issuing vfs_whiteout" - Added prwarn when override_creds=off
Re: [PATCH V2] xen: export device state to sysfs
On 08/28/2018 10:56 AM, Joe Jin wrote: > Export device state to sysfs to allow for easier get device state. > > Signed-off-by: Joe Jin > Cc: Boris Ostrovsky > Cc: Juergen Gross > Cc: Konrad Rzeszutek Wilk > --- > Documentation/ABI/stable/sysfs-bus-xen-backend | 9 + > drivers/xen/xenbus/xenbus_probe.c | 9 + > 2 files changed, 18 insertions(+) > > diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend > b/Documentation/ABI/stable/sysfs-bus-xen-backend > index 3d5951c8bf5f..e8b60bd766f7 100644 > --- a/Documentation/ABI/stable/sysfs-bus-xen-backend Won't this show up in the frontend as well? -boris > +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend > @@ -73,3 +73,12 @@ KernelVersion: 3.0 > Contact: Konrad Rzeszutek Wilk > Description: > Number of sectors written by the frontend. > + > +What:/sys/bus/xen-backend/devices/*/state > +Date:August 2018 > +KernelVersion: 4.19 > +Contact: Joe Jin > +Description: > +The state of the device. One of: 'Unknown', > +'Initialising', 'Initialised', 'Connected', 'Closing', > +'Closed', 'Reconfiguring', 'Reconfigured'. > diff --git a/drivers/xen/xenbus/xenbus_probe.c > b/drivers/xen/xenbus/xenbus_probe.c > index f2088838f690..5b471889d723 100644 > --- a/drivers/xen/xenbus/xenbus_probe.c > +++ b/drivers/xen/xenbus/xenbus_probe.c > @@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev, > } > static DEVICE_ATTR_RO(modalias); > > +static ssize_t state_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + return sprintf(buf, "%s\n", > + xenbus_strstate(to_xenbus_device(dev)->state)); > +} > +static DEVICE_ATTR_RO(state); > + > static struct attribute *xenbus_dev_attrs[] = { > _attr_nodename.attr, > _attr_devtype.attr, > _attr_modalias.attr, > + _attr_state.attr, > NULL, > }; >
[PATCH net-next] genetlink: constify genl_err_attr() argument
genl_err_attr() sets netlink_ext_ack::bad_attr which is a pointer to const struct nlattr so make the attr argument also const. Signed-off-by: Michal Kubecek --- include/net/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index decf6012a401..aa2e5888f18d 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) static inline int genl_err_attr(struct genl_info *info, int err, - struct nlattr *attr) + const struct nlattr *attr) { info->extack->bad_attr = attr; -- 2.18.0
[PATCH net-next] genetlink: constify genl_err_attr() argument
genl_err_attr() sets netlink_ext_ack::bad_attr which is a pointer to const struct nlattr so make the attr argument also const. Signed-off-by: Michal Kubecek --- include/net/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index decf6012a401..aa2e5888f18d 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) static inline int genl_err_attr(struct genl_info *info, int err, - struct nlattr *attr) + const struct nlattr *attr) { info->extack->bad_attr = attr; -- 2.18.0
Re: [PATCH] RISC-V: Mask out the F extension on systems without D
On Tue, 28 Aug 2018 00:10:32 PDT (-0700), alan...@andestech.com wrote: Hi Palmer, On Mon, Aug 27, 2018 at 03:03:52PM -0700, Palmer Dabbelt wrote: The RISC-V Linux port doesn't support systems that have the F extension but don't have the D extension -- we actually don't support systems without D either, but Alan's patch set is rectifying that soon. For now I think we can leave this in a semi-broken state and just wait for Alan's patch set to get merged for proper non-FPU support -- the patch set is starting to look good, so doing something in-between doesn't seem like it's worth the work. I don't think it's worth fretting about support for systems with F but not D for now: our glibc ABIs are IMAC and IMAFDC so they probably won't end up being popular. We can always extend this in the future. CC: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 17011a870044..652d102ffa06 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -57,5 +57,12 @@ void riscv_fill_hwcap(void) for (i = 0; i < strlen(isa); ++i) elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; + /* We don't support systems with F but without D, so mask those out +* here. */ + if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { + pr_info("This kernel does not support systems with F but not D"); + elf_hwcap &= ~COMPAT_HWCAP_ISA_F; + } + The commit message does address the problem and this patch does provide checks and helpful information to users, but I wonder if we really need this patch, for two reasons: * Just as you mentioned, current glibc ABI does not support such a thing as IMAFC, so probably no one has had trouble with this. To be honest, I suppose that anybody (RISC-V enthusiasts or vendors) who really need F-only support in kernel should get themself involved in the development by sending patches to improve. * There are corner cases to let a F-only machine to pass the check in this patch. For instance, a vendor decides to name her extension ISA as doom, and supports single-precision FP only, so her ISA string would be IMAFCXdoom. The variable elf_hwcap is calculated at the loop in line 57,58, the 'd' from Xdoom would bypass the check, while the underlying machine does not support double-precision FP. Ah, yes, that makes sense. I'd go the other way here and just be strict about parsing the ISA string: it's defined to be listed in a particular order, so we should really only be accepting legal ISA strings. I'll submit a second patch to fix this behavior. pr_info("elf_hwcap is 0x%lx", elf_hwcap); } -- 2.16.4 I don't know if the reasons make sense to you, but anyway that's all I would like to say about this patch. Alan
Re: [PATCH] RISC-V: Mask out the F extension on systems without D
On Tue, 28 Aug 2018 00:10:32 PDT (-0700), alan...@andestech.com wrote: Hi Palmer, On Mon, Aug 27, 2018 at 03:03:52PM -0700, Palmer Dabbelt wrote: The RISC-V Linux port doesn't support systems that have the F extension but don't have the D extension -- we actually don't support systems without D either, but Alan's patch set is rectifying that soon. For now I think we can leave this in a semi-broken state and just wait for Alan's patch set to get merged for proper non-FPU support -- the patch set is starting to look good, so doing something in-between doesn't seem like it's worth the work. I don't think it's worth fretting about support for systems with F but not D for now: our glibc ABIs are IMAC and IMAFDC so they probably won't end up being popular. We can always extend this in the future. CC: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 17011a870044..652d102ffa06 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -57,5 +57,12 @@ void riscv_fill_hwcap(void) for (i = 0; i < strlen(isa); ++i) elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; + /* We don't support systems with F but without D, so mask those out +* here. */ + if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { + pr_info("This kernel does not support systems with F but not D"); + elf_hwcap &= ~COMPAT_HWCAP_ISA_F; + } + The commit message does address the problem and this patch does provide checks and helpful information to users, but I wonder if we really need this patch, for two reasons: * Just as you mentioned, current glibc ABI does not support such a thing as IMAFC, so probably no one has had trouble with this. To be honest, I suppose that anybody (RISC-V enthusiasts or vendors) who really need F-only support in kernel should get themself involved in the development by sending patches to improve. * There are corner cases to let a F-only machine to pass the check in this patch. For instance, a vendor decides to name her extension ISA as doom, and supports single-precision FP only, so her ISA string would be IMAFCXdoom. The variable elf_hwcap is calculated at the loop in line 57,58, the 'd' from Xdoom would bypass the check, while the underlying machine does not support double-precision FP. Ah, yes, that makes sense. I'd go the other way here and just be strict about parsing the ISA string: it's defined to be listed in a particular order, so we should really only be accepting legal ISA strings. I'll submit a second patch to fix this behavior. pr_info("elf_hwcap is 0x%lx", elf_hwcap); } -- 2.16.4 I don't know if the reasons make sense to you, but anyway that's all I would like to say about this patch. Alan
Re: [PATCH 1/2] arm64: PCI: Remove node-local allocations when initialising host controller
On Tue, Aug 28, 2018 at 04:05:12PM +0100, Punit Agrawal wrote: > Memory for host controller data structures is allocated local to the > node to which the controller is associated with. This has been the > behaviour since support for ACPI was added in > commit 0cb0786bac15 ("ARM64: PCI: Support ACPI-based PCI host controller"). > > Drop the node local allocation as there is no benefit from doing so - > the usage of these structures is independent from where the controller > is located. > > Signed-off-by: Punit Agrawal > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Lorenzo Pieralisi > Cc: linux-arm-ker...@lists.infradead.org > --- > arch/arm64/kernel/pci.c | 5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) Acked-by: Will Deacon Will > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c > index 0e2ea1c78542..bb85e2f4603f 100644 > --- a/arch/arm64/kernel/pci.c > +++ b/arch/arm64/kernel/pci.c > @@ -165,16 +165,15 @@ static void pci_acpi_generic_release_info(struct > acpi_pci_root_info *ci) > /* Interface called from ACPI code to setup PCI host controller */ > struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) > { > - int node = acpi_get_node(root->device->handle); > struct acpi_pci_generic_root_info *ri; > struct pci_bus *bus, *child; > struct acpi_pci_root_ops *root_ops; > > - ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node); > + ri = kzalloc(sizeof(*ri), GFP_KERNEL); > if (!ri) > return NULL; > > - root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); > + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); > if (!root_ops) { > kfree(ri); > return NULL; > -- > 2.18.0 >
Re: [PATCH 1/2] arm64: PCI: Remove node-local allocations when initialising host controller
On Tue, Aug 28, 2018 at 04:05:12PM +0100, Punit Agrawal wrote: > Memory for host controller data structures is allocated local to the > node to which the controller is associated with. This has been the > behaviour since support for ACPI was added in > commit 0cb0786bac15 ("ARM64: PCI: Support ACPI-based PCI host controller"). > > Drop the node local allocation as there is no benefit from doing so - > the usage of these structures is independent from where the controller > is located. > > Signed-off-by: Punit Agrawal > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Lorenzo Pieralisi > Cc: linux-arm-ker...@lists.infradead.org > --- > arch/arm64/kernel/pci.c | 5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) Acked-by: Will Deacon Will > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c > index 0e2ea1c78542..bb85e2f4603f 100644 > --- a/arch/arm64/kernel/pci.c > +++ b/arch/arm64/kernel/pci.c > @@ -165,16 +165,15 @@ static void pci_acpi_generic_release_info(struct > acpi_pci_root_info *ci) > /* Interface called from ACPI code to setup PCI host controller */ > struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) > { > - int node = acpi_get_node(root->device->handle); > struct acpi_pci_generic_root_info *ri; > struct pci_bus *bus, *child; > struct acpi_pci_root_ops *root_ops; > > - ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node); > + ri = kzalloc(sizeof(*ri), GFP_KERNEL); > if (!ri) > return NULL; > > - root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); > + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); > if (!root_ops) { > kfree(ri); > return NULL; > -- > 2.18.0 >
Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.
Is this patch not in consideration, there are no review comment for it. this patch fixes an issue with hdac hdmi codec driver. On one of our platform HD audio controller takes arounf 300ms. Below are the snippet of dmesg log. [ 3778.461888] calling :00:0e.0+ @ 3667, parent: pci:00, cb: pci_pm_resume [ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs When HD audio controller is in runtime suspend state, with direct complete, we can improve overall system wide resume latency. On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote: > Keep hdac hdmi codec to be in runtime suspended while entering to > system wide suspend. Currently hdac hdmi codec driver using its > suspend and resume operation in prepare and complete PM callbacks, > and it resumes the hd audio controller (parent of self) from runtime > suspend and blocks the direct complete for hd audio controller. > > If hdac-codec is already in runtime suspend state skip its power down > sequence in prepare, power up sequence in complete phase. It enables > direct complete path for hdac-codec and hd audio controller > PCI device. > > Signed-off-by: Anshuman Gupta > --- > Changes in v2 > - Changed the commit message. > - Using pm_runtime_suspended instead of pm_runtime_status_suspended > in order to handle any race condition. > > sound/soc/codecs/hdac_hdmi.c | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c > index 84f7a7a..e965338 100644 > --- a/sound/soc/codecs/hdac_hdmi.c > +++ b/sound/soc/codecs/hdac_hdmi.c > @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev) > struct hdac_ext_device *edev = to_hda_ext_device(dev); > struct hdac_device *hdev = >hdev; > > + if (pm_runtime_suspended(dev)) > + return 1; > + > pm_runtime_get_sync(>hdev.dev); > > /* > @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev) > struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev); > struct hdac_device *hdev = >hdev; > > + if (pm_runtime_suspended(dev)) > + return; > + > /* Power up afg */ > snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, > AC_PWRST_D0); > -- > 2.7.4 > -- Thanks, Anshuman.
Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.
Is this patch not in consideration, there are no review comment for it. this patch fixes an issue with hdac hdmi codec driver. On one of our platform HD audio controller takes arounf 300ms. Below are the snippet of dmesg log. [ 3778.461888] calling :00:0e.0+ @ 3667, parent: pci:00, cb: pci_pm_resume [ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs When HD audio controller is in runtime suspend state, with direct complete, we can improve overall system wide resume latency. On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote: > Keep hdac hdmi codec to be in runtime suspended while entering to > system wide suspend. Currently hdac hdmi codec driver using its > suspend and resume operation in prepare and complete PM callbacks, > and it resumes the hd audio controller (parent of self) from runtime > suspend and blocks the direct complete for hd audio controller. > > If hdac-codec is already in runtime suspend state skip its power down > sequence in prepare, power up sequence in complete phase. It enables > direct complete path for hdac-codec and hd audio controller > PCI device. > > Signed-off-by: Anshuman Gupta > --- > Changes in v2 > - Changed the commit message. > - Using pm_runtime_suspended instead of pm_runtime_status_suspended > in order to handle any race condition. > > sound/soc/codecs/hdac_hdmi.c | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c > index 84f7a7a..e965338 100644 > --- a/sound/soc/codecs/hdac_hdmi.c > +++ b/sound/soc/codecs/hdac_hdmi.c > @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev) > struct hdac_ext_device *edev = to_hda_ext_device(dev); > struct hdac_device *hdev = >hdev; > > + if (pm_runtime_suspended(dev)) > + return 1; > + > pm_runtime_get_sync(>hdev.dev); > > /* > @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev) > struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev); > struct hdac_device *hdev = >hdev; > > + if (pm_runtime_suspended(dev)) > + return; > + > /* Power up afg */ > snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, > AC_PWRST_D0); > -- > 2.7.4 > -- Thanks, Anshuman.
Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP
On Tue, Aug 28, 2018 at 6:25 PM Borislav Petkov wrote: > > On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote: > > show_opcodes() is used both for dumping kernel instructions and for dumping > > user instructions. If userspace causes #PF by jumping to a kernel address, > > show_opcodes() can be reached with regs->ip controlled by the user, > > pointing to kernel code. > > Yap, and people keep asking how to dump the running kernel, after > patching and jump labels and stuff... Here's how! > > :- > > > Make sure that userspace can't trick us into > > dumping kernel memory into dmesg. > > > > Cc: sta...@vger.kernel.org > > Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function") > > I think this one is more likely: > > ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults") > > as it added the dumping of user opcode bytes. No, you can also get user opcode bytes printed by WARN() and friends. When you add a WARN() in the pagefault handler, you get something like this. The first "Code:" line is from ba54d856a9d8, but the second one further down is from before that. [ 125.564041] segfault[1602]: segfault at 854340c0 ip 854340c0 sp 7ffd4cc7a568 error 15 [ 125.569923] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 125.576859] [ cut here ] [ 125.578406] TESTING WARN() [ 125.578439] WARNING: CPU: 6 PID: 1602 at arch/x86/mm/fault.c:894 __bad_area_nosemaphore+0x147/0x270 [ 125.582172] Modules linked in: bpfilter [ 125.583394] CPU: 6 PID: 1602 Comm: segfault Tainted: GW 4.18.0+ #108 [ 125.585811] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 125.588410] RIP: 0010:__bad_area_nosemaphore+0x147/0x270 [ 125.590078] Code: 48 89 d9 48 89 ea 44 89 e6 48 c7 83 30 0b 00 00 0e 00 00 00 bf 0b 00 00 00 e8 f5 eb ff ff 48 c7 c7 00 61 66 84 e8 79 11 05 00 <0f> 0b 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 83 c4 28 4c [ 125.595779] RSP: 0018:8801cb3b7e18 EFLAGS: 00010286 [ 125.597426] RAX: RBX: 8801cbb9e000 RCX: [ 125.599605] RDX: 0001 RSI: dc00 RDI: 86678ea0 [ 125.601800] RBP: 854340c0 R08: ed003d873ed5 R09: ed003d873ed5 [ 125.603935] R10: 0001 R11: ed003d873ed4 R12: 0001 [ 125.606113] R13: R14: 0015 R15: 8801cb3b7f58 [ 125.608250] FS: 7fe30d518700() GS:8801ec38() knlGS: [ 125.610608] CS: 0010 DS: ES: CR0: 80050033 [ 125.612331] CR2: 854340c0 CR3: 0001d563e001 CR4: 003606e0 [ 125.614470] DR0: DR1: DR2: [ 125.616607] DR3: DR6: fffe0ff0 DR7: 0400 [ 125.618736] Call Trace: [ 125.619475] __do_page_fault+0x133/0x780 [ 125.620646] ? mm_fault_error+0x1b0/0x1b0 [ 125.622236] ? async_page_fault+0x8/0x30 [ 125.623388] async_page_fault+0x1e/0x30 [ 125.624526] RIP: 0033:core_pattern+0x0/0x880 [ 125.625786] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 125.631208] RSP: 002b:7ffd4cc7a568 EFLAGS: 00010202 [ 125.632737] RAX: 854340c0 RBX: RCX: [ 125.635039] RDX: 7ffd4cc7a678 RSI: 7ffd4cc7a668 RDI: 0001 [ 125.637088] RBP: 7ffd4cc7a580 R08: 562d395106f0 R09: 7fe30d323cb0 [ 125.639153] R10: R11: 7fe30d0d23c0 R12: 562d39510530 [ 125.641183] R13: 7ffd4cc7a660 R14: R15: [ 125.643221] ---[ end trace fb20716f9d6369bd ]--- > > Reviewed-by: Kees Cook > > Signed-off-by: Jann Horn > > --- > > v2: Andy pointed out that I probably shouldn't be doing wrapping > > arithmetic on pointers. > > > > arch/x86/include/asm/stacktrace.h | 2 +- > > arch/x86/kernel/dumpstack.c | 13 ++--- > > arch/x86/mm/fault.c | 2 +- > > 3 files changed, 12 insertions(+), 5 deletions(-) > > > > diff --git a/arch/x86/include/asm/stacktrace.h > > b/arch/x86/include/asm/stacktrace.h > > index b6dc698f992a..f335aad404a4 100644 > > --- a/arch/x86/include/asm/stacktrace.h > > +++ b/arch/x86/include/asm/stacktrace.h > > @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void) > > return (unsigned long)frame; > > } > > > > -void show_opcodes(u8 *rip, const char *loglvl); > > +void show_opcodes(struct pt_regs *regs, const char *loglvl); > > void show_ip(struct pt_regs *regs, const char *loglvl); > > #endif /* _ASM_X86_STACKTRACE_H */ > > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c > >
Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP
On Tue, Aug 28, 2018 at 6:25 PM Borislav Petkov wrote: > > On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote: > > show_opcodes() is used both for dumping kernel instructions and for dumping > > user instructions. If userspace causes #PF by jumping to a kernel address, > > show_opcodes() can be reached with regs->ip controlled by the user, > > pointing to kernel code. > > Yap, and people keep asking how to dump the running kernel, after > patching and jump labels and stuff... Here's how! > > :- > > > Make sure that userspace can't trick us into > > dumping kernel memory into dmesg. > > > > Cc: sta...@vger.kernel.org > > Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function") > > I think this one is more likely: > > ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults") > > as it added the dumping of user opcode bytes. No, you can also get user opcode bytes printed by WARN() and friends. When you add a WARN() in the pagefault handler, you get something like this. The first "Code:" line is from ba54d856a9d8, but the second one further down is from before that. [ 125.564041] segfault[1602]: segfault at 854340c0 ip 854340c0 sp 7ffd4cc7a568 error 15 [ 125.569923] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 125.576859] [ cut here ] [ 125.578406] TESTING WARN() [ 125.578439] WARNING: CPU: 6 PID: 1602 at arch/x86/mm/fault.c:894 __bad_area_nosemaphore+0x147/0x270 [ 125.582172] Modules linked in: bpfilter [ 125.583394] CPU: 6 PID: 1602 Comm: segfault Tainted: GW 4.18.0+ #108 [ 125.585811] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 125.588410] RIP: 0010:__bad_area_nosemaphore+0x147/0x270 [ 125.590078] Code: 48 89 d9 48 89 ea 44 89 e6 48 c7 83 30 0b 00 00 0e 00 00 00 bf 0b 00 00 00 e8 f5 eb ff ff 48 c7 c7 00 61 66 84 e8 79 11 05 00 <0f> 0b 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 83 c4 28 4c [ 125.595779] RSP: 0018:8801cb3b7e18 EFLAGS: 00010286 [ 125.597426] RAX: RBX: 8801cbb9e000 RCX: [ 125.599605] RDX: 0001 RSI: dc00 RDI: 86678ea0 [ 125.601800] RBP: 854340c0 R08: ed003d873ed5 R09: ed003d873ed5 [ 125.603935] R10: 0001 R11: ed003d873ed4 R12: 0001 [ 125.606113] R13: R14: 0015 R15: 8801cb3b7f58 [ 125.608250] FS: 7fe30d518700() GS:8801ec38() knlGS: [ 125.610608] CS: 0010 DS: ES: CR0: 80050033 [ 125.612331] CR2: 854340c0 CR3: 0001d563e001 CR4: 003606e0 [ 125.614470] DR0: DR1: DR2: [ 125.616607] DR3: DR6: fffe0ff0 DR7: 0400 [ 125.618736] Call Trace: [ 125.619475] __do_page_fault+0x133/0x780 [ 125.620646] ? mm_fault_error+0x1b0/0x1b0 [ 125.622236] ? async_page_fault+0x8/0x30 [ 125.623388] async_page_fault+0x1e/0x30 [ 125.624526] RIP: 0033:core_pattern+0x0/0x880 [ 125.625786] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 125.631208] RSP: 002b:7ffd4cc7a568 EFLAGS: 00010202 [ 125.632737] RAX: 854340c0 RBX: RCX: [ 125.635039] RDX: 7ffd4cc7a678 RSI: 7ffd4cc7a668 RDI: 0001 [ 125.637088] RBP: 7ffd4cc7a580 R08: 562d395106f0 R09: 7fe30d323cb0 [ 125.639153] R10: R11: 7fe30d0d23c0 R12: 562d39510530 [ 125.641183] R13: 7ffd4cc7a660 R14: R15: [ 125.643221] ---[ end trace fb20716f9d6369bd ]--- > > Reviewed-by: Kees Cook > > Signed-off-by: Jann Horn > > --- > > v2: Andy pointed out that I probably shouldn't be doing wrapping > > arithmetic on pointers. > > > > arch/x86/include/asm/stacktrace.h | 2 +- > > arch/x86/kernel/dumpstack.c | 13 ++--- > > arch/x86/mm/fault.c | 2 +- > > 3 files changed, 12 insertions(+), 5 deletions(-) > > > > diff --git a/arch/x86/include/asm/stacktrace.h > > b/arch/x86/include/asm/stacktrace.h > > index b6dc698f992a..f335aad404a4 100644 > > --- a/arch/x86/include/asm/stacktrace.h > > +++ b/arch/x86/include/asm/stacktrace.h > > @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void) > > return (unsigned long)frame; > > } > > > > -void show_opcodes(u8 *rip, const char *loglvl); > > +void show_opcodes(struct pt_regs *regs, const char *loglvl); > > void show_ip(struct pt_regs *regs, const char *loglvl); > > #endif /* _ASM_X86_STACKTRACE_H */ > > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c > >
[PATCH] power: supply: remove unused pointer 'dev'
From: Colin Ian King Pointer 'dev' is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: variable 'dev' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King --- drivers/power/supply/cros_usbpd-charger.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/power/supply/cros_usbpd-charger.c b/drivers/power/supply/cros_usbpd-charger.c index 688a16bacfbb..f2b8de502b82 100644 --- a/drivers/power/supply/cros_usbpd-charger.c +++ b/drivers/power/supply/cros_usbpd-charger.c @@ -378,12 +378,10 @@ static int cros_usbpd_charger_ec_event(struct notifier_block *nb, { struct cros_ec_device *ec_device; struct charger_data *charger; - struct device *dev; u32 host_event; charger = container_of(nb, struct charger_data, notifier); ec_device = charger->ec_device; - dev = charger->dev; host_event = cros_ec_get_host_event(ec_device); if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU)) { -- 2.17.1
[PATCH] power: supply: remove unused pointer 'dev'
From: Colin Ian King Pointer 'dev' is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: variable 'dev' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King --- drivers/power/supply/cros_usbpd-charger.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/power/supply/cros_usbpd-charger.c b/drivers/power/supply/cros_usbpd-charger.c index 688a16bacfbb..f2b8de502b82 100644 --- a/drivers/power/supply/cros_usbpd-charger.c +++ b/drivers/power/supply/cros_usbpd-charger.c @@ -378,12 +378,10 @@ static int cros_usbpd_charger_ec_event(struct notifier_block *nb, { struct cros_ec_device *ec_device; struct charger_data *charger; - struct device *dev; u32 host_event; charger = container_of(nb, struct charger_data, notifier); ec_device = charger->ec_device; - dev = charger->dev; host_event = cros_ec_get_host_event(ec_device); if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU)) { -- 2.17.1
Re: [PATCH v2] objtool: Support multiple rodata sections.
Ping... are there any comments on this? On 03/08/18 19:40, Allan Xavier wrote: > This commit adds support for processing switch jump tables in objects > with multiple .rodata sections, such as those created when using > -ffunction-sections and -fdata-sections. Currently, objtool always > looks in .rodata for jump table information which results in many > "sibling call from callable instruction with modified stack frame" > warnings with objects compiled using those flags. > > The fix is comprised of three parts: > > 1. Flagging all .rodata sections when importing elf information for > easier checking later. > > 2. Keeping a reference to the section each relocation is from in order > to get the list_head for the other relocations in that section. > > 3. Finding jump tables by following relocations to .rodata sections, > rather than always referencing a single global .rodata section. > > The patch has been tested without data sections enabled and no > differences in the resulting orc unwind information were seen. > > Note that as objtool adds terminators to end of each .text section the > unwind information generated between a function+data sections build and > a normal build aren't directly comparable. Manual inspection suggests > that objtool is now generating the correct information, or at least > making more of an effort to do so than it did previously. > > Signed-off-by: Allan Xavier > --- > Changes since v1: > - Cleaned up section symbol check. > - Fixed brackets. > - Moved mark_rodata to decode_sections(). > - Excluded *.str1.[18] sections from rodata sections. > > tools/objtool/check.c | 39 +-- > tools/objtool/check.h | 4 ++-- > tools/objtool/elf.c | 1 + > tools/objtool/elf.h | 3 ++- > 4 files changed, 38 insertions(+), 9 deletions(-) > > diff --git a/tools/objtool/check.c b/tools/objtool/check.c > index f4a25bd1871fb..e3a5d53c4b83b 100644 > --- a/tools/objtool/check.c > +++ b/tools/objtool/check.c > @@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, > struct instruction *insn, > struct symbol *pfunc = insn->func->pfunc; > unsigned int prev_offset = 0; > > - list_for_each_entry_from(rela, >rodata->rela->rela_list, list) { > + list_for_each_entry_from(rela, >rela_sec->rela_list, list) { > if (rela == next_table) > break; > > @@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file > *file, > { > struct rela *text_rela, *rodata_rela; > struct instruction *orig_insn = insn; > + struct section *rodata_sec; > unsigned long table_offset; > > /* > @@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct > objtool_file *file, > /* look for a relocation which references .rodata */ > text_rela = find_rela_by_dest_range(insn->sec, insn->offset, > insn->len); > - if (!text_rela || text_rela->sym != file->rodata->sym) > + if (!text_rela || text_rela->sym->type != STT_SECTION || > + !text_rela->sym->sec->rodata) > continue; > > table_offset = text_rela->addend; > + rodata_sec = text_rela->sym->sec; > + > if (text_rela->type == R_X86_64_PC32) > table_offset += 4; > > @@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct > objtool_file *file, >* Make sure the .rodata address isn't associated with a >* symbol. gcc jump tables are anonymous data. >*/ > - if (find_symbol_containing(file->rodata, table_offset)) > + if (find_symbol_containing(rodata_sec, table_offset)) > continue; > > - rodata_rela = find_rela_by_dest(file->rodata, table_offset); > + rodata_rela = find_rela_by_dest(rodata_sec, table_offset); > if (rodata_rela) { > /* >* Use of RIP-relative switch jumps is quite rare, and > @@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file > *file) > struct symbol *func; > int ret; > > - if (!file->rodata || !file->rodata->rela) > + if (!file->rodata) > return 0; > > for_each_sec(file, sec) { > @@ -1197,10 +1201,34 @@ static int read_retpoline_hints(struct objtool_file > *file) > return 0; > } > > +static void mark_rodata(struct objtool_file *file) > +{ > + struct section *sec; > + bool found = false; > + static const char *str1 = ".str1."; > + const int str1len = strlen(str1) + 1; > + > + for_each_sec(file, sec) { > + if (strstr(sec->name, ".rodata") == sec->name) { > + char *str1pos = sec->name + strlen(sec->name) - str1len; > + > + /* Skips over .rodata.str1.1 and
Re: [PATCH v2] objtool: Support multiple rodata sections.
Ping... are there any comments on this? On 03/08/18 19:40, Allan Xavier wrote: > This commit adds support for processing switch jump tables in objects > with multiple .rodata sections, such as those created when using > -ffunction-sections and -fdata-sections. Currently, objtool always > looks in .rodata for jump table information which results in many > "sibling call from callable instruction with modified stack frame" > warnings with objects compiled using those flags. > > The fix is comprised of three parts: > > 1. Flagging all .rodata sections when importing elf information for > easier checking later. > > 2. Keeping a reference to the section each relocation is from in order > to get the list_head for the other relocations in that section. > > 3. Finding jump tables by following relocations to .rodata sections, > rather than always referencing a single global .rodata section. > > The patch has been tested without data sections enabled and no > differences in the resulting orc unwind information were seen. > > Note that as objtool adds terminators to end of each .text section the > unwind information generated between a function+data sections build and > a normal build aren't directly comparable. Manual inspection suggests > that objtool is now generating the correct information, or at least > making more of an effort to do so than it did previously. > > Signed-off-by: Allan Xavier > --- > Changes since v1: > - Cleaned up section symbol check. > - Fixed brackets. > - Moved mark_rodata to decode_sections(). > - Excluded *.str1.[18] sections from rodata sections. > > tools/objtool/check.c | 39 +-- > tools/objtool/check.h | 4 ++-- > tools/objtool/elf.c | 1 + > tools/objtool/elf.h | 3 ++- > 4 files changed, 38 insertions(+), 9 deletions(-) > > diff --git a/tools/objtool/check.c b/tools/objtool/check.c > index f4a25bd1871fb..e3a5d53c4b83b 100644 > --- a/tools/objtool/check.c > +++ b/tools/objtool/check.c > @@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, > struct instruction *insn, > struct symbol *pfunc = insn->func->pfunc; > unsigned int prev_offset = 0; > > - list_for_each_entry_from(rela, >rodata->rela->rela_list, list) { > + list_for_each_entry_from(rela, >rela_sec->rela_list, list) { > if (rela == next_table) > break; > > @@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file > *file, > { > struct rela *text_rela, *rodata_rela; > struct instruction *orig_insn = insn; > + struct section *rodata_sec; > unsigned long table_offset; > > /* > @@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct > objtool_file *file, > /* look for a relocation which references .rodata */ > text_rela = find_rela_by_dest_range(insn->sec, insn->offset, > insn->len); > - if (!text_rela || text_rela->sym != file->rodata->sym) > + if (!text_rela || text_rela->sym->type != STT_SECTION || > + !text_rela->sym->sec->rodata) > continue; > > table_offset = text_rela->addend; > + rodata_sec = text_rela->sym->sec; > + > if (text_rela->type == R_X86_64_PC32) > table_offset += 4; > > @@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct > objtool_file *file, >* Make sure the .rodata address isn't associated with a >* symbol. gcc jump tables are anonymous data. >*/ > - if (find_symbol_containing(file->rodata, table_offset)) > + if (find_symbol_containing(rodata_sec, table_offset)) > continue; > > - rodata_rela = find_rela_by_dest(file->rodata, table_offset); > + rodata_rela = find_rela_by_dest(rodata_sec, table_offset); > if (rodata_rela) { > /* >* Use of RIP-relative switch jumps is quite rare, and > @@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file > *file) > struct symbol *func; > int ret; > > - if (!file->rodata || !file->rodata->rela) > + if (!file->rodata) > return 0; > > for_each_sec(file, sec) { > @@ -1197,10 +1201,34 @@ static int read_retpoline_hints(struct objtool_file > *file) > return 0; > } > > +static void mark_rodata(struct objtool_file *file) > +{ > + struct section *sec; > + bool found = false; > + static const char *str1 = ".str1."; > + const int str1len = strlen(str1) + 1; > + > + for_each_sec(file, sec) { > + if (strstr(sec->name, ".rodata") == sec->name) { > + char *str1pos = sec->name + strlen(sec->name) - str1len; > + > + /* Skips over .rodata.str1.1 and
Re: [PATCH] arm64: dts: ti: k3-am65: Change #address-cells and #size-cells of interconnect to 2
* Kishon Vijay Abraham I [180828 10:31]: > AM65 has two PCIe controllers and each PCIe controller has '2' address > spaces one within the 4GB address space of the SoC and the other above > the 4GB address space of the SoC in addition to the register space. The > size of the address space above the 4GB SoC address space is 4GB. These > address ranges will be used by CPU/DMA to access the PCIe address space. > In order to represent the address space above the 4GB SoC address space > and to represent the size of this address space as 4GB, change > address-cells and size-cells of interconnect to 2. ... > cbass_mcu: interconnect@2838 { > compatible = "simple-bus"; > #address-cells = <1>; > #size-cells = <1>; Yup great, the interconnect instances that don't need above 4GB address space should stay this way. Acked-by: Tony Lindgren
Re: [PATCH] arm64: dts: ti: k3-am65: Change #address-cells and #size-cells of interconnect to 2
* Kishon Vijay Abraham I [180828 10:31]: > AM65 has two PCIe controllers and each PCIe controller has '2' address > spaces one within the 4GB address space of the SoC and the other above > the 4GB address space of the SoC in addition to the register space. The > size of the address space above the 4GB SoC address space is 4GB. These > address ranges will be used by CPU/DMA to access the PCIe address space. > In order to represent the address space above the 4GB SoC address space > and to represent the size of this address space as 4GB, change > address-cells and size-cells of interconnect to 2. ... > cbass_mcu: interconnect@2838 { > compatible = "simple-bus"; > #address-cells = <1>; > #size-cells = <1>; Yup great, the interconnect instances that don't need above 4GB address space should stay this way. Acked-by: Tony Lindgren
Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP
On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote: > show_opcodes() is used both for dumping kernel instructions and for dumping > user instructions. If userspace causes #PF by jumping to a kernel address, > show_opcodes() can be reached with regs->ip controlled by the user, > pointing to kernel code. Yap, and people keep asking how to dump the running kernel, after patching and jump labels and stuff... Here's how! :- > Make sure that userspace can't trick us into > dumping kernel memory into dmesg. > > Cc: sta...@vger.kernel.org > Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function") I think this one is more likely: ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults") as it added the dumping of user opcode bytes. > Reviewed-by: Kees Cook > Signed-off-by: Jann Horn > --- > v2: Andy pointed out that I probably shouldn't be doing wrapping > arithmetic on pointers. > > arch/x86/include/asm/stacktrace.h | 2 +- > arch/x86/kernel/dumpstack.c | 13 ++--- > arch/x86/mm/fault.c | 2 +- > 3 files changed, 12 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/include/asm/stacktrace.h > b/arch/x86/include/asm/stacktrace.h > index b6dc698f992a..f335aad404a4 100644 > --- a/arch/x86/include/asm/stacktrace.h > +++ b/arch/x86/include/asm/stacktrace.h > @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void) > return (unsigned long)frame; > } > > -void show_opcodes(u8 *rip, const char *loglvl); > +void show_opcodes(struct pt_regs *regs, const char *loglvl); > void show_ip(struct pt_regs *regs, const char *loglvl); > #endif /* _ASM_X86_STACKTRACE_H */ > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c > index 9c8652974f8e..14b337582b6f 100644 > --- a/arch/x86/kernel/dumpstack.c > +++ b/arch/x86/kernel/dumpstack.c > @@ -89,14 +89,21 @@ static void printk_stack_address(unsigned long address, > int reliable, > * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random > * guesstimate in attempt to achieve all of the above. > */ > -void show_opcodes(u8 *rip, const char *loglvl) > +void show_opcodes(struct pt_regs *regs, const char *loglvl) > { > #define PROLOGUE_SIZE 42 > #define EPILOGUE_SIZE 21 > #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) > u8 opcodes[OPCODE_BUFSIZE]; > + u8 *prologue = (u8 *)(regs->ip - PROLOGUE_SIZE); > + /* > + * Make sure userspace isn't trying to trick us into dumping kernel > + * memory by pointing the userspace instruction pointer at it. > + */ > + bool bad_ip = user_mode(regs) && > + __range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); > Ok, can we pls move the sole dumping of opcodes in a helper called, __show_opcodes(), for example, which the checking wrapper show_opcodes() - without the "__" prefix - calls? So that show_signal_msg() can call the checking variant - show_opcodes() - as userspace might be doing monkey business there and we definitely wanna check first but __show_regs() can call the non-checking variant __show_opcodes() because there we wanna dump whatever rIP points to because we wanna know if the machine has gone off into the weeds etc, when staring at splats. Or am I missing a security aspect here? Thx. -- Regards/Gruss, Boris. SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg) --
Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP
On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote: > show_opcodes() is used both for dumping kernel instructions and for dumping > user instructions. If userspace causes #PF by jumping to a kernel address, > show_opcodes() can be reached with regs->ip controlled by the user, > pointing to kernel code. Yap, and people keep asking how to dump the running kernel, after patching and jump labels and stuff... Here's how! :- > Make sure that userspace can't trick us into > dumping kernel memory into dmesg. > > Cc: sta...@vger.kernel.org > Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function") I think this one is more likely: ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults") as it added the dumping of user opcode bytes. > Reviewed-by: Kees Cook > Signed-off-by: Jann Horn > --- > v2: Andy pointed out that I probably shouldn't be doing wrapping > arithmetic on pointers. > > arch/x86/include/asm/stacktrace.h | 2 +- > arch/x86/kernel/dumpstack.c | 13 ++--- > arch/x86/mm/fault.c | 2 +- > 3 files changed, 12 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/include/asm/stacktrace.h > b/arch/x86/include/asm/stacktrace.h > index b6dc698f992a..f335aad404a4 100644 > --- a/arch/x86/include/asm/stacktrace.h > +++ b/arch/x86/include/asm/stacktrace.h > @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void) > return (unsigned long)frame; > } > > -void show_opcodes(u8 *rip, const char *loglvl); > +void show_opcodes(struct pt_regs *regs, const char *loglvl); > void show_ip(struct pt_regs *regs, const char *loglvl); > #endif /* _ASM_X86_STACKTRACE_H */ > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c > index 9c8652974f8e..14b337582b6f 100644 > --- a/arch/x86/kernel/dumpstack.c > +++ b/arch/x86/kernel/dumpstack.c > @@ -89,14 +89,21 @@ static void printk_stack_address(unsigned long address, > int reliable, > * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random > * guesstimate in attempt to achieve all of the above. > */ > -void show_opcodes(u8 *rip, const char *loglvl) > +void show_opcodes(struct pt_regs *regs, const char *loglvl) > { > #define PROLOGUE_SIZE 42 > #define EPILOGUE_SIZE 21 > #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) > u8 opcodes[OPCODE_BUFSIZE]; > + u8 *prologue = (u8 *)(regs->ip - PROLOGUE_SIZE); > + /* > + * Make sure userspace isn't trying to trick us into dumping kernel > + * memory by pointing the userspace instruction pointer at it. > + */ > + bool bad_ip = user_mode(regs) && > + __range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); > Ok, can we pls move the sole dumping of opcodes in a helper called, __show_opcodes(), for example, which the checking wrapper show_opcodes() - without the "__" prefix - calls? So that show_signal_msg() can call the checking variant - show_opcodes() - as userspace might be doing monkey business there and we definitely wanna check first but __show_regs() can call the non-checking variant __show_opcodes() because there we wanna dump whatever rIP points to because we wanna know if the machine has gone off into the weeds etc, when staring at splats. Or am I missing a security aspect here? Thx. -- Regards/Gruss, Boris. SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg) --
Re: [PATCH] serial: 8250_omap: Make 8250_omap driver driver depend on ARCH_K3
* Nishanth Menon [180828 01:07]: > From: Lokesh Vutla > > Allow 8250 omap serial driver to be used for K3 platforms. > > Signed-off-by: Lokesh Vutla > Signed-off-by: Nishanth Menon > --- > > Now that we have the device tree support merged integrated AND we have > ARCH_K3, > Lets enable the 820 OMAP Driver to build for ARCH_K3 and make it operational. Acked-by: Tony Lindgren
Re: [PATCH] serial: 8250_omap: Make 8250_omap driver driver depend on ARCH_K3
* Nishanth Menon [180828 01:07]: > From: Lokesh Vutla > > Allow 8250 omap serial driver to be used for K3 platforms. > > Signed-off-by: Lokesh Vutla > Signed-off-by: Nishanth Menon > --- > > Now that we have the device tree support merged integrated AND we have > ARCH_K3, > Lets enable the 820 OMAP Driver to build for ARCH_K3 and make it operational. Acked-by: Tony Lindgren
Re: [PATCH] arm64: defconfig: Enable TI's AM6 SoC platform
* Nishanth Menon [180828 00:50]: > Enable K3 SoC platform for TI's AM6 SoC. > > Signed-off-by: Nishanth Menon > --- > Ref: https://www.spinics.net/lists/arm-kernel/msg667805.html > > Unfortunately, Kconfig changes did'nt hit v4.19-rc1 window. > So, as promised, patch based off v4.19-rc1 tag. Acked-by: Tony Lindgren
Re: [PATCH] arm64: defconfig: Enable TI's AM6 SoC platform
* Nishanth Menon [180828 00:50]: > Enable K3 SoC platform for TI's AM6 SoC. > > Signed-off-by: Nishanth Menon > --- > Ref: https://www.spinics.net/lists/arm-kernel/msg667805.html > > Unfortunately, Kconfig changes did'nt hit v4.19-rc1 window. > So, as promised, patch based off v4.19-rc1 tag. Acked-by: Tony Lindgren
[ftrace/kprobes PATCH 3/3] tracing/kprobes: Allow kprobe-events to record module symbol
Allow kprobe-events to record module symbols. Since data symbols in a non-loaded module doesn't exist, it fails to define such symbol as an argument of kprobe-event. But if the kprobe event is defined on that module, we can defer to resolve the symbol address. Note that if given symbol is not found, the event is kept unavailable. User can enable it but the event is not recorded. Signed-off-by: Masami Hiramatsu --- kernel/trace/trace_kprobe.c | 12 kernel/trace/trace_probe.c | 62 +-- kernel/trace/trace_probe.h |4 ++- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fbf609cbeac2..920985fae8c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -366,7 +366,7 @@ static bool within_notrace_func(struct trace_kprobe *tk) /* Internal register function - just handle k*probes and flags */ static int __register_trace_kprobe(struct trace_kprobe *tk) { - int ret; + int i, ret; if (trace_probe_is_registered(>tp)) return -EINVAL; @@ -377,6 +377,12 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; } + for (i = 0; i < tk->tp.nr_args; i++) { + ret = traceprobe_update_arg(>tp.args[i]); + if (ret) + return ret; + } + /* Set/clear disabled flag according to tp->flag */ if (trace_probe_is_enabled(>tp)) tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; @@ -925,6 +931,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, { unsigned long val; +retry: /* 1st stage: get value from context */ switch (code->op) { case FETCH_OP_REG: @@ -950,6 +957,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, val = regs_get_kernel_argument(regs, code->param); break; #endif + case FETCH_NOP_SYMBOL: /* Ignore a place holder */ + code++; + goto retry; default: return -EILSEQ; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 333cda6d2633..5b3d573b3dcf 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -251,16 +251,16 @@ parse_probe_arg(char *arg, const struct fetch_type *type, if (!(flags & TPARG_FL_KERNEL)) return -EINVAL; - ret = traceprobe_split_symbol_offset(arg + 1, ); - if (ret) - break; + /* Preserve symbol for updating */ + code->op = FETCH_NOP_SYMBOL; + code->data = kstrdup(arg + 1, GFP_KERNEL); + if (!code->data) + return -ENOMEM; + if (++code == end) + return -E2BIG; code->op = FETCH_OP_IMM; - code->immediate = - (unsigned long)kallsyms_lookup_name(arg + 1); - if (!code->immediate) - return -ENOENT; - code->immediate += offset; + code->immediate = 0; } /* These are fetching from memory */ if (++code == end) @@ -480,6 +480,11 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1)); fail: + if (ret) { + for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) + if (code->op == FETCH_NOP_SYMBOL) + kfree(code->data); + } kfree(tmp); return ret; @@ -504,12 +509,53 @@ int traceprobe_conflict_field_name(const char *name, void traceprobe_free_probe_arg(struct probe_arg *arg) { + struct fetch_insn *code = arg->code; + + while (code && code->op != FETCH_OP_END) { + if (code->op == FETCH_NOP_SYMBOL) + kfree(code->data); + code++; + } kfree(arg->code); kfree(arg->name); kfree(arg->comm); kfree(arg->fmt); } +int traceprobe_update_arg(struct probe_arg *arg) +{ + struct fetch_insn *code = arg->code; + long offset; + char *tmp; + char c; + int ret = 0; + + while (code && code->op != FETCH_OP_END) { + if (code->op == FETCH_NOP_SYMBOL) { + if (code[1].op != FETCH_OP_IMM) + return -EINVAL; + + tmp = strpbrk("+-", code->data); + if (tmp) + c = *tmp; + ret =
[ftrace/kprobes PATCH 3/3] tracing/kprobes: Allow kprobe-events to record module symbol
Allow kprobe-events to record module symbols. Since data symbols in a non-loaded module doesn't exist, it fails to define such symbol as an argument of kprobe-event. But if the kprobe event is defined on that module, we can defer to resolve the symbol address. Note that if given symbol is not found, the event is kept unavailable. User can enable it but the event is not recorded. Signed-off-by: Masami Hiramatsu --- kernel/trace/trace_kprobe.c | 12 kernel/trace/trace_probe.c | 62 +-- kernel/trace/trace_probe.h |4 ++- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fbf609cbeac2..920985fae8c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -366,7 +366,7 @@ static bool within_notrace_func(struct trace_kprobe *tk) /* Internal register function - just handle k*probes and flags */ static int __register_trace_kprobe(struct trace_kprobe *tk) { - int ret; + int i, ret; if (trace_probe_is_registered(>tp)) return -EINVAL; @@ -377,6 +377,12 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; } + for (i = 0; i < tk->tp.nr_args; i++) { + ret = traceprobe_update_arg(>tp.args[i]); + if (ret) + return ret; + } + /* Set/clear disabled flag according to tp->flag */ if (trace_probe_is_enabled(>tp)) tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; @@ -925,6 +931,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, { unsigned long val; +retry: /* 1st stage: get value from context */ switch (code->op) { case FETCH_OP_REG: @@ -950,6 +957,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, val = regs_get_kernel_argument(regs, code->param); break; #endif + case FETCH_NOP_SYMBOL: /* Ignore a place holder */ + code++; + goto retry; default: return -EILSEQ; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 333cda6d2633..5b3d573b3dcf 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -251,16 +251,16 @@ parse_probe_arg(char *arg, const struct fetch_type *type, if (!(flags & TPARG_FL_KERNEL)) return -EINVAL; - ret = traceprobe_split_symbol_offset(arg + 1, ); - if (ret) - break; + /* Preserve symbol for updating */ + code->op = FETCH_NOP_SYMBOL; + code->data = kstrdup(arg + 1, GFP_KERNEL); + if (!code->data) + return -ENOMEM; + if (++code == end) + return -E2BIG; code->op = FETCH_OP_IMM; - code->immediate = - (unsigned long)kallsyms_lookup_name(arg + 1); - if (!code->immediate) - return -ENOENT; - code->immediate += offset; + code->immediate = 0; } /* These are fetching from memory */ if (++code == end) @@ -480,6 +480,11 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1)); fail: + if (ret) { + for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) + if (code->op == FETCH_NOP_SYMBOL) + kfree(code->data); + } kfree(tmp); return ret; @@ -504,12 +509,53 @@ int traceprobe_conflict_field_name(const char *name, void traceprobe_free_probe_arg(struct probe_arg *arg) { + struct fetch_insn *code = arg->code; + + while (code && code->op != FETCH_OP_END) { + if (code->op == FETCH_NOP_SYMBOL) + kfree(code->data); + code++; + } kfree(arg->code); kfree(arg->name); kfree(arg->comm); kfree(arg->fmt); } +int traceprobe_update_arg(struct probe_arg *arg) +{ + struct fetch_insn *code = arg->code; + long offset; + char *tmp; + char c; + int ret = 0; + + while (code && code->op != FETCH_OP_END) { + if (code->op == FETCH_NOP_SYMBOL) { + if (code[1].op != FETCH_OP_IMM) + return -EINVAL; + + tmp = strpbrk("+-", code->data); + if (tmp) + c = *tmp; + ret =
Re: [PATCH] perf: Support for Arm A32/T32 instruction sets in CoreSight trace
Hi Robert, Your patch landed in the middle of the merge window and will have to be sent again rebased on 4.19-rc1 and long with minor fix found below. Regards, Mathieu On Wed, Aug 22, 2018 at 05:03:57PM +0100, Robert Walker wrote: > This patch adds support for generating instruction samples from trace of > AArch32 programs using the A32 and T32 instruction sets. > > T32 has variable 2 or 4 byte instruction size, so the conversion between > addresses and instruction counts requires extra information from the trace > decoder, requiring version 0.9.1 of OpenCSD. A check for the new struct > member has been added to the feature check for OpenCSD. > > Signed-off-by: Robert Walker > --- > tools/build/feature/test-libopencsd.c | 7 +++ > tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 27 ++ > tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 > tools/perf/util/cs-etm.c| 71 > +++-- > 4 files changed, 75 insertions(+), 40 deletions(-) > > diff --git a/tools/build/feature/test-libopencsd.c > b/tools/build/feature/test-libopencsd.c > index 5ff1246..d96b2df 100644 > --- a/tools/build/feature/test-libopencsd.c > +++ b/tools/build/feature/test-libopencsd.c > @@ -3,6 +3,13 @@ > > int main(void) > { > + /* > + * Requires ocsd_generic_trace_elem.num_instr_range introduced in > + * OpenCSD 0.9 > + */ > + ocsd_generic_trace_elem elem; > + (void)elem.num_instr_range; > + I really like this - simple and efficient. > (void)ocsd_get_version(); > return 0; > } > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > index 938def6..73d8384 100644 > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct > cs_etm_decoder *decoder) > decoder->tail = 0; > decoder->packet_count = 0; > for (i = 0; i < MAX_BUFFER; i++) { > + decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; > decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; > decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; > + decoder->packet_buffer[i].instr_count = 0; > decoder->packet_buffer[i].last_instr_taken_branch = false; > + decoder->packet_buffer[i].last_instr_size = 0; > decoder->packet_buffer[i].exc = false; > decoder->packet_buffer[i].exc_ret = false; > decoder->packet_buffer[i].cpu = INT_MIN; > @@ -294,11 +297,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder > *decoder, > decoder->packet_count++; > > decoder->packet_buffer[et].sample_type = sample_type; > + decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; > decoder->packet_buffer[et].exc = false; > decoder->packet_buffer[et].exc_ret = false; > decoder->packet_buffer[et].cpu = *((int *)inode->priv); > decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; > decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; > + decoder->packet_buffer[et].instr_count = 0; > > if (decoder->packet_count == MAX_BUFFER - 1) > return OCSD_RESP_WAIT; > @@ -321,8 +326,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder > *decoder, > > packet = >packet_buffer[decoder->tail]; > > + switch (elem->isa) { > + case ocsd_isa_aarch64: > + packet->isa = CS_ETM_ISA_A64; > + break; > + case ocsd_isa_arm: > + packet->isa = CS_ETM_ISA_A32; > + break; > + case ocsd_isa_thumb2: > + packet->isa = CS_ETM_ISA_T32; > + break; > + case ocsd_isa_tee: > + case ocsd_isa_jazelle: > + case ocsd_isa_custom: > + case ocsd_isa_unknown: > + default: > + packet->isa = CS_ETM_ISA_UNKNOWN; > + } > + > packet->start_addr = elem->st_addr; > packet->end_addr = elem->en_addr; > + packet->instr_count = elem->num_instr_range; > + > switch (elem->last_i_type) { > case OCSD_INSTR_BR: > case OCSD_INSTR_BR_INDIRECT: > @@ -336,6 +361,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder > *decoder, > break; > } > > + packet->last_instr_size = elem->last_instr_sz; > + > return ret; > } > > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > index 612b575..9a10eda 100644 > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > @@ -28,11 +28,21 @@ enum cs_etm_sample_type { > CS_ETM_TRACE_ON = 1 << 1, > }; > > +enum cs_etm_isa { > + CS_ETM_ISA_UNKNOWN, > + CS_ETM_ISA_A64, > + CS_ETM_ISA_A32, > + CS_ETM_ISA_T32, > +}; > + > struct cs_etm_packet { > enum cs_etm_sample_type
[ftrace/kprobes PATCH 2/3] tracing/kprobes: Check the probe on unloaded module correctly
Current kprobe event doesn't checks correctly whether the given event is on unloaded module or not. It just checks the event has ":" in the name. That is not enough because if we define a probe on non-exist symbol on loaded module, it allows to define that (with warning message) To ensure it correctly, this searches the module name on loaded module list and only if there is not, it allows to define it. (this event will be available when the target module is loaded) Signed-off-by: Masami Hiramatsu --- kernel/trace/trace_kprobe.c | 39 ++- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b006aaeceb92..fbf609cbeac2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -62,9 +62,23 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, return strncmp(mod->name, name, len) == 0 && name[len] == ':'; } -static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) +static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) { - return !!strchr(trace_kprobe_symbol(tk), ':'); + char *p; + bool ret; + + if (!tk->symbol) + return false; + p = strchr(tk->symbol, ':'); + if (!p) + return true; + *p = '\0'; + mutex_lock(_mutex); + ret = !!find_module(tk->symbol); + mutex_unlock(_mutex); + *p = ':'; + + return ret; } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) @@ -374,19 +388,13 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(>rp.kp); - if (ret == 0) + if (ret == 0) { tk->tp.flags |= TP_FLAG_REGISTERED; - else { - if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) { - pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); - ret = 0; - } else if (ret == -EILSEQ) { - pr_warn("Probing address(0x%p) is not an instruction boundary.\n", - tk->rp.kp.addr); - ret = -EINVAL; - } + } else if (ret == -EILSEQ) { + pr_warn("Probing address(0x%p) is not an instruction boundary.\n", + tk->rp.kp.addr); + ret = -EINVAL; } - return ret; } @@ -449,6 +457,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk) /* Register k*probe */ ret = __register_trace_kprobe(tk); + if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { + pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); + ret = 0; + } + if (ret < 0) unregister_kprobe_event(tk); else
[ftrace/kprobes PATCH 1/3] tracing/uprobes: Fix to return -EFAULT if copy_from_user failed
Fix probe_mem_read() to return -EFAULT if copy_from_user() failed. The copy_from_user() returns remaining bytes when it failed, but probe_mem_read() caller expects it returns error code like as probe_kernel_read(). Reported-by: Dan Carpenter Signed-off-by: Masami Hiramatsu --- kernel/trace/trace_uprobe.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 12bdbdf772ed..63e99ee716fe 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -104,7 +104,7 @@ probe_mem_read(void *dest, void *src, size_t size) { void __user *vaddr = (void __force __user *)src; - return copy_from_user(dest, vaddr, size); + return copy_from_user(dest, vaddr, size) ? -EFAULT : 0; } /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
Re: [PATCH] perf: Support for Arm A32/T32 instruction sets in CoreSight trace
Hi Robert, Your patch landed in the middle of the merge window and will have to be sent again rebased on 4.19-rc1 and long with minor fix found below. Regards, Mathieu On Wed, Aug 22, 2018 at 05:03:57PM +0100, Robert Walker wrote: > This patch adds support for generating instruction samples from trace of > AArch32 programs using the A32 and T32 instruction sets. > > T32 has variable 2 or 4 byte instruction size, so the conversion between > addresses and instruction counts requires extra information from the trace > decoder, requiring version 0.9.1 of OpenCSD. A check for the new struct > member has been added to the feature check for OpenCSD. > > Signed-off-by: Robert Walker > --- > tools/build/feature/test-libopencsd.c | 7 +++ > tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 27 ++ > tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 > tools/perf/util/cs-etm.c| 71 > +++-- > 4 files changed, 75 insertions(+), 40 deletions(-) > > diff --git a/tools/build/feature/test-libopencsd.c > b/tools/build/feature/test-libopencsd.c > index 5ff1246..d96b2df 100644 > --- a/tools/build/feature/test-libopencsd.c > +++ b/tools/build/feature/test-libopencsd.c > @@ -3,6 +3,13 @@ > > int main(void) > { > + /* > + * Requires ocsd_generic_trace_elem.num_instr_range introduced in > + * OpenCSD 0.9 > + */ > + ocsd_generic_trace_elem elem; > + (void)elem.num_instr_range; > + I really like this - simple and efficient. > (void)ocsd_get_version(); > return 0; > } > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > index 938def6..73d8384 100644 > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c > @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct > cs_etm_decoder *decoder) > decoder->tail = 0; > decoder->packet_count = 0; > for (i = 0; i < MAX_BUFFER; i++) { > + decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; > decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; > decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; > + decoder->packet_buffer[i].instr_count = 0; > decoder->packet_buffer[i].last_instr_taken_branch = false; > + decoder->packet_buffer[i].last_instr_size = 0; > decoder->packet_buffer[i].exc = false; > decoder->packet_buffer[i].exc_ret = false; > decoder->packet_buffer[i].cpu = INT_MIN; > @@ -294,11 +297,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder > *decoder, > decoder->packet_count++; > > decoder->packet_buffer[et].sample_type = sample_type; > + decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; > decoder->packet_buffer[et].exc = false; > decoder->packet_buffer[et].exc_ret = false; > decoder->packet_buffer[et].cpu = *((int *)inode->priv); > decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; > decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; > + decoder->packet_buffer[et].instr_count = 0; > > if (decoder->packet_count == MAX_BUFFER - 1) > return OCSD_RESP_WAIT; > @@ -321,8 +326,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder > *decoder, > > packet = >packet_buffer[decoder->tail]; > > + switch (elem->isa) { > + case ocsd_isa_aarch64: > + packet->isa = CS_ETM_ISA_A64; > + break; > + case ocsd_isa_arm: > + packet->isa = CS_ETM_ISA_A32; > + break; > + case ocsd_isa_thumb2: > + packet->isa = CS_ETM_ISA_T32; > + break; > + case ocsd_isa_tee: > + case ocsd_isa_jazelle: > + case ocsd_isa_custom: > + case ocsd_isa_unknown: > + default: > + packet->isa = CS_ETM_ISA_UNKNOWN; > + } > + > packet->start_addr = elem->st_addr; > packet->end_addr = elem->en_addr; > + packet->instr_count = elem->num_instr_range; > + > switch (elem->last_i_type) { > case OCSD_INSTR_BR: > case OCSD_INSTR_BR_INDIRECT: > @@ -336,6 +361,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder > *decoder, > break; > } > > + packet->last_instr_size = elem->last_instr_sz; > + > return ret; > } > > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > index 612b575..9a10eda 100644 > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h > @@ -28,11 +28,21 @@ enum cs_etm_sample_type { > CS_ETM_TRACE_ON = 1 << 1, > }; > > +enum cs_etm_isa { > + CS_ETM_ISA_UNKNOWN, > + CS_ETM_ISA_A64, > + CS_ETM_ISA_A32, > + CS_ETM_ISA_T32, > +}; > + > struct cs_etm_packet { > enum cs_etm_sample_type
[ftrace/kprobes PATCH 2/3] tracing/kprobes: Check the probe on unloaded module correctly
Current kprobe event doesn't checks correctly whether the given event is on unloaded module or not. It just checks the event has ":" in the name. That is not enough because if we define a probe on non-exist symbol on loaded module, it allows to define that (with warning message) To ensure it correctly, this searches the module name on loaded module list and only if there is not, it allows to define it. (this event will be available when the target module is loaded) Signed-off-by: Masami Hiramatsu --- kernel/trace/trace_kprobe.c | 39 ++- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b006aaeceb92..fbf609cbeac2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -62,9 +62,23 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, return strncmp(mod->name, name, len) == 0 && name[len] == ':'; } -static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) +static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) { - return !!strchr(trace_kprobe_symbol(tk), ':'); + char *p; + bool ret; + + if (!tk->symbol) + return false; + p = strchr(tk->symbol, ':'); + if (!p) + return true; + *p = '\0'; + mutex_lock(_mutex); + ret = !!find_module(tk->symbol); + mutex_unlock(_mutex); + *p = ':'; + + return ret; } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) @@ -374,19 +388,13 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(>rp.kp); - if (ret == 0) + if (ret == 0) { tk->tp.flags |= TP_FLAG_REGISTERED; - else { - if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) { - pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); - ret = 0; - } else if (ret == -EILSEQ) { - pr_warn("Probing address(0x%p) is not an instruction boundary.\n", - tk->rp.kp.addr); - ret = -EINVAL; - } + } else if (ret == -EILSEQ) { + pr_warn("Probing address(0x%p) is not an instruction boundary.\n", + tk->rp.kp.addr); + ret = -EINVAL; } - return ret; } @@ -449,6 +457,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk) /* Register k*probe */ ret = __register_trace_kprobe(tk); + if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { + pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); + ret = 0; + } + if (ret < 0) unregister_kprobe_event(tk); else
[ftrace/kprobes PATCH 1/3] tracing/uprobes: Fix to return -EFAULT if copy_from_user failed
Fix probe_mem_read() to return -EFAULT if copy_from_user() failed. The copy_from_user() returns remaining bytes when it failed, but probe_mem_read() caller expects it returns error code like as probe_kernel_read(). Reported-by: Dan Carpenter Signed-off-by: Masami Hiramatsu --- kernel/trace/trace_uprobe.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 12bdbdf772ed..63e99ee716fe 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -104,7 +104,7 @@ probe_mem_read(void *dest, void *src, size_t size) { void __user *vaddr = (void __force __user *)src; - return copy_from_user(dest, vaddr, size); + return copy_from_user(dest, vaddr, size) ? -EFAULT : 0; } /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
[ftrace/kprobes PATCH 0/3] tracing: probeevent: Fix module symbol probing
Hi, This series is for fixing some bugs in Steve's ftrace/kprobes branch. git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git Which is based on my fetcharg improvement series. https://lkml.org/lkml/2018/4/25/601 This includes following fixes. - Fix copy_from_user() misusing which Dan was reported. - Fix to reject incorrect probeevent on loaded kernel module. - Fix to update symbol-based argument on module. This also checks the symbol-based argument is correct or not when target module is loaded. If it is not correct, the event is kept unavailable. Thank you, --- Masami Hiramatsu (3): tracing/uprobes: Fix to return -EFAULT if copy_from_user failed tracing/kprobes: Check the probe on unloaded module correctly tracing/kprobes: Allow kprobe-events to record module symbol kernel/trace/trace_kprobe.c | 51 ++- kernel/trace/trace_probe.c | 62 +-- kernel/trace/trace_probe.h |4 ++- kernel/trace/trace_uprobe.c |2 + 4 files changed, 95 insertions(+), 24 deletions(-) -- Masami Hiramatsu (Linaro)
[ftrace/kprobes PATCH 0/3] tracing: probeevent: Fix module symbol probing
Hi, This series is for fixing some bugs in Steve's ftrace/kprobes branch. git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git Which is based on my fetcharg improvement series. https://lkml.org/lkml/2018/4/25/601 This includes following fixes. - Fix copy_from_user() misusing which Dan was reported. - Fix to reject incorrect probeevent on loaded kernel module. - Fix to update symbol-based argument on module. This also checks the symbol-based argument is correct or not when target module is loaded. If it is not correct, the event is kept unavailable. Thank you, --- Masami Hiramatsu (3): tracing/uprobes: Fix to return -EFAULT if copy_from_user failed tracing/kprobes: Check the probe on unloaded module correctly tracing/kprobes: Allow kprobe-events to record module symbol kernel/trace/trace_kprobe.c | 51 ++- kernel/trace/trace_probe.c | 62 +-- kernel/trace/trace_probe.h |4 ++- kernel/trace/trace_uprobe.c |2 + 4 files changed, 95 insertions(+), 24 deletions(-) -- Masami Hiramatsu (Linaro)
[PATCH] [v2] HID: add support for Apple Magic Trackpad 2
USB device Vendor 05ac (Apple) Device 0265 (Magic Trackpad 2) Bluetooth device Vendor 004c (Apple) Device 0265 (Magic Trackpad 2) Add support for Apple Magic Trackpad 2 over USB and bluetooth, putting the device in multi-touch mode. Signed-off-by: Claudio Mettler Signed-off-by: Marek Wyborski Signed-off-by: Sean O'Brien --- drivers/hid/hid-ids.h| 2 + drivers/hid/hid-magicmouse.c | 184 --- 2 files changed, 149 insertions(+), 37 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 79bdf0c7e351..d6d0b20cc015 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -88,9 +88,11 @@ #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 #define USB_VENDOR_ID_APPLE0x05ac +#define BT_VENDOR_ID_APPLE 0x004c #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE0x0304 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e +#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 0x0265 #define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI 0x020e #define USB_DEVICE_ID_APPLE_FOUNTAIN_ISO 0x020f #define USB_DEVICE_ID_APPLE_GEYSER_ANSI0x0214 diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index b454c4386157..7f14866ea3c7 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -54,6 +54,8 @@ module_param(report_undeciphered, bool, 0644); MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state field using a MSC_RAW event"); #define TRACKPAD_REPORT_ID 0x28 +#define TRACKPAD2_USB_REPORT_ID 0x02 +#define TRACKPAD2_BT_REPORT_ID 0x31 #define MOUSE_REPORT_ID0x29 #define DOUBLE_REPORT_ID 0xf7 /* These definitions are not precise, but they're close enough. (Bits @@ -91,6 +93,19 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define TRACKPAD_RES_Y \ ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) +#define TRACKPAD2_DIMENSION_X (float)16000 +#define TRACKPAD2_MIN_X -3678 +#define TRACKPAD2_MAX_X 3934 +#define TRACKPAD2_RES_X \ + ((TRACKPAD2_MAX_X - TRACKPAD2_MIN_X) / (TRACKPAD2_DIMENSION_X / 100)) +#define TRACKPAD2_DIMENSION_Y (float)11490 +#define TRACKPAD2_MIN_Y -2478 +#define TRACKPAD2_MAX_Y 2587 +#define TRACKPAD2_RES_Y \ + ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100)) + +#define MAX_TOUCHES16 + /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. @@ -115,8 +130,8 @@ struct magicmouse_sc { short scroll_x; short scroll_y; u8 size; - } touches[16]; - int tracking_ids[16]; + } touches[MAX_TOUCHES]; + int tracking_ids[MAX_TOUCHES]; }; static int magicmouse_firm_touch(struct magicmouse_sc *msc) @@ -183,6 +198,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda { struct input_dev *input = msc->input; int id, x, y, size, orientation, touch_major, touch_minor, state, down; + int pressure = 0; if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf; @@ -194,7 +210,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda touch_minor = tdata[4]; state = tdata[7] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; - } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ + } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD) { id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf; x = (tdata[1] << 27 | tdata[0] << 19) >> 19; y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); @@ -204,6 +220,18 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda touch_minor = tdata[5]; state = tdata[8] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; + } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 */ + id = tdata[8] & 0xf; + x = (tdata[1] << 27 | tdata[0] << 19) >> 19; + y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); + size = tdata[6]; + orientation = (tdata[8] >> 5) - 4; + touch_major = tdata[4]; + touch_minor = tdata[5]; + /* Prevent zero and low pressure values */ + pressure = tdata[7] + 30; + state = tdata[3] & 0xC0; + down = state == 0x80; } /* Store tracking ID and other fields. */ @@ -215,7 +243,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda /* If requested, emulate a scroll wheel by detecting small * vertical touch motions.
[PATCH] [v2] HID: add support for Apple Magic Trackpad 2
USB device Vendor 05ac (Apple) Device 0265 (Magic Trackpad 2) Bluetooth device Vendor 004c (Apple) Device 0265 (Magic Trackpad 2) Add support for Apple Magic Trackpad 2 over USB and bluetooth, putting the device in multi-touch mode. Signed-off-by: Claudio Mettler Signed-off-by: Marek Wyborski Signed-off-by: Sean O'Brien --- drivers/hid/hid-ids.h| 2 + drivers/hid/hid-magicmouse.c | 184 --- 2 files changed, 149 insertions(+), 37 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 79bdf0c7e351..d6d0b20cc015 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -88,9 +88,11 @@ #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 #define USB_VENDOR_ID_APPLE0x05ac +#define BT_VENDOR_ID_APPLE 0x004c #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE0x0304 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e +#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 0x0265 #define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI 0x020e #define USB_DEVICE_ID_APPLE_FOUNTAIN_ISO 0x020f #define USB_DEVICE_ID_APPLE_GEYSER_ANSI0x0214 diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index b454c4386157..7f14866ea3c7 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -54,6 +54,8 @@ module_param(report_undeciphered, bool, 0644); MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state field using a MSC_RAW event"); #define TRACKPAD_REPORT_ID 0x28 +#define TRACKPAD2_USB_REPORT_ID 0x02 +#define TRACKPAD2_BT_REPORT_ID 0x31 #define MOUSE_REPORT_ID0x29 #define DOUBLE_REPORT_ID 0xf7 /* These definitions are not precise, but they're close enough. (Bits @@ -91,6 +93,19 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define TRACKPAD_RES_Y \ ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) +#define TRACKPAD2_DIMENSION_X (float)16000 +#define TRACKPAD2_MIN_X -3678 +#define TRACKPAD2_MAX_X 3934 +#define TRACKPAD2_RES_X \ + ((TRACKPAD2_MAX_X - TRACKPAD2_MIN_X) / (TRACKPAD2_DIMENSION_X / 100)) +#define TRACKPAD2_DIMENSION_Y (float)11490 +#define TRACKPAD2_MIN_Y -2478 +#define TRACKPAD2_MAX_Y 2587 +#define TRACKPAD2_RES_Y \ + ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100)) + +#define MAX_TOUCHES16 + /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. @@ -115,8 +130,8 @@ struct magicmouse_sc { short scroll_x; short scroll_y; u8 size; - } touches[16]; - int tracking_ids[16]; + } touches[MAX_TOUCHES]; + int tracking_ids[MAX_TOUCHES]; }; static int magicmouse_firm_touch(struct magicmouse_sc *msc) @@ -183,6 +198,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda { struct input_dev *input = msc->input; int id, x, y, size, orientation, touch_major, touch_minor, state, down; + int pressure = 0; if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf; @@ -194,7 +210,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda touch_minor = tdata[4]; state = tdata[7] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; - } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ + } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD) { id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf; x = (tdata[1] << 27 | tdata[0] << 19) >> 19; y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); @@ -204,6 +220,18 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda touch_minor = tdata[5]; state = tdata[8] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; + } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 */ + id = tdata[8] & 0xf; + x = (tdata[1] << 27 | tdata[0] << 19) >> 19; + y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); + size = tdata[6]; + orientation = (tdata[8] >> 5) - 4; + touch_major = tdata[4]; + touch_minor = tdata[5]; + /* Prevent zero and low pressure values */ + pressure = tdata[7] + 30; + state = tdata[3] & 0xC0; + down = state == 0x80; } /* Store tracking ID and other fields. */ @@ -215,7 +243,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda /* If requested, emulate a scroll wheel by detecting small * vertical touch motions.
Your reply
We provide photoshop services to some of the companies from around the world. We have worked on tons of images ever since our team establishment in 2009. Many online retail companies use our services for retouching electronics, jewelry, apparels, furniture etc. by getting the images of their products enhanced. Here are the details of what we provide: Clipping path; Deep etch process Image masking Remove background Portrait retouching Jewelry retouching Fashion retouching Please reply back for further info. We can provide testing for your photos if needed. Thanks, Ruby
Your reply
We provide photoshop services to some of the companies from around the world. We have worked on tons of images ever since our team establishment in 2009. Many online retail companies use our services for retouching electronics, jewelry, apparels, furniture etc. by getting the images of their products enhanced. Here are the details of what we provide: Clipping path; Deep etch process Image masking Remove background Portrait retouching Jewelry retouching Fashion retouching Please reply back for further info. We can provide testing for your photos if needed. Thanks, Ruby
[PATCH] MIPS: Fix computation on entry point
Since commit 27c524d17430 ("MIPS: Use the entry point from the ELF file header"), the kernel entry point is computed with a grep on "start address" on the output of objdump. It works fine when the default language is english but it may fail on other language (for example in French, the grep should be done on "adresse de départ"). To fix this computation on most machine, I propose to force the language to english with "LC_ALL=C". Fixes: 27c524d17430 ("MIPS: Use the entry point from the ELF file header") Signed-off-by: Philippe Reynes --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index d74b374..835aa8f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -258,7 +258,7 @@ load-y = $(CONFIG_PHYSICAL_START) endif # Sign-extend the entry point to 64 bits if retrieved as a 32-bit number. -entry-y= $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \ +entry-y= $(shell LC_ALL=C $(OBJDUMP) -f vmlinux 2>/dev/null \ | sed -n '/^start address / { \ s/^.* //; \ s/0x\([0-7]...\)$$/0x\1/; \ -- 2.7.4
[PATCH] MIPS: Fix computation on entry point
Since commit 27c524d17430 ("MIPS: Use the entry point from the ELF file header"), the kernel entry point is computed with a grep on "start address" on the output of objdump. It works fine when the default language is english but it may fail on other language (for example in French, the grep should be done on "adresse de départ"). To fix this computation on most machine, I propose to force the language to english with "LC_ALL=C". Fixes: 27c524d17430 ("MIPS: Use the entry point from the ELF file header") Signed-off-by: Philippe Reynes --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index d74b374..835aa8f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -258,7 +258,7 @@ load-y = $(CONFIG_PHYSICAL_START) endif # Sign-extend the entry point to 64 bits if retrieved as a 32-bit number. -entry-y= $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \ +entry-y= $(shell LC_ALL=C $(OBJDUMP) -f vmlinux 2>/dev/null \ | sed -n '/^start address / { \ s/^.* //; \ s/0x\([0-7]...\)$$/0x\1/; \ -- 2.7.4
Re: [PATCH 4/7] mm/hmm: properly handle migration pmd
On Tue 28-08-18 11:54:33, Zi Yan wrote: > Hi Michal, > > On 28 Aug 2018, at 11:45, Michal Hocko wrote: > > > On Tue 28-08-18 17:42:06, Michal Hocko wrote: > >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote: > >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote: > On Fri 24-08-18 20:05:46, Zi Yan wrote: > [...] > >> + if (!pmd_present(pmd)) { > >> + swp_entry_t entry = pmd_to_swp_entry(pmd); > >> + > >> + if (is_migration_entry(entry)) { > > > > I think you should check thp_migration_supported() here, since PMD > > migration is only enabled in x86_64 systems. > > Other architectures should treat PMD migration entries as bad. > > How can we have a migration pmd entry when the migration is not > supported? > >>> > >>> Not sure i follow here, migration can happen anywhere (assuming > >>> that something like compaction is active or numa or ...). So this > >>> code can face pmd migration entry on architecture that support > >>> it. What is missing here is thp_migration_supported() call to > >>> protect the is_migration_entry() to avoid false positive on arch > >>> which do not support thp migration. > >> > >> I mean that architectures which do not support THP migration shouldn't > >> ever see any migration entry. So is_migration_entry should be always > >> false. Or do I miss something? > > > > And just to be clear. thp_migration_supported should be checked only > > when we actually _do_ the migration or evaluate migratability of the > > page. We definitely do want to sprinkle this check to all places where > > is_migration_entry is checked. > > is_migration_entry() is a general check for swp_entry_t, so it can return > true even if THP migration is not enabled. is_pmd_migration_entry() always > returns false when THP migration is not enabled. > > So the code can be changed in two ways, either replacing is_migration_entry() > with is_pmd_migration_entry() or adding thp_migration_supported() check > like Jerome did. > > Does this clarify your question? Not really. IIUC the code checks for the pmd. So even though is_migration_entry is a more generic check it should never return true for thp_migration_supported() == F because we simply never have those unless I am missing something. is_pmd_migration_entry is much more readable of course and I suspect it can save few cycles as well. -- Michal Hocko SUSE Labs
Re: [PATCH 4/7] mm/hmm: properly handle migration pmd
On Tue 28-08-18 11:54:33, Zi Yan wrote: > Hi Michal, > > On 28 Aug 2018, at 11:45, Michal Hocko wrote: > > > On Tue 28-08-18 17:42:06, Michal Hocko wrote: > >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote: > >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote: > On Fri 24-08-18 20:05:46, Zi Yan wrote: > [...] > >> + if (!pmd_present(pmd)) { > >> + swp_entry_t entry = pmd_to_swp_entry(pmd); > >> + > >> + if (is_migration_entry(entry)) { > > > > I think you should check thp_migration_supported() here, since PMD > > migration is only enabled in x86_64 systems. > > Other architectures should treat PMD migration entries as bad. > > How can we have a migration pmd entry when the migration is not > supported? > >>> > >>> Not sure i follow here, migration can happen anywhere (assuming > >>> that something like compaction is active or numa or ...). So this > >>> code can face pmd migration entry on architecture that support > >>> it. What is missing here is thp_migration_supported() call to > >>> protect the is_migration_entry() to avoid false positive on arch > >>> which do not support thp migration. > >> > >> I mean that architectures which do not support THP migration shouldn't > >> ever see any migration entry. So is_migration_entry should be always > >> false. Or do I miss something? > > > > And just to be clear. thp_migration_supported should be checked only > > when we actually _do_ the migration or evaluate migratability of the > > page. We definitely do want to sprinkle this check to all places where > > is_migration_entry is checked. > > is_migration_entry() is a general check for swp_entry_t, so it can return > true even if THP migration is not enabled. is_pmd_migration_entry() always > returns false when THP migration is not enabled. > > So the code can be changed in two ways, either replacing is_migration_entry() > with is_pmd_migration_entry() or adding thp_migration_supported() check > like Jerome did. > > Does this clarify your question? Not really. IIUC the code checks for the pmd. So even though is_migration_entry is a more generic check it should never return true for thp_migration_supported() == F because we simply never have those unless I am missing something. is_pmd_migration_entry is much more readable of course and I suspect it can save few cycles as well. -- Michal Hocko SUSE Labs
Waiting for
We provide photoshop services to some of the companies from around the world. We have worked on tons of images ever since our team establishment in 2009. Many online retail companies use our services for retouching electronics, jewelry, apparels, furniture etc. by getting the images of their products enhanced. Here are the details of what we provide: Clipping path; Deep etch process Image masking Remove background Portrait retouching Jewelry retouching Fashion retouching Please reply back for further info. We can provide testing for your photos if needed. Thanks, Ruby
Waiting for
We provide photoshop services to some of the companies from around the world. We have worked on tons of images ever since our team establishment in 2009. Many online retail companies use our services for retouching electronics, jewelry, apparels, furniture etc. by getting the images of their products enhanced. Here are the details of what we provide: Clipping path; Deep etch process Image masking Remove background Portrait retouching Jewelry retouching Fashion retouching Please reply back for further info. We can provide testing for your photos if needed. Thanks, Ruby
Re: [PATCH 4/7] mm/hmm: properly handle migration pmd
On Tue, Aug 28, 2018 at 11:54:33AM -0400, Zi Yan wrote: > Hi Michal, > > On 28 Aug 2018, at 11:45, Michal Hocko wrote: > > > On Tue 28-08-18 17:42:06, Michal Hocko wrote: > >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote: > >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote: > On Fri 24-08-18 20:05:46, Zi Yan wrote: > [...] > >> + if (!pmd_present(pmd)) { > >> + swp_entry_t entry = pmd_to_swp_entry(pmd); > >> + > >> + if (is_migration_entry(entry)) { > > > > I think you should check thp_migration_supported() here, since PMD > > migration is only enabled in x86_64 systems. > > Other architectures should treat PMD migration entries as bad. > > How can we have a migration pmd entry when the migration is not > supported? > >>> > >>> Not sure i follow here, migration can happen anywhere (assuming > >>> that something like compaction is active or numa or ...). So this > >>> code can face pmd migration entry on architecture that support > >>> it. What is missing here is thp_migration_supported() call to > >>> protect the is_migration_entry() to avoid false positive on arch > >>> which do not support thp migration. > >> > >> I mean that architectures which do not support THP migration shouldn't > >> ever see any migration entry. So is_migration_entry should be always > >> false. Or do I miss something? > > > > And just to be clear. thp_migration_supported should be checked only > > when we actually _do_ the migration or evaluate migratability of the > > page. We definitely do want to sprinkle this check to all places where > > is_migration_entry is checked. > > is_migration_entry() is a general check for swp_entry_t, so it can return > true even if THP migration is not enabled. is_pmd_migration_entry() always > returns false when THP migration is not enabled. > > So the code can be changed in two ways, either replacing is_migration_entry() > with is_pmd_migration_entry() or adding thp_migration_supported() check > like Jerome did. > > Does this clarify your question? > Well looking back at code is_migration_entry() will return false on arch which do not have thp migration because pmd_to_swp_entry() will return swp_entry(0,0) which is can not be a valid migration entry. Maybe using is_pmd_migration_entry() would be better here ? It seems that is_pmd_migration_entry() is more common then the open coded thp_migration_supported() && is_migration_entry() Cheers, Jérôme
Re: [PATCH 4/7] mm/hmm: properly handle migration pmd
On Tue, Aug 28, 2018 at 11:54:33AM -0400, Zi Yan wrote: > Hi Michal, > > On 28 Aug 2018, at 11:45, Michal Hocko wrote: > > > On Tue 28-08-18 17:42:06, Michal Hocko wrote: > >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote: > >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote: > On Fri 24-08-18 20:05:46, Zi Yan wrote: > [...] > >> + if (!pmd_present(pmd)) { > >> + swp_entry_t entry = pmd_to_swp_entry(pmd); > >> + > >> + if (is_migration_entry(entry)) { > > > > I think you should check thp_migration_supported() here, since PMD > > migration is only enabled in x86_64 systems. > > Other architectures should treat PMD migration entries as bad. > > How can we have a migration pmd entry when the migration is not > supported? > >>> > >>> Not sure i follow here, migration can happen anywhere (assuming > >>> that something like compaction is active or numa or ...). So this > >>> code can face pmd migration entry on architecture that support > >>> it. What is missing here is thp_migration_supported() call to > >>> protect the is_migration_entry() to avoid false positive on arch > >>> which do not support thp migration. > >> > >> I mean that architectures which do not support THP migration shouldn't > >> ever see any migration entry. So is_migration_entry should be always > >> false. Or do I miss something? > > > > And just to be clear. thp_migration_supported should be checked only > > when we actually _do_ the migration or evaluate migratability of the > > page. We definitely do want to sprinkle this check to all places where > > is_migration_entry is checked. > > is_migration_entry() is a general check for swp_entry_t, so it can return > true even if THP migration is not enabled. is_pmd_migration_entry() always > returns false when THP migration is not enabled. > > So the code can be changed in two ways, either replacing is_migration_entry() > with is_pmd_migration_entry() or adding thp_migration_supported() check > like Jerome did. > > Does this clarify your question? > Well looking back at code is_migration_entry() will return false on arch which do not have thp migration because pmd_to_swp_entry() will return swp_entry(0,0) which is can not be a valid migration entry. Maybe using is_pmd_migration_entry() would be better here ? It seems that is_pmd_migration_entry() is more common then the open coded thp_migration_supported() && is_migration_entry() Cheers, Jérôme
VDSO and dcache aliasing
Hello James, A year ago, you wrote that patch: https://www.linux-mips.org/archives/linux-mips/2017-06/msg00658.html You called it a hack but it has been used since then. As you will certainly realize by now, Ocelot is one of the affected SoC so we would pretty much like to see this going upstream. What would be the way forward? Regards, -- Alexandre Belloni, Bootlin Embedded Linux and Kernel engineering https://bootlin.com
VDSO and dcache aliasing
Hello James, A year ago, you wrote that patch: https://www.linux-mips.org/archives/linux-mips/2017-06/msg00658.html You called it a hack but it has been used since then. As you will certainly realize by now, Ocelot is one of the affected SoC so we would pretty much like to see this going upstream. What would be the way forward? Regards, -- Alexandre Belloni, Bootlin Embedded Linux and Kernel engineering https://bootlin.com
Re: [RFC PATCH v2 2/3] pstore: Add register read/write{b,w,l,q} tracing support
On Tue, 28 Aug 2018 18:47:33 +0530 Sai Prakash Ranjan wrote: > On 8/27/2018 9:45 PM, Steven Rostedt wrote: > > On Sat, 25 Aug 2018 12:54:07 +0530 > > Sai Prakash Ranjan wrote: > > > > > >> Ftrace does not trace __raw{read,write}{b,l,w,q}() functions. I am not > >> sure why and how it is filtered out because I do not see any notrace > >> flag in those functions, maybe that whole directory is filtered out. > >> So adding this functionality to ftrace would mean removing the notrace > >> for these functions i.e., something like using > >> __raw{read,write}{b,l,w,q}() as the available filter functions. Also > >> pstore ftrace does not filter functions to trace I suppose? > > > > It's not traced because it is inlined. Simply make the __raw_read > > function a normal function and it will be traced. And then you could > > use ftrace to read the function. > > > > If this has to be per arch, you can register your callback with the > > REGS flags, and pt_regs will be passed to your callback function you > > attached to __raw_read*() as if you inserted a break point at that > > location, and you can get any reg data you want there. > > > > > > Thank you very much for the information Steven. Ok so we can get > function parameters with pt_regs. Yes. > > >> > >> Coming to the reason as to why it would be good to keep this separate > >> from ftrace would be: > >> * Ftrace can get ip and parent ip, but suppose we need extra data (field > >> data) as in above sample output we would not be able to get through > >> ftrace. > > > > As mentioned above, you can get regs (and ftrace is being expanded now > > to get parameters of functions). > > > You mean there is another way to get parameters other than regs? No, but you could register a callback function to be called when a function is hit, and the pt_regs are passed to it. We are working on getting parameters from the pt_regs (see this patch: http://lkml.kernel.org/r/152465885737.26224.2822487520472783854.stgit@devbox) > > >> > >> * Although this patch is for tracing register read/write, we can easily > >> add more functionality since we have dynamic_rtb api which can be hooked > >> to functions and start tracing events(IRQ, Context ID) something similar > >> to tracepoints. > >> Initially thought of having tracepoints for logging register read/write > >> but I do not know if we can export tracepoint data to pstore since the > >> main usecase is to debug unknown resets and hangs. > > > > I don't know why not? We have read/write tracepoints for > > read/write_msr() calls in x86. > > > > Anything can add a hook to the callback of the tracepoints, and use > > that infrastructure instead of creating yet another dynamic code > > modification infrastructure. > > > Thanks for pointing out to read/write_msr, I checked it and was able to > implement something similar for arm64. But still can we export > tracepoint data to pstore because we need to debug reset cases and for > that pstore is of real importance?. If so then it would be great to have > various events logged into pstore which can be a lot of help for debugging. > > Also with the current dynamic filtering of read/write(PATCH 3/3), it is > a lot easier to filter register read/write since we use dynamic debug > framework which provides file, function and line level filtering > capacity. Maybe if we can add something like this to trace events it > would be better? I would recommend using the tracepoint infrastructure. Note, tracepoints and trace events are two different things. Trace events use tracepoints, and you use trace events to create tracepoints, thus they are tightly coupled. But once a tracepoint exists, anything can connect to them without needing to use the trace event. Let's look at the read_msr trace event. Because it is in a header, to avoid "include hell" we open code some of it: static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; val = __rdmsr(msr); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_read_msr(msr, val, 0); return val; } Where: #ifdef CONFIG_TRACEPOINTS #define msr_tracepoint_active(t) static_key_false(&(t).key) #else #define msr_tracepoint_active(t) false #endif We have to open code the access to the tracepoint.key because msr.h is used in a lot of critical headers, we couldn't use the normal tracepoint.h header here. The "static_key_false()" is a jump label that is just a nop. When the static_key is enabled, the nop is converted to a static "jmp" to the code that calls "do_trace_read_msr()". This is a function call to a function defined in msr.c (where we can do proper includes), and all that does is call the tracepoint "trace_read_msr()", which is also a static key that, when enabled, will iterate over a list of functions it should call with the defined parameters (msr, val, failed). When defining the trace event for "read_msr", it creates
Re: [RFC PATCH v2 2/3] pstore: Add register read/write{b,w,l,q} tracing support
On Tue, 28 Aug 2018 18:47:33 +0530 Sai Prakash Ranjan wrote: > On 8/27/2018 9:45 PM, Steven Rostedt wrote: > > On Sat, 25 Aug 2018 12:54:07 +0530 > > Sai Prakash Ranjan wrote: > > > > > >> Ftrace does not trace __raw{read,write}{b,l,w,q}() functions. I am not > >> sure why and how it is filtered out because I do not see any notrace > >> flag in those functions, maybe that whole directory is filtered out. > >> So adding this functionality to ftrace would mean removing the notrace > >> for these functions i.e., something like using > >> __raw{read,write}{b,l,w,q}() as the available filter functions. Also > >> pstore ftrace does not filter functions to trace I suppose? > > > > It's not traced because it is inlined. Simply make the __raw_read > > function a normal function and it will be traced. And then you could > > use ftrace to read the function. > > > > If this has to be per arch, you can register your callback with the > > REGS flags, and pt_regs will be passed to your callback function you > > attached to __raw_read*() as if you inserted a break point at that > > location, and you can get any reg data you want there. > > > > > > Thank you very much for the information Steven. Ok so we can get > function parameters with pt_regs. Yes. > > >> > >> Coming to the reason as to why it would be good to keep this separate > >> from ftrace would be: > >> * Ftrace can get ip and parent ip, but suppose we need extra data (field > >> data) as in above sample output we would not be able to get through > >> ftrace. > > > > As mentioned above, you can get regs (and ftrace is being expanded now > > to get parameters of functions). > > > You mean there is another way to get parameters other than regs? No, but you could register a callback function to be called when a function is hit, and the pt_regs are passed to it. We are working on getting parameters from the pt_regs (see this patch: http://lkml.kernel.org/r/152465885737.26224.2822487520472783854.stgit@devbox) > > >> > >> * Although this patch is for tracing register read/write, we can easily > >> add more functionality since we have dynamic_rtb api which can be hooked > >> to functions and start tracing events(IRQ, Context ID) something similar > >> to tracepoints. > >> Initially thought of having tracepoints for logging register read/write > >> but I do not know if we can export tracepoint data to pstore since the > >> main usecase is to debug unknown resets and hangs. > > > > I don't know why not? We have read/write tracepoints for > > read/write_msr() calls in x86. > > > > Anything can add a hook to the callback of the tracepoints, and use > > that infrastructure instead of creating yet another dynamic code > > modification infrastructure. > > > Thanks for pointing out to read/write_msr, I checked it and was able to > implement something similar for arm64. But still can we export > tracepoint data to pstore because we need to debug reset cases and for > that pstore is of real importance?. If so then it would be great to have > various events logged into pstore which can be a lot of help for debugging. > > Also with the current dynamic filtering of read/write(PATCH 3/3), it is > a lot easier to filter register read/write since we use dynamic debug > framework which provides file, function and line level filtering > capacity. Maybe if we can add something like this to trace events it > would be better? I would recommend using the tracepoint infrastructure. Note, tracepoints and trace events are two different things. Trace events use tracepoints, and you use trace events to create tracepoints, thus they are tightly coupled. But once a tracepoint exists, anything can connect to them without needing to use the trace event. Let's look at the read_msr trace event. Because it is in a header, to avoid "include hell" we open code some of it: static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; val = __rdmsr(msr); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_read_msr(msr, val, 0); return val; } Where: #ifdef CONFIG_TRACEPOINTS #define msr_tracepoint_active(t) static_key_false(&(t).key) #else #define msr_tracepoint_active(t) false #endif We have to open code the access to the tracepoint.key because msr.h is used in a lot of critical headers, we couldn't use the normal tracepoint.h header here. The "static_key_false()" is a jump label that is just a nop. When the static_key is enabled, the nop is converted to a static "jmp" to the code that calls "do_trace_read_msr()". This is a function call to a function defined in msr.c (where we can do proper includes), and all that does is call the tracepoint "trace_read_msr()", which is also a static key that, when enabled, will iterate over a list of functions it should call with the defined parameters (msr, val, failed). When defining the trace event for "read_msr", it creates
Re: Linux 4.19-rc1
On Mon, Aug 27, 2018 at 02:56:32PM -0700, Linus Torvalds wrote: > On Mon, Aug 27, 2018 at 6:45 AM Guenter Roeck wrote: > > > > Build results: > > total: 132 pass: 129 fail: 3 > > Thanks for running these. Looks like everything but the sparc thing is > under control, and the sparc thing might be one of those "big builds > don't work on sparc" ;( > In general I would agree. On the other side, someone who knows sparc assembler should be able to fix the problem. sparc is notorious for failing allmodconfig builds, mostly due to its separate devicetree implementation. On top of that, many allmodconfig builds already fail, often for minor reasons such as duplicate symbols or missing exports. Dropping sparc:allmodconfig will cause sparc builds to deteriorate, and we'll lose valuable build feedback. On the plus side, sparc64:allmodconfig still builds, but that doesn't cover 32/64 bit differences. I'll monitor the situation for a while and stop building sparc:allmodconfig if the problem isn't fixed around -rc7. Guenter
Re: Linux 4.19-rc1
On Mon, Aug 27, 2018 at 02:56:32PM -0700, Linus Torvalds wrote: > On Mon, Aug 27, 2018 at 6:45 AM Guenter Roeck wrote: > > > > Build results: > > total: 132 pass: 129 fail: 3 > > Thanks for running these. Looks like everything but the sparc thing is > under control, and the sparc thing might be one of those "big builds > don't work on sparc" ;( > In general I would agree. On the other side, someone who knows sparc assembler should be able to fix the problem. sparc is notorious for failing allmodconfig builds, mostly due to its separate devicetree implementation. On top of that, many allmodconfig builds already fail, often for minor reasons such as duplicate symbols or missing exports. Dropping sparc:allmodconfig will cause sparc builds to deteriorate, and we'll lose valuable build feedback. On the plus side, sparc64:allmodconfig still builds, but that doesn't cover 32/64 bit differences. I'll monitor the situation for a while and stop building sparc:allmodconfig if the problem isn't fixed around -rc7. Guenter
[PATCH] binder: use standard functions to allocate fds
Binder uses internal fs interfaces to allocate and install fds: __alloc_fd __fd_install __close_fd get_files_struct put_files_struct These were used to support the passing of fds between processes as part of a transaction. The actual allocation and installation of the fds in the target process was handled by the sending process so the standard functions, alloc_fd() and fd_install() which assume task==current couldn't be used. This patch refactors this mechanism so that the fds are allocated and installed by the target process allowing the standard functions to be used. The sender now creates a list of fd fixups that contains the struct *file and the address to fixup with the new fd once it is allocated. This list is processed by the target process when the transaction is dequeued. A new error case is introduced by this change. If an async transaction with file descriptors cannot allocate new fds in the target (probably due to out of file descriptors), the transaction is discarded with a log message. In the old implementation this would have been detected in the sender context and failed prior to sending. Signed-off-by: Todd Kjos --- drivers/android/Kconfig| 2 +- drivers/android/binder.c | 387 - drivers/android/binder_trace.h | 36 ++- 3 files changed, 260 insertions(+), 165 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 432e9ad770703..51e8250d113fa 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -10,7 +10,7 @@ if ANDROID config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !CPU_CACHE_VIVT default n ---help--- Binder is used in Android for both communication between processes, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b0090..50856319bc7da 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -457,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -480,9 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(protected by @files_lock) - * @files_lockmutex to protect @files * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -527,8 +524,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; - struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -611,6 +606,23 @@ struct binder_thread { bool is_dead; }; +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; +}; + struct binder_transaction { int debug_id; struct binder_work work; @@ -628,6 +640,7 @@ struct binder_transaction { longpriority; longsaved_priority; kuid_t sender_euid; + struct list_head fd_fixups; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); -static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) -{ - unsigned long rlim_cur; - unsigned long irqs; - int ret; - - mutex_lock(>files_lock); - if (proc->files == NULL) { - ret = -ESRCH; - goto err; - } - if (!lock_task_sighand(proc->tsk, )) { - ret = -EMFILE; - goto err; - } - rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); - unlock_task_sighand(proc->tsk, ); - - ret = __alloc_fd(proc->files, 0, rlim_cur, flags); -err:
[PATCH] binder: use standard functions to allocate fds
Binder uses internal fs interfaces to allocate and install fds: __alloc_fd __fd_install __close_fd get_files_struct put_files_struct These were used to support the passing of fds between processes as part of a transaction. The actual allocation and installation of the fds in the target process was handled by the sending process so the standard functions, alloc_fd() and fd_install() which assume task==current couldn't be used. This patch refactors this mechanism so that the fds are allocated and installed by the target process allowing the standard functions to be used. The sender now creates a list of fd fixups that contains the struct *file and the address to fixup with the new fd once it is allocated. This list is processed by the target process when the transaction is dequeued. A new error case is introduced by this change. If an async transaction with file descriptors cannot allocate new fds in the target (probably due to out of file descriptors), the transaction is discarded with a log message. In the old implementation this would have been detected in the sender context and failed prior to sending. Signed-off-by: Todd Kjos --- drivers/android/Kconfig| 2 +- drivers/android/binder.c | 387 - drivers/android/binder_trace.h | 36 ++- 3 files changed, 260 insertions(+), 165 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 432e9ad770703..51e8250d113fa 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -10,7 +10,7 @@ if ANDROID config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !CPU_CACHE_VIVT default n ---help--- Binder is used in Android for both communication between processes, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b0090..50856319bc7da 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -457,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -480,9 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(protected by @files_lock) - * @files_lockmutex to protect @files * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -527,8 +524,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; - struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -611,6 +606,23 @@ struct binder_thread { bool is_dead; }; +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; +}; + struct binder_transaction { int debug_id; struct binder_work work; @@ -628,6 +640,7 @@ struct binder_transaction { longpriority; longsaved_priority; kuid_t sender_euid; + struct list_head fd_fixups; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); -static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) -{ - unsigned long rlim_cur; - unsigned long irqs; - int ret; - - mutex_lock(>files_lock); - if (proc->files == NULL) { - ret = -ESRCH; - goto err; - } - if (!lock_task_sighand(proc->tsk, )) { - ret = -EMFILE; - goto err; - } - rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); - unlock_task_sighand(proc->tsk, ); - - ret = __alloc_fd(proc->files, 0, rlim_cur, flags); -err:
Re: [PATCH v2 0/4] Provide core API for NMIs
On 28/08/18 16:29, Julien Thierry wrote: Hi, [...] I'll soon post a series making use of the API for Arm's GICv3. Here is the series, NMI related patches are the last 4 in the series: https://lkml.org/lkml/2018/8/28/693 -- Julien Thierry
Re: [PATCH v2 0/4] Provide core API for NMIs
On 28/08/18 16:29, Julien Thierry wrote: Hi, [...] I'll soon post a series making use of the API for Arm's GICv3. Here is the series, NMI related patches are the last 4 in the series: https://lkml.org/lkml/2018/8/28/693 -- Julien Thierry
[PATCH v4 2/2]: perf record: enable asynchronous trace writing
Trace file offset are linearly calculated by perf_mmap__push() code for the next possible write operation, but file position is updated by the kernel only in the second lseek() syscall after the loop. The first lseek() syscall reads that file position for the next loop iterations. record__mmap_read_sync implements sort of a barrier between spilling ready profiling data to disk. Signed-off-by: Alexey Budankov --- Changes in v4: - converted void *bf to struct perf_mmap *md in signatures - written comment in perf_mmap__push() just before perf_mmap__get(); - written comment in record__mmap_read_sync() on possible restarting of aio_write() operation and releasing perf_mmap object after all; - added perf_mmap__put() for the cases of failed aio_write(); Changes in v3: - written comments about nanosleep(0.5ms) call prior aio_suspend() to cope with intrusiveness of its implementation in glibc; - written comments about rationale behind coping profiling data into mmap->data buffer; --- tools/perf/builtin-record.c | 136 +--- tools/perf/util/mmap.c | 43 ++ tools/perf/util/mmap.h | 2 +- 3 files changed, 161 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 22ebeb92ac51..7b628f9a7770 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -121,6 +121,23 @@ static int record__write(struct record *rec, void *bf, size_t size) return 0; } +static int record__aio_write(int trace_fd, struct aiocb *cblock, + void *buf, size_t size, off_t off) +{ + cblock->aio_fildes = trace_fd; + cblock->aio_buf= buf; + cblock->aio_nbytes = size; + cblock->aio_offset = off; + cblock->aio_sigevent.sigev_notify = SIGEV_NONE; + + if (aio_write(cblock) == -1) { + pr_err("failed to queue perf data, error: %m\n"); + return -1; + } + + return 0; +} + static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -130,12 +147,13 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, event, event->header.size); } -static int record__pushfn(void *to, void *bf, size_t size) +static int record__pushfn(void *to, struct perf_mmap *map, size_t size, off_t off) { struct record *rec = to; rec->samples++; - return record__write(rec, bf, size); + return record__aio_write(rec->session->data->file.fd, >cblock, + map->data, size, off); } static volatile int done; @@ -510,13 +528,110 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; +static int record__mmap_read_sync(int trace_fd, struct aiocb **cblocks, + int cblocks_size, struct record *rec) +{ + size_t rem; + ssize_t size; + off_t rem_off; + int i, aio_ret, aio_errno, do_suspend; + struct perf_mmap *md; + struct timespec timeout0 = { 0, 0 }; + struct timespec timeoutS = { 0, 1000 * 500 * 1 }; // 0.5ms + + if (!cblocks_size) + return 0; + + do { + do_suspend = 0; + /* aio_suspend() implementation inside glibc (as of v2.27) is +* intrusive and not just blocks waiting io requests completion +* but polls requests queue inducing context switches in perf +* tool process. When profiling in system wide mode with tracing +* context switches the trace may be polluted by context switches +* from the perf process and the trace size becomes about 3-5 +* times bigger than that of when writing the trace serially. +* To limit the volume of context switches from perf tool +* process nanosleep() call below is added prior aio_suspend() +* calling till every half of the kernel timer tick which is +* usually 1ms (depends on CONFIG_HZ value). +*/ + nanosleep(, NULL); + if (aio_suspend((const struct aiocb**)cblocks, cblocks_size, )) { + if (errno == EAGAIN || errno == EINTR) { + do_suspend = 1; + continue; + } else { + pr_err("failed to sync perf data, error: %m\n"); + break; + } + } + for (i = 0; i < cblocks_size; i++) { + if (cblocks[i] == NULL) { + continue; + } + aio_errno = aio_error(cblocks[i]); + if (aio_errno ==
[PATCH v4 2/2]: perf record: enable asynchronous trace writing
Trace file offset are linearly calculated by perf_mmap__push() code for the next possible write operation, but file position is updated by the kernel only in the second lseek() syscall after the loop. The first lseek() syscall reads that file position for the next loop iterations. record__mmap_read_sync implements sort of a barrier between spilling ready profiling data to disk. Signed-off-by: Alexey Budankov --- Changes in v4: - converted void *bf to struct perf_mmap *md in signatures - written comment in perf_mmap__push() just before perf_mmap__get(); - written comment in record__mmap_read_sync() on possible restarting of aio_write() operation and releasing perf_mmap object after all; - added perf_mmap__put() for the cases of failed aio_write(); Changes in v3: - written comments about nanosleep(0.5ms) call prior aio_suspend() to cope with intrusiveness of its implementation in glibc; - written comments about rationale behind coping profiling data into mmap->data buffer; --- tools/perf/builtin-record.c | 136 +--- tools/perf/util/mmap.c | 43 ++ tools/perf/util/mmap.h | 2 +- 3 files changed, 161 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 22ebeb92ac51..7b628f9a7770 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -121,6 +121,23 @@ static int record__write(struct record *rec, void *bf, size_t size) return 0; } +static int record__aio_write(int trace_fd, struct aiocb *cblock, + void *buf, size_t size, off_t off) +{ + cblock->aio_fildes = trace_fd; + cblock->aio_buf= buf; + cblock->aio_nbytes = size; + cblock->aio_offset = off; + cblock->aio_sigevent.sigev_notify = SIGEV_NONE; + + if (aio_write(cblock) == -1) { + pr_err("failed to queue perf data, error: %m\n"); + return -1; + } + + return 0; +} + static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -130,12 +147,13 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, event, event->header.size); } -static int record__pushfn(void *to, void *bf, size_t size) +static int record__pushfn(void *to, struct perf_mmap *map, size_t size, off_t off) { struct record *rec = to; rec->samples++; - return record__write(rec, bf, size); + return record__aio_write(rec->session->data->file.fd, >cblock, + map->data, size, off); } static volatile int done; @@ -510,13 +528,110 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; +static int record__mmap_read_sync(int trace_fd, struct aiocb **cblocks, + int cblocks_size, struct record *rec) +{ + size_t rem; + ssize_t size; + off_t rem_off; + int i, aio_ret, aio_errno, do_suspend; + struct perf_mmap *md; + struct timespec timeout0 = { 0, 0 }; + struct timespec timeoutS = { 0, 1000 * 500 * 1 }; // 0.5ms + + if (!cblocks_size) + return 0; + + do { + do_suspend = 0; + /* aio_suspend() implementation inside glibc (as of v2.27) is +* intrusive and not just blocks waiting io requests completion +* but polls requests queue inducing context switches in perf +* tool process. When profiling in system wide mode with tracing +* context switches the trace may be polluted by context switches +* from the perf process and the trace size becomes about 3-5 +* times bigger than that of when writing the trace serially. +* To limit the volume of context switches from perf tool +* process nanosleep() call below is added prior aio_suspend() +* calling till every half of the kernel timer tick which is +* usually 1ms (depends on CONFIG_HZ value). +*/ + nanosleep(, NULL); + if (aio_suspend((const struct aiocb**)cblocks, cblocks_size, )) { + if (errno == EAGAIN || errno == EINTR) { + do_suspend = 1; + continue; + } else { + pr_err("failed to sync perf data, error: %m\n"); + break; + } + } + for (i = 0; i < cblocks_size; i++) { + if (cblocks[i] == NULL) { + continue; + } + aio_errno = aio_error(cblocks[i]); + if (aio_errno ==
Re: [PATCH 4/7] mm/hmm: properly handle migration pmd
Hi Michal, On 28 Aug 2018, at 11:45, Michal Hocko wrote: > On Tue 28-08-18 17:42:06, Michal Hocko wrote: >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote: >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote: On Fri 24-08-18 20:05:46, Zi Yan wrote: [...] >> +if (!pmd_present(pmd)) { >> +swp_entry_t entry = pmd_to_swp_entry(pmd); >> + >> +if (is_migration_entry(entry)) { > > I think you should check thp_migration_supported() here, since PMD > migration is only enabled in x86_64 systems. > Other architectures should treat PMD migration entries as bad. How can we have a migration pmd entry when the migration is not supported? >>> >>> Not sure i follow here, migration can happen anywhere (assuming >>> that something like compaction is active or numa or ...). So this >>> code can face pmd migration entry on architecture that support >>> it. What is missing here is thp_migration_supported() call to >>> protect the is_migration_entry() to avoid false positive on arch >>> which do not support thp migration. >> >> I mean that architectures which do not support THP migration shouldn't >> ever see any migration entry. So is_migration_entry should be always >> false. Or do I miss something? > > And just to be clear. thp_migration_supported should be checked only > when we actually _do_ the migration or evaluate migratability of the > page. We definitely do want to sprinkle this check to all places where > is_migration_entry is checked. is_migration_entry() is a general check for swp_entry_t, so it can return true even if THP migration is not enabled. is_pmd_migration_entry() always returns false when THP migration is not enabled. So the code can be changed in two ways, either replacing is_migration_entry() with is_pmd_migration_entry() or adding thp_migration_supported() check like Jerome did. Does this clarify your question? — Best Regards, Yan Zi signature.asc Description: OpenPGP digital signature
Re: [PATCH 4/7] mm/hmm: properly handle migration pmd
Hi Michal, On 28 Aug 2018, at 11:45, Michal Hocko wrote: > On Tue 28-08-18 17:42:06, Michal Hocko wrote: >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote: >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote: On Fri 24-08-18 20:05:46, Zi Yan wrote: [...] >> +if (!pmd_present(pmd)) { >> +swp_entry_t entry = pmd_to_swp_entry(pmd); >> + >> +if (is_migration_entry(entry)) { > > I think you should check thp_migration_supported() here, since PMD > migration is only enabled in x86_64 systems. > Other architectures should treat PMD migration entries as bad. How can we have a migration pmd entry when the migration is not supported? >>> >>> Not sure i follow here, migration can happen anywhere (assuming >>> that something like compaction is active or numa or ...). So this >>> code can face pmd migration entry on architecture that support >>> it. What is missing here is thp_migration_supported() call to >>> protect the is_migration_entry() to avoid false positive on arch >>> which do not support thp migration. >> >> I mean that architectures which do not support THP migration shouldn't >> ever see any migration entry. So is_migration_entry should be always >> false. Or do I miss something? > > And just to be clear. thp_migration_supported should be checked only > when we actually _do_ the migration or evaluate migratability of the > page. We definitely do want to sprinkle this check to all places where > is_migration_entry is checked. is_migration_entry() is a general check for swp_entry_t, so it can return true even if THP migration is not enabled. is_pmd_migration_entry() always returns false when THP migration is not enabled. So the code can be changed in two ways, either replacing is_migration_entry() with is_pmd_migration_entry() or adding thp_migration_supported() check like Jerome did. Does this clarify your question? — Best Regards, Yan Zi signature.asc Description: OpenPGP digital signature
[PATCH 1/4] of/unittest: remove use of node name pointer in overlay high level test
In preparation for removing the node name pointer, it needs to be removed from of_unittest_overlay_high_level. However, it's not really correct to use the node name without the unit-address and we should use the full node name. This most easily done by iterating over the child nodes with for_each_child_of_node() which is what of_get_child_by_name() does internally. While at it, we might as well convert the outer loop to use for_each_child_of_node() too instead of open coding it. Cc: Frank Rowand Signed-off-by: Rob Herring --- drivers/of/unittest.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 722537e14848..69a522e48970 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -2347,10 +2347,12 @@ static __init void of_unittest_overlay_high_level(void) } } - for (np = overlay_base_root->child; np; np = np->sibling) { - if (of_get_child_by_name(of_root, np->name)) { - unittest(0, "illegal node name in overlay_base %s", - np->name); + for_each_child_of_node(overlay_base_root, np) { + struct device_node *base_child; + for_each_child_of_node(of_root, base_child) { + if (!strcmp(np->full_name, base_child->full_name)) + unittest(0, "illegal node name in overlay_base %pOFn", + np); return; } } -- 2.17.1
[PATCH 1/4] of/unittest: remove use of node name pointer in overlay high level test
In preparation for removing the node name pointer, it needs to be removed from of_unittest_overlay_high_level. However, it's not really correct to use the node name without the unit-address and we should use the full node name. This most easily done by iterating over the child nodes with for_each_child_of_node() which is what of_get_child_by_name() does internally. While at it, we might as well convert the outer loop to use for_each_child_of_node() too instead of open coding it. Cc: Frank Rowand Signed-off-by: Rob Herring --- drivers/of/unittest.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 722537e14848..69a522e48970 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -2347,10 +2347,12 @@ static __init void of_unittest_overlay_high_level(void) } } - for (np = overlay_base_root->child; np; np = np->sibling) { - if (of_get_child_by_name(of_root, np->name)) { - unittest(0, "illegal node name in overlay_base %s", - np->name); + for_each_child_of_node(overlay_base_root, np) { + struct device_node *base_child; + for_each_child_of_node(of_root, base_child) { + if (!strcmp(np->full_name, base_child->full_name)) + unittest(0, "illegal node name in overlay_base %pOFn", + np); return; } } -- 2.17.1
[PATCH 2/4] of/unittest: add printf tests for node name
Add some printf test for printing the node name (without the unit-address). Cc: Frank Rowand Signed-off-by: Rob Herring --- drivers/of/unittest.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 69a522e48970..9def7be9c111 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -299,6 +299,10 @@ static void __init of_unittest_printf(void) of_unittest_printf_one(np, "%pOF", full_name); of_unittest_printf_one(np, "%pOFf", full_name); + of_unittest_printf_one(np, "%pOFn", "dev"); + of_unittest_printf_one(np, "%2pOFn", "dev"); + of_unittest_printf_one(np, "%5pOFn", " dev"); + of_unittest_printf_one(np, "%pOFnc", "dev:test-sub-device"); of_unittest_printf_one(np, "%pOFp", phandle_str); of_unittest_printf_one(np, "%pOFP", "dev@100"); of_unittest_printf_one(np, "ABC %pOFP ABC", "ABC dev@100 ABC"); -- 2.17.1
[PATCH v5 26/27] irqchip/gic: Add functions to access irq priorities
Add accessors to the GIC distributor/redistributors priority registers. Signed-off-by: Julien Thierry Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier --- drivers/irqchip/irq-gic-common.c | 10 ++ drivers/irqchip/irq-gic-common.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 01e673c..910746f 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -98,6 +98,16 @@ int gic_configure_irq(unsigned int irq, unsigned int type, return ret; } +void gic_set_irq_prio(unsigned int irq, void __iomem *base, u8 prio) +{ + writeb_relaxed(prio, base + GIC_DIST_PRI + irq); +} + +u8 gic_get_irq_prio(unsigned int irq, void __iomem *base) +{ + return readb_relaxed(base + GIC_DIST_PRI + irq); +} + void gic_dist_config(void __iomem *base, int gic_irqs, void (*sync_access)(void)) { diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index 3919cd7..1586dbd 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -35,6 +35,8 @@ void gic_dist_config(void __iomem *base, int gic_irqs, void gic_cpu_config(void __iomem *base, void (*sync_access)(void)); void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void *data); +void gic_set_irq_prio(unsigned int irq, void __iomem *base, u8 prio); +u8 gic_get_irq_prio(unsigned int irq, void __iomem *base); void gic_set_kvm_info(const struct gic_kvm_info *info); -- 1.9.1