[PATCH] PCI: iproc: Remove PAXC slot check to allow VF support

2018-08-28 Thread Ray Jui
From: Jitendra Bhivare 

Fix previous incorrect logic that limits PAXC slot number to zero only.
In order for SRIOV/VF to work, we need to allow the slot number to be
greater than zero.

Fixes: 46560388c476c ("PCI: iproc: Allow multiple devices except on PAXC")
Signed-off-by: Jitendra Bhivare 
Signed-off-by: Ray Jui 
Reviewed-by: Andy Gospodarek 
---
 drivers/pci/controller/pcie-iproc.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc.c 
b/drivers/pci/controller/pcie-iproc.c
index 3160e9342a2f..c20fd6bd68fd 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -630,14 +630,6 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct 
iproc_pcie *pcie,
return (pcie->base + offset);
}
 
-   /*
-* PAXC is connected to an internally emulated EP within the SoC.  It
-* allows only one device.
-*/
-   if (pcie->ep_is_internal)
-   if (slot > 0)
-   return NULL;
-
return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
 }
 
-- 
2.17.1



[PATCH] PCI: iproc: Remove PAXC slot check to allow VF support

2018-08-28 Thread Ray Jui
From: Jitendra Bhivare 

Fix previous incorrect logic that limits PAXC slot number to zero only.
In order for SRIOV/VF to work, we need to allow the slot number to be
greater than zero.

Fixes: 46560388c476c ("PCI: iproc: Allow multiple devices except on PAXC")
Signed-off-by: Jitendra Bhivare 
Signed-off-by: Ray Jui 
Reviewed-by: Andy Gospodarek 
---
 drivers/pci/controller/pcie-iproc.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc.c 
b/drivers/pci/controller/pcie-iproc.c
index 3160e9342a2f..c20fd6bd68fd 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -630,14 +630,6 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct 
iproc_pcie *pcie,
return (pcie->base + offset);
}
 
-   /*
-* PAXC is connected to an internally emulated EP within the SoC.  It
-* allows only one device.
-*/
-   if (pcie->ep_is_internal)
-   if (slot > 0)
-   return NULL;
-
return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
 }
 
-- 
2.17.1



Re: [PATCH 1/2] x86/mm: add .data..decrypted section to hold shared variables

2018-08-28 Thread Brijesh Singh




On 08/27/2018 05:11 PM, Tom Lendacky wrote:

On 08/27/2018 06:24 AM, Brijesh Singh wrote:

kvmclock defines few static variables which are shared with hypervisor
during the kvmclock initialization.

When SEV is active, memory is encrypted with a guest-specific key, and
if guest OS wants to share the memory region with hypervisor then it must
clear the C-bit before sharing it.

The '__decrypted' can be used to define a shared variables; the variables
will be put in the .data.decryption section. This section is mapped with
C=0 early in the boot, we also ensure that the initialized values are
updated to match with C=0 (i.e peform an in-place decryption). The
.data..decrypted section is PMD aligned and sized so that we avoid the
need for spliting the pages when map with C=0.


This should probably be broken into a few smaller patches.  Maybe a
patch that adds the section and the attribute, a patch that re-arranges
the mapping setup and then the in-place decryption and clearing of the
encryption bit for the area.




OK, I will break the patch. Probably will create a separate patch which
just re-arranges the mapping setup without making any logical changes.




Signed-off-by: Brijesh Singh 
Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Cc: sta...@vger.kernel.org
Cc: Tom Lendacky 
Cc: k...@vger.kernel.org
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: "H. Peter Anvin" 
Cc: linux-kernel@vger.kernel.org
Cc: Paolo Bonzini 
Cc: Sean Christopherson 
Cc: "Radim Krčmář" 
---
  arch/x86/include/asm/mem_encrypt.h |   4 +
  arch/x86/kernel/head64.c   |  12 ++
  arch/x86/kernel/vmlinux.lds.S  |  18 +++
  arch/x86/mm/mem_encrypt_identity.c | 220 +++--
  4 files changed, 197 insertions(+), 57 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index c064383..3f7d9d3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -52,6 +52,8 @@ void __init mem_encrypt_init(void);
  bool sme_active(void);
  bool sev_active(void);
  
+#define __decrypted __attribute__((__section__(".data..decrypted")))

+
  #else /* !CONFIG_AMD_MEM_ENCRYPT */
  
  #define sme_me_mask	0ULL

@@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long 
size) { return 0;
  static inline int __init
  early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 
0; }
  
+#define __decrypted

+
  #endif/* CONFIG_AMD_MEM_ENCRYPT */
  
  /*

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379..6a18297 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -43,6 +43,9 @@ extern pmd_t 
early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
  static unsigned int __initdata next_early_pgt;
  pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
  
+/* To clear memory encryption mask from the decrypted section */

+extern char __start_data_decrypted[], __end_data_decrypted[];
+


Should find a header for these rather than defining them here.



OK, will move then in mem_encrypt.h. Will that work ?



  #ifdef CONFIG_X86_5LEVEL
  unsigned int __pgtable_l5_enabled __ro_after_init;
  unsigned int pgdir_shift __ro_after_init = 39;
@@ -112,6 +115,7 @@ static bool __head check_la57_support(unsigned long 
physaddr)
  unsigned long __head __startup_64(unsigned long physaddr,
  struct boot_params *bp)
  {
+   unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -234,6 +238,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
  
+	/* Clear the memory encryption mask from the decrypted section */

+   vaddr = (unsigned long)__start_data_decrypted;
+   vaddr_end = (unsigned long)__end_data_decrypted;
+   for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+   i = pmd_index(vaddr);
+   pmd[i] -= sme_get_me_mask();
+   }
+
/*
 * Return the SME encryption mask (if SME is active) to be used as a
 * modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a4..511b875 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -89,6 +89,22 @@ PHDRS {
note PT_NOTE FLAGS(0);  /* ___ */
  }
  
+/*

+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. But we make this section a pmd
+ * aligned to avoid spliting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define DATA_DECRYPTED_SECTION \
+   . = 

Re: [PATCH 1/2] x86/mm: add .data..decrypted section to hold shared variables

2018-08-28 Thread Brijesh Singh




On 08/27/2018 05:11 PM, Tom Lendacky wrote:

On 08/27/2018 06:24 AM, Brijesh Singh wrote:

kvmclock defines few static variables which are shared with hypervisor
during the kvmclock initialization.

When SEV is active, memory is encrypted with a guest-specific key, and
if guest OS wants to share the memory region with hypervisor then it must
clear the C-bit before sharing it.

The '__decrypted' can be used to define a shared variables; the variables
will be put in the .data.decryption section. This section is mapped with
C=0 early in the boot, we also ensure that the initialized values are
updated to match with C=0 (i.e peform an in-place decryption). The
.data..decrypted section is PMD aligned and sized so that we avoid the
need for spliting the pages when map with C=0.


This should probably be broken into a few smaller patches.  Maybe a
patch that adds the section and the attribute, a patch that re-arranges
the mapping setup and then the in-place decryption and clearing of the
encryption bit for the area.




OK, I will break the patch. Probably will create a separate patch which
just re-arranges the mapping setup without making any logical changes.




Signed-off-by: Brijesh Singh 
Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Cc: sta...@vger.kernel.org
Cc: Tom Lendacky 
Cc: k...@vger.kernel.org
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: "H. Peter Anvin" 
Cc: linux-kernel@vger.kernel.org
Cc: Paolo Bonzini 
Cc: Sean Christopherson 
Cc: "Radim Krčmář" 
---
  arch/x86/include/asm/mem_encrypt.h |   4 +
  arch/x86/kernel/head64.c   |  12 ++
  arch/x86/kernel/vmlinux.lds.S  |  18 +++
  arch/x86/mm/mem_encrypt_identity.c | 220 +++--
  4 files changed, 197 insertions(+), 57 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index c064383..3f7d9d3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -52,6 +52,8 @@ void __init mem_encrypt_init(void);
  bool sme_active(void);
  bool sev_active(void);
  
+#define __decrypted __attribute__((__section__(".data..decrypted")))

+
  #else /* !CONFIG_AMD_MEM_ENCRYPT */
  
  #define sme_me_mask	0ULL

@@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long 
size) { return 0;
  static inline int __init
  early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 
0; }
  
+#define __decrypted

+
  #endif/* CONFIG_AMD_MEM_ENCRYPT */
  
  /*

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379..6a18297 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -43,6 +43,9 @@ extern pmd_t 
early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
  static unsigned int __initdata next_early_pgt;
  pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
  
+/* To clear memory encryption mask from the decrypted section */

+extern char __start_data_decrypted[], __end_data_decrypted[];
+


Should find a header for these rather than defining them here.



OK, will move then in mem_encrypt.h. Will that work ?



  #ifdef CONFIG_X86_5LEVEL
  unsigned int __pgtable_l5_enabled __ro_after_init;
  unsigned int pgdir_shift __ro_after_init = 39;
@@ -112,6 +115,7 @@ static bool __head check_la57_support(unsigned long 
physaddr)
  unsigned long __head __startup_64(unsigned long physaddr,
  struct boot_params *bp)
  {
+   unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -234,6 +238,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
  
+	/* Clear the memory encryption mask from the decrypted section */

+   vaddr = (unsigned long)__start_data_decrypted;
+   vaddr_end = (unsigned long)__end_data_decrypted;
+   for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+   i = pmd_index(vaddr);
+   pmd[i] -= sme_get_me_mask();
+   }
+
/*
 * Return the SME encryption mask (if SME is active) to be used as a
 * modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a4..511b875 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -89,6 +89,22 @@ PHDRS {
note PT_NOTE FLAGS(0);  /* ___ */
  }
  
+/*

+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. But we make this section a pmd
+ * aligned to avoid spliting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define DATA_DECRYPTED_SECTION \
+   . = 

Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM

2018-08-28 Thread James Morse
Hi Tyler,

On 24/08/18 16:14, Tyler Baicar wrote:
> On Fri, Aug 24, 2018 at 5:48 AM, James Morse  wrote:
>> On 23/08/18 16:46, Tyler Baicar wrote:
>>> On Thu, Aug 23, 2018 at 5:29 AM James Morse  wrote:
 On 19/07/18 19:36, Tyler Baicar wrote:
> This seems pretty hacky to me, so if anyone has other suggestions please 
> share
> them.

 CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE should 
 provide
 the information necessary to identify the failing FRU". As EDAC has three
 'levels', these are what they should correspond to for ghes-edac.

 I assume NODE means rack/chassis in some distributed system. Lets ignore 
 it as
 it doesn't seem to map to anything in the SMBIOS table.
>>>
>>> I believe NODE should map to socket number for multi-socket systems.
>>
>> Isn't the Memory Array Structure still unique in a multi-socket system? If so
>> the node isn't telling us anything new.

> Yes, the Memory Array structure in SMBIOS is still unique, but the NODE value
> is needed in NODE, CARD, MODULE because the CARD number here typically
> maps to channel number which each socket has their own channel numbers.

/me flinches at 'typically'.

Okay, so the hierarchy applies to the numbers, not the handles.
How come there isn't a node handle?

I think we should ignore the extra hierarchy. The module/devices/dimms are the
only replaceable part, and we don't know if the firmware will provide the
information.


>> Do sockets show up in the SMBIOS table? We would need to know how many there 
>> are
>> in advance. For arm systems the cpu topology from PPTT is the best bet for 
>> this
>> information, but what do we do if that table is missing? (also, does firmware
>> count from 1 or 0?) I suspect we can't use this field unless we know what the
>> range of values is going to be in advance.
> 
> An Fan mentioned in his response, what the customers really care about
> is mapping to
> a particular DIMM since that is what they can replace. To do this, the
> Memory Device
> handle should be enough since those are all unique regardless of
> Memory Array handle
> and which socket the DIMM is on. The Firmware I've worked with counts
> from 0, but I'm
> not sure if that is required.

If the spec doesn't say, its difficult to know the range of values until we've
seen one of the limits.


> That won't matter if we just use the Memory Device handle.

I agree.


>>> I think the proper way to get this working would be to use these handles. 
>>> We can
>>> avoid populating this layer information and instead have a mapping of type 
>>> 17
>>> index number (how edac is numbering the DIMMs today) to the handle number.
>>
>> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what
>> EDAC_MC_LAYER_SLOT is for?
> 
> The problem with the layer reporting is that you need to know all the
> layer information as Fan mentioned.

I haven't spotted what requires this, there seems to be a bit of a mix of
numbers-of-layers and what order they go in. thunderx_edac.c just uses SLOT.
I think we can get away with using EDAC_MC_LAYER_ALL_MEM, sized by num_dimms
(which ghes-edac already does).

Providing extra topology may not be useful unless the firmware populates this
information. What do we do if we export card+module, but then firmware only
populates the module-handle?


> SoCs can support multiple board combinations (ie
> 1DPC vs. 2DPC)
> and there is no standardized way of knowing whether you are booted on a 1DPC 
> or
> 2DPC board.
> 
>>> Then we will need a new function to increment the counter based on the 
>>> handle
>>> number rather than this layer information. Is that how you are envisioning 
>>> it?
>>
>> I'm not familiar with edac's internals, so I didn't have any particular 
>> vision!
>>
>> Isn't the problem that ghes_edac_report_mem_error() does this:
>> |   e->top_layer = -1;
>> |   e->mid_layer = -1;
>> |   e->low_layer = -1;
> 
> The other problem is that the sysfs nodes are all setup with a single
> layer representing
> all of the memory on the board.
> 
> https://elixir.bootlin.com/linux/latest/source/drivers/edac/ghes_edac.c#L469
> 
> So the DIMM counters exposed in sysfs are all under a single memory
> controller and just
> numbered from 0 to n-1 based on the order in which the type 17 SMBIOS
> entries show up
> in the DMI walk.

User-space depending on the dmi walk order makes me nervous. Doing this gives
you per-dimm counters, if user-space needs to know which physical-dimm/slot that
is, we'd need a way of matching it with one of the smbios location strings.


>> so edac_raw_mc_handle_error() has no clue where the error happened. (I 
>> haven't
>> read what it does with this information yet).
>>
>> ghes_edac_report_mem_error() does check CPER_MEM_VALID_MODULE_HANDLE, and if 
>> its
>> set, it uses the handle to find the bank/device strings and prints them out.
> 
> Yes, I think this is where we need to add support to increment the
> count 

Re: [PATCH v2 1/6] dt-bindings: reset: Add PDC Global binding for SDM845 SoCs

2018-08-28 Thread Matthias Kaehlcke
On Tue, Aug 28, 2018 at 11:38:26AM +0530, si...@codeaurora.org wrote:
> Hi Matthias,
> Thanks for the review
> 
> On 2018-08-28 06:08, Matthias Kaehlcke wrote:
> > Hi Sibi,
> > 
> > On Fri, Aug 24, 2018 at 06:48:55PM +0530, Sibi Sankar wrote:
> > > Add PDC Global(Power Domain Controller) binding for SDM845 SoCs.
> > 
> > nit: missing blank before the opening parenthesis.
> > 
> 
> Will fix it
> 
> > > 
> > > Signed-off-by: Sibi Sankar 
> > > ---
> > >  .../bindings/reset/qcom,pdc-global.txt| 52
> > > +++
> > >  include/dt-bindings/reset/qcom,sdm845-pdc.h   | 20 +++
> > >  2 files changed, 72 insertions(+)
> > >  create mode 100644
> > > Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > >  create mode 100644 include/dt-bindings/reset/qcom,sdm845-pdc.h
> > > 
> > > diff --git
> > > a/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > > b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > > new file mode 100644
> > > index ..69f9edca9503
> > > --- /dev/null
> > > +++ b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > > @@ -0,0 +1,52 @@
> > > +PDC Global
> > > +==
> > > +
> > > +This binding describes a reset-controller found on PDC-Global(Power
> > > Domain
> > > +Controller) block for Qualcomm Technologies Inc SDM845 SoCs.
> > 
> > Are there other PDC reset controllers that aren't 'global'? Otherwise
> > I'd suggest to use 'pdc-reset' instead of 'pdc-global', which is more
> > specific and in line with the name of the driver added by this series.
> > Or something like 'pdc-reset-global/main' if there are other
> > controllers?
> > 
> 
> These are the only reset lines found in the pdc-global register space. But
> as
> explained by Bjorn, wouldn't it be better to leave it as such since
> pdc-global
> best describes the hardware without being limited by the current
> functionality
> it is being used for?

If I understand Bjorn's reply on
https://patchwork.kernel.org/patch/10575335/ correctly it is planned
to use the 'pdc-global' compatible string in the future in some sort
of MFD driver for the 'PDC Global' IP block, which then instantiates
other drivers like the reset controller. In this case I agree that
'pdc-global' seems a reasonable choice.


Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM

2018-08-28 Thread James Morse
Hi Tyler,

On 24/08/18 16:14, Tyler Baicar wrote:
> On Fri, Aug 24, 2018 at 5:48 AM, James Morse  wrote:
>> On 23/08/18 16:46, Tyler Baicar wrote:
>>> On Thu, Aug 23, 2018 at 5:29 AM James Morse  wrote:
 On 19/07/18 19:36, Tyler Baicar wrote:
> This seems pretty hacky to me, so if anyone has other suggestions please 
> share
> them.

 CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE should 
 provide
 the information necessary to identify the failing FRU". As EDAC has three
 'levels', these are what they should correspond to for ghes-edac.

 I assume NODE means rack/chassis in some distributed system. Lets ignore 
 it as
 it doesn't seem to map to anything in the SMBIOS table.
>>>
>>> I believe NODE should map to socket number for multi-socket systems.
>>
>> Isn't the Memory Array Structure still unique in a multi-socket system? If so
>> the node isn't telling us anything new.

> Yes, the Memory Array structure in SMBIOS is still unique, but the NODE value
> is needed in NODE, CARD, MODULE because the CARD number here typically
> maps to channel number which each socket has their own channel numbers.

/me flinches at 'typically'.

Okay, so the hierarchy applies to the numbers, not the handles.
How come there isn't a node handle?

I think we should ignore the extra hierarchy. The module/devices/dimms are the
only replaceable part, and we don't know if the firmware will provide the
information.


>> Do sockets show up in the SMBIOS table? We would need to know how many there 
>> are
>> in advance. For arm systems the cpu topology from PPTT is the best bet for 
>> this
>> information, but what do we do if that table is missing? (also, does firmware
>> count from 1 or 0?) I suspect we can't use this field unless we know what the
>> range of values is going to be in advance.
> 
> An Fan mentioned in his response, what the customers really care about
> is mapping to
> a particular DIMM since that is what they can replace. To do this, the
> Memory Device
> handle should be enough since those are all unique regardless of
> Memory Array handle
> and which socket the DIMM is on. The Firmware I've worked with counts
> from 0, but I'm
> not sure if that is required.

If the spec doesn't say, its difficult to know the range of values until we've
seen one of the limits.


> That won't matter if we just use the Memory Device handle.

I agree.


>>> I think the proper way to get this working would be to use these handles. 
>>> We can
>>> avoid populating this layer information and instead have a mapping of type 
>>> 17
>>> index number (how edac is numbering the DIMMs today) to the handle number.
>>
>> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what
>> EDAC_MC_LAYER_SLOT is for?
> 
> The problem with the layer reporting is that you need to know all the
> layer information as Fan mentioned.

I haven't spotted what requires this, there seems to be a bit of a mix of
numbers-of-layers and what order they go in. thunderx_edac.c just uses SLOT.
I think we can get away with using EDAC_MC_LAYER_ALL_MEM, sized by num_dimms
(which ghes-edac already does).

Providing extra topology may not be useful unless the firmware populates this
information. What do we do if we export card+module, but then firmware only
populates the module-handle?


> SoCs can support multiple board combinations (ie
> 1DPC vs. 2DPC)
> and there is no standardized way of knowing whether you are booted on a 1DPC 
> or
> 2DPC board.
> 
>>> Then we will need a new function to increment the counter based on the 
>>> handle
>>> number rather than this layer information. Is that how you are envisioning 
>>> it?
>>
>> I'm not familiar with edac's internals, so I didn't have any particular 
>> vision!
>>
>> Isn't the problem that ghes_edac_report_mem_error() does this:
>> |   e->top_layer = -1;
>> |   e->mid_layer = -1;
>> |   e->low_layer = -1;
> 
> The other problem is that the sysfs nodes are all setup with a single
> layer representing
> all of the memory on the board.
> 
> https://elixir.bootlin.com/linux/latest/source/drivers/edac/ghes_edac.c#L469
> 
> So the DIMM counters exposed in sysfs are all under a single memory
> controller and just
> numbered from 0 to n-1 based on the order in which the type 17 SMBIOS
> entries show up
> in the DMI walk.

User-space depending on the dmi walk order makes me nervous. Doing this gives
you per-dimm counters, if user-space needs to know which physical-dimm/slot that
is, we'd need a way of matching it with one of the smbios location strings.


>> so edac_raw_mc_handle_error() has no clue where the error happened. (I 
>> haven't
>> read what it does with this information yet).
>>
>> ghes_edac_report_mem_error() does check CPER_MEM_VALID_MODULE_HANDLE, and if 
>> its
>> set, it uses the handle to find the bank/device strings and prints them out.
> 
> Yes, I think this is where we need to add support to increment the
> count 

Re: [PATCH v2 1/6] dt-bindings: reset: Add PDC Global binding for SDM845 SoCs

2018-08-28 Thread Matthias Kaehlcke
On Tue, Aug 28, 2018 at 11:38:26AM +0530, si...@codeaurora.org wrote:
> Hi Matthias,
> Thanks for the review
> 
> On 2018-08-28 06:08, Matthias Kaehlcke wrote:
> > Hi Sibi,
> > 
> > On Fri, Aug 24, 2018 at 06:48:55PM +0530, Sibi Sankar wrote:
> > > Add PDC Global(Power Domain Controller) binding for SDM845 SoCs.
> > 
> > nit: missing blank before the opening parenthesis.
> > 
> 
> Will fix it
> 
> > > 
> > > Signed-off-by: Sibi Sankar 
> > > ---
> > >  .../bindings/reset/qcom,pdc-global.txt| 52
> > > +++
> > >  include/dt-bindings/reset/qcom,sdm845-pdc.h   | 20 +++
> > >  2 files changed, 72 insertions(+)
> > >  create mode 100644
> > > Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > >  create mode 100644 include/dt-bindings/reset/qcom,sdm845-pdc.h
> > > 
> > > diff --git
> > > a/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > > b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > > new file mode 100644
> > > index ..69f9edca9503
> > > --- /dev/null
> > > +++ b/Documentation/devicetree/bindings/reset/qcom,pdc-global.txt
> > > @@ -0,0 +1,52 @@
> > > +PDC Global
> > > +==
> > > +
> > > +This binding describes a reset-controller found on PDC-Global(Power
> > > Domain
> > > +Controller) block for Qualcomm Technologies Inc SDM845 SoCs.
> > 
> > Are there other PDC reset controllers that aren't 'global'? Otherwise
> > I'd suggest to use 'pdc-reset' instead of 'pdc-global', which is more
> > specific and in line with the name of the driver added by this series.
> > Or something like 'pdc-reset-global/main' if there are other
> > controllers?
> > 
> 
> These are the only reset lines found in the pdc-global register space. But
> as
> explained by Bjorn, wouldn't it be better to leave it as such since
> pdc-global
> best describes the hardware without being limited by the current
> functionality
> it is being used for?

If I understand Bjorn's reply on
https://patchwork.kernel.org/patch/10575335/ correctly it is planned
to use the 'pdc-global' compatible string in the future in some sort
of MFD driver for the 'PDC Global' IP block, which then instantiates
other drivers like the reset controller. In this case I agree that
'pdc-global' seems a reasonable choice.


Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM

2018-08-28 Thread James Morse
Hi Fan,

On 24/08/18 15:30, wufan wrote:
>> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what
>> EDAC_MC_LAYER_SLOT is for?
> 
> Borislav has explained it in his response. Here let me elaborate a little 
> more.
> To use the layer information you need an accurate way to pinpoint each 
> component
> in the layer and the parent components in the layers above. For example, to 
> use
> EDAC_MC_LAYER_SLOT you also need information for the parent layer say
> EDAC_MC_LAYER_CHANNEL, or another layer on top say EDAC_MC_LAYER_BRANCH.

I haven't spotted anything that forces a particular meaning/topology on these
types. (there are four of them, but only three 'levels')

i3000_edac.c has EDAC_MC_LAYER_CHIP_SELECT then EDAC_MC_LAYER_CHANNEL,
but i5400_edac.c has EDAC_MC_LAYER_BRANCH then EDAC_MC_LAYER_CHANNEL.
pnd2_edac.c agrees that channel comes before slot, but thunderx_edac.c just uses
slot directly

ghes-edac already describes memory as 'EDAC_MC_LAYER_ALL_MEM', with num_dimms, I
think we just need to get 'the' dimm number. Using the cper-module-handle means
we don't have to worry about firmware's count of dimms being different, as we
both agree its smbios-type-17 we're talking about.


> There
> are no clear ways to get the information from SMBIOS table. In the case of 
> "memory
> channel" we looked at type 37 which has the exact spelling but it was 
> introduced
> to support RamBus and Synclink. Not sure we can readily use it for modern
> architecture concept of "channel/slot". 

I think we should avoid this 'channel' thing as it means different things to
different people!

We can use ghes:card smbios:physical-memory-array as the UEFI spec tells us they
are the same, and we don't actually need to know what they mean.


>  I think it is good enough if we can pin each error to the corresponding DIMM.
> At the end of the day DIMMs are what customer can replace in the memory system
> and that's all that they care about. For the manufacturers of the board/chips
> they have the knowledge to map the specific DIMMs to the upper layer 
> components,
> so they can easily collect error counter data for upper layers. 

I agree.


>> CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE
>> should provide the information necessary to identify the failing FRU". As
>> EDAC has three 'levels', these are what they should correspond to for ghes-
>> edac.
>>
>> I assume NODE means rack/chassis in some distributed system. Lets ignore it
>> as it doesn't seem to map to anything in the SMBIOS table.
> 
> How about type 4 "Processor Information"?

As the spec doesn't tell us what the field means, we can't really do anything
other than print the value out.


>> 'Card' doesn't mean much to me, but it maps to SMBIOS:17 "Memory Array
>> Structure", which the Memory Device structure also points to.
>> Card then must mean "a collection of memory devices (DIMMs) that operate
>> together to form an address space".
>>
>> This might be what I think of as a memory-controller, or it might be
>> something more complicated. Regardless, the CPER records think its relevant.
> 
> Originally I thought "Card" were memory channel. But looking at the definition
> of "Card Handle" in CPER: "... this field contains the SMBIOS handle for the
> Type 16 Memory Array Structure that represents the memory card". So Card is
> memory controller or something similar to that.
> Right now ghes-edac assumes
> one mc. We probably need to map mc(s) to the type 16 instances in SMBIOS 
> table. 

I think we should ignore 'mc's, and just report the dimm numbers.

ghes-edac only cares about the number of dimms today, and this would work on
systems that only describe the dimms in the smbios table.


Thanks,

James


Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM

2018-08-28 Thread James Morse
Hi Fan,

On 24/08/18 15:30, wufan wrote:
>> Why get avoid the layer stuff? Isn't counting DIMM/memory-devices what
>> EDAC_MC_LAYER_SLOT is for?
> 
> Borislav has explained it in his response. Here let me elaborate a little 
> more.
> To use the layer information you need an accurate way to pinpoint each 
> component
> in the layer and the parent components in the layers above. For example, to 
> use
> EDAC_MC_LAYER_SLOT you also need information for the parent layer say
> EDAC_MC_LAYER_CHANNEL, or another layer on top say EDAC_MC_LAYER_BRANCH.

I haven't spotted anything that forces a particular meaning/topology on these
types. (there are four of them, but only three 'levels')

i3000_edac.c has EDAC_MC_LAYER_CHIP_SELECT then EDAC_MC_LAYER_CHANNEL,
but i5400_edac.c has EDAC_MC_LAYER_BRANCH then EDAC_MC_LAYER_CHANNEL.
pnd2_edac.c agrees that channel comes before slot, but thunderx_edac.c just uses
slot directly

ghes-edac already describes memory as 'EDAC_MC_LAYER_ALL_MEM', with num_dimms, I
think we just need to get 'the' dimm number. Using the cper-module-handle means
we don't have to worry about firmware's count of dimms being different, as we
both agree its smbios-type-17 we're talking about.


> There
> are no clear ways to get the information from SMBIOS table. In the case of 
> "memory
> channel" we looked at type 37 which has the exact spelling but it was 
> introduced
> to support RamBus and Synclink. Not sure we can readily use it for modern
> architecture concept of "channel/slot". 

I think we should avoid this 'channel' thing as it means different things to
different people!

We can use ghes:card smbios:physical-memory-array as the UEFI spec tells us they
are the same, and we don't actually need to know what they mean.


>  I think it is good enough if we can pin each error to the corresponding DIMM.
> At the end of the day DIMMs are what customer can replace in the memory system
> and that's all that they care about. For the manufacturers of the board/chips
> they have the knowledge to map the specific DIMMs to the upper layer 
> components,
> so they can easily collect error counter data for upper layers. 

I agree.


>> CPER's "Memory Error Record 2" thinks that "NODE, CARD and MODULE
>> should provide the information necessary to identify the failing FRU". As
>> EDAC has three 'levels', these are what they should correspond to for ghes-
>> edac.
>>
>> I assume NODE means rack/chassis in some distributed system. Lets ignore it
>> as it doesn't seem to map to anything in the SMBIOS table.
> 
> How about type 4 "Processor Information"?

As the spec doesn't tell us what the field means, we can't really do anything
other than print the value out.


>> 'Card' doesn't mean much to me, but it maps to SMBIOS:17 "Memory Array
>> Structure", which the Memory Device structure also points to.
>> Card then must mean "a collection of memory devices (DIMMs) that operate
>> together to form an address space".
>>
>> This might be what I think of as a memory-controller, or it might be
>> something more complicated. Regardless, the CPER records think its relevant.
> 
> Originally I thought "Card" were memory channel. But looking at the definition
> of "Card Handle" in CPER: "... this field contains the SMBIOS handle for the
> Type 16 Memory Array Structure that represents the memory card". So Card is
> memory controller or something similar to that.
> Right now ghes-edac assumes
> one mc. We probably need to map mc(s) to the type 16 instances in SMBIOS 
> table. 

I think we should ignore 'mc's, and just report the dimm numbers.

ghes-edac only cares about the number of dimms today, and this would work on
systems that only describe the dimms in the smbios table.


Thanks,

James


Re: [PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier

2018-08-28 Thread Fabio Estevam
On Tue, Aug 28, 2018 at 2:06 PM, Pierre-Jean Texier
 wrote:
> Adopt the SPDX license identifier headers to ease
> license compliance management.
>
> Signed-off-by: Pierre-Jean Texier 

Reviewed-by: Fabio Estevam 


Re: [PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier

2018-08-28 Thread Fabio Estevam
On Tue, Aug 28, 2018 at 2:06 PM, Pierre-Jean Texier
 wrote:
> Adopt the SPDX license identifier headers to ease
> license compliance management.
>
> Signed-off-by: Pierre-Jean Texier 

Reviewed-by: Fabio Estevam 


Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM

2018-08-28 Thread James Morse
Hi Boris,

On 24/08/18 13:01, Borislav Petkov wrote:
> On Fri, Aug 24, 2018 at 10:48:24AM +0100, James Morse wrote:
>> so edac_raw_mc_handle_error() has no clue where the error happened. (I 
>> haven't
>> read what it does with this information yet).
> 
> See edac_inc_ce_error(), for example - it uses the layers which are not
> negative (-1) to increment the error counts of the respective layer. It
> all depends on what granularity of the hardware part you're reporting
> the error for: is it a DIMM rank, a whole DIMM or for a channel which
> can span multiple DIMM ranks. And so on...
> 
> Look at some of the drivers and how they're doing that layering. It all
> depends on whether you can get the precise info from the hw.

Hmmm, in this example we need the information from firmware, as that is where
ghes-edac gets its information from.

We already count the module/device/dimms in the smbios table, memory is
described as 'EDAC_MC_LAYER_ALL_MEM' with num_dimms. I think all we're missing
is which dimm in ghes_edac_report_mem_error(). We have the handle, we just need
a number between 1 and num_dimms.

If it turns out firmware doesn't populate the handles in its cper records, then
we can keep e->enable_per_layer_report false when calling
edac_raw_mc_handle_error().

(I suggest we ignore 'card', and just do this for the device/dimms).


>> Naively I thought we could generate some index during 
>> ghes_edac_count_dimms(),
>> and use this as e->${whichever}_layer. I hoped there would be something we 
>> could
>> already use as the index, but I can't spot it, so this will be more than the
>> one-liner I was hoping for!
> 
> If you can get that info from the hardware and injecting an error into
> a DIMM gives you the correct DIMM number so that we can increment the
> proper counter, then you're golden. I don't think that works reliably on
> x86, though, therefore the lumping together.

... 'correct DIMM number' ...

Does x86 have another source of memory-topology information it needs to
correlate smbios with?

For arm there is nothing else describing the memory-topology, so as long as we
can correlate the smbios table and ghes:cper records through the handles, we can
get this working for all systems.


Thanks,

James


Re: [RFC PATCH] EDAC, ghes: Enable per-layer error reporting for ARM

2018-08-28 Thread James Morse
Hi Boris,

On 24/08/18 13:01, Borislav Petkov wrote:
> On Fri, Aug 24, 2018 at 10:48:24AM +0100, James Morse wrote:
>> so edac_raw_mc_handle_error() has no clue where the error happened. (I 
>> haven't
>> read what it does with this information yet).
> 
> See edac_inc_ce_error(), for example - it uses the layers which are not
> negative (-1) to increment the error counts of the respective layer. It
> all depends on what granularity of the hardware part you're reporting
> the error for: is it a DIMM rank, a whole DIMM or for a channel which
> can span multiple DIMM ranks. And so on...
> 
> Look at some of the drivers and how they're doing that layering. It all
> depends on whether you can get the precise info from the hw.

Hmmm, in this example we need the information from firmware, as that is where
ghes-edac gets its information from.

We already count the module/device/dimms in the smbios table, memory is
described as 'EDAC_MC_LAYER_ALL_MEM' with num_dimms. I think all we're missing
is which dimm in ghes_edac_report_mem_error(). We have the handle, we just need
a number between 1 and num_dimms.

If it turns out firmware doesn't populate the handles in its cper records, then
we can keep e->enable_per_layer_report false when calling
edac_raw_mc_handle_error().

(I suggest we ignore 'card', and just do this for the device/dimms).


>> Naively I thought we could generate some index during 
>> ghes_edac_count_dimms(),
>> and use this as e->${whichever}_layer. I hoped there would be something we 
>> could
>> already use as the index, but I can't spot it, so this will be more than the
>> one-liner I was hoping for!
> 
> If you can get that info from the hardware and injecting an error into
> a DIMM gives you the correct DIMM number so that we can increment the
> proper counter, then you're golden. I don't think that works reliably on
> x86, though, therefore the lumping together.

... 'correct DIMM number' ...

Does x86 have another source of memory-topology information it needs to
correlate smbios with?

For arm there is nothing else describing the memory-topology, so as long as we
can correlate the smbios table and ghes:cper records through the handles, we can
get this working for all systems.


Thanks,

James


[PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier

2018-08-28 Thread Pierre-Jean Texier
Adopt the SPDX license identifier headers to ease
license compliance management.

Signed-off-by: Pierre-Jean Texier 
---
 arch/arm/boot/dts/imx7s-warp.dts | 39 +--
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts
index fa390da..bf33c74 100644
--- a/arch/arm/boot/dts/imx7s-warp.dts
+++ b/arch/arm/boot/dts/imx7s-warp.dts
@@ -1,44 +1,7 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
 /*
  * Copyright (C) 2016 NXP Semiconductors.
  * Author: Fabio Estevam 
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This file is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
-- 
2.7.4



[PATCH] ARM: dts: imx7s-warp: use SPDX-License-Identifier

2018-08-28 Thread Pierre-Jean Texier
Adopt the SPDX license identifier headers to ease
license compliance management.

Signed-off-by: Pierre-Jean Texier 
---
 arch/arm/boot/dts/imx7s-warp.dts | 39 +--
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts
index fa390da..bf33c74 100644
--- a/arch/arm/boot/dts/imx7s-warp.dts
+++ b/arch/arm/boot/dts/imx7s-warp.dts
@@ -1,44 +1,7 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
 /*
  * Copyright (C) 2016 NXP Semiconductors.
  * Author: Fabio Estevam 
- *
- * This file is dual-licensed: you can use it either under the terms
- * of the GPL or the X11 license, at your option. Note that this dual
- * licensing only applies to this file, and not this project as a
- * whole.
- *
- *  a) This file is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This file is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Or, alternatively,
- *
- *  b) Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
-- 
2.7.4



Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.

2018-08-28 Thread Takashi Iwai
On Tue, 28 Aug 2018 18:34:15 +0200,
Anshuman Gupta wrote:
> 
> Is this patch not in consideration, there are no review comment for it. 
> this patch fixes an issue with hdac hdmi codec driver.
> 
> On one of our platform HD audio controller takes arounf 300ms.
> Below are the snippet of dmesg log.
> 
> [ 3778.461888] calling  :00:0e.0+ @ 3667, parent: pci:00, cb: 
> pci_pm_resume
> [ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs
> 
> When HD audio controller is in runtime suspend state, 
> with direct complete, we can improve overall system wide resume latency.

Actually, *this* should have been mentioned in the changelog, and the
subject would be better phrased to reflect it.

After all, you're trying to reduce / optimize the runtime PM latency.


thanks,

Takashi

> On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote:
> > Keep hdac hdmi codec to be in runtime suspended while entering to
> > system wide suspend. Currently hdac hdmi codec driver using its
> > suspend and resume operation in prepare and complete PM callbacks,
> > and it resumes the hd audio controller (parent of self) from runtime
> > suspend and blocks the direct complete for hd audio controller.
> >
> > If hdac-codec is already in runtime suspend state skip its power down
> > sequence in prepare, power up sequence in complete phase. It enables
> > direct complete path for hdac-codec and hd audio controller
> > PCI device.
> >
> > Signed-off-by: Anshuman Gupta 
> > ---
> > Changes in v2
> > - Changed the commit message.
> > - Using pm_runtime_suspended instead of pm_runtime_status_suspended
> >   in order to handle any race condition.
> >
> > sound/soc/codecs/hdac_hdmi.c | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
> > index 84f7a7a..e965338 100644
> > --- a/sound/soc/codecs/hdac_hdmi.c
> > +++ b/sound/soc/codecs/hdac_hdmi.c
> > @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev)
> >   struct hdac_ext_device *edev = to_hda_ext_device(dev);
> >   struct hdac_device *hdev = >hdev;
> >
> > + if (pm_runtime_suspended(dev))
> > + return 1;
> > +
> >   pm_runtime_get_sync(>hdev.dev);
> >
> >   /*
> > @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev)
> >   struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev);
> >   struct hdac_device *hdev = >hdev;
> >
> > + if (pm_runtime_suspended(dev))
> > + return;
> > +
> >   /* Power up afg */
> >   snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE,
> >   AC_PWRST_D0);
> > --
> > 2.7.4
> >
> 
> --
> Thanks,
> Anshuman.
>  
> 


Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.

2018-08-28 Thread Takashi Iwai
On Tue, 28 Aug 2018 18:34:15 +0200,
Anshuman Gupta wrote:
> 
> Is this patch not in consideration, there are no review comment for it. 
> this patch fixes an issue with hdac hdmi codec driver.
> 
> On one of our platform HD audio controller takes arounf 300ms.
> Below are the snippet of dmesg log.
> 
> [ 3778.461888] calling  :00:0e.0+ @ 3667, parent: pci:00, cb: 
> pci_pm_resume
> [ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs
> 
> When HD audio controller is in runtime suspend state, 
> with direct complete, we can improve overall system wide resume latency.

Actually, *this* should have been mentioned in the changelog, and the
subject would be better phrased to reflect it.

After all, you're trying to reduce / optimize the runtime PM latency.


thanks,

Takashi

> On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote:
> > Keep hdac hdmi codec to be in runtime suspended while entering to
> > system wide suspend. Currently hdac hdmi codec driver using its
> > suspend and resume operation in prepare and complete PM callbacks,
> > and it resumes the hd audio controller (parent of self) from runtime
> > suspend and blocks the direct complete for hd audio controller.
> >
> > If hdac-codec is already in runtime suspend state skip its power down
> > sequence in prepare, power up sequence in complete phase. It enables
> > direct complete path for hdac-codec and hd audio controller
> > PCI device.
> >
> > Signed-off-by: Anshuman Gupta 
> > ---
> > Changes in v2
> > - Changed the commit message.
> > - Using pm_runtime_suspended instead of pm_runtime_status_suspended
> >   in order to handle any race condition.
> >
> > sound/soc/codecs/hdac_hdmi.c | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
> > index 84f7a7a..e965338 100644
> > --- a/sound/soc/codecs/hdac_hdmi.c
> > +++ b/sound/soc/codecs/hdac_hdmi.c
> > @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev)
> >   struct hdac_ext_device *edev = to_hda_ext_device(dev);
> >   struct hdac_device *hdev = >hdev;
> >
> > + if (pm_runtime_suspended(dev))
> > + return 1;
> > +
> >   pm_runtime_get_sync(>hdev.dev);
> >
> >   /*
> > @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev)
> >   struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev);
> >   struct hdac_device *hdev = >hdev;
> >
> > + if (pm_runtime_suspended(dev))
> > + return;
> > +
> >   /* Power up afg */
> >   snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE,
> >   AC_PWRST_D0);
> > --
> > 2.7.4
> >
> 
> --
> Thanks,
> Anshuman.
>  
> 


Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes

2018-08-28 Thread Nick Desaulniers
On Tue, Aug 28, 2018 at 8:10 AM Miguel Ojeda
 wrote:
>
> Hi Nick,
>
> On Mon, Aug 27, 2018 at 7:48 PM, Nick Desaulniers
>  wrote:
> > On Mon, Aug 27, 2018 at 10:43 AM Nick Desaulniers
> >> > +
> >> > +/*
> >> > + * Optional attributes: your compiler may or may not support them.
> >> > + *
> >> > + * To check for them, we use __has_attribute, which is supported on gcc 
> >> > >= 5,
> >> > + * clang >= 2.9 and icc >= 17. In the meantime, to support 4.6 <= gcc < 
> >> > 5,
> >> > + * we implement it by hand.
> >> > + */
> >> > +#ifndef __has_attribute
> >> > +#define __has_attribute(x) __GCC46_has_attribute_##x
> >> > +#define __GCC46_has_attribute_assume_aligned 0
> >> > +#define __GCC46_has_attribute_designated_init 0
> >> > +#define __GCC46_has_attribute_externally_visible 1
> >> > +#define __GCC46_has_attribute_noclone 1
> >> > +#define __GCC46_has_attribute_optimize 1
> >> > +#define __GCC46_has_attribute_no_sanitize_address 0
> >> > +#endif
> >
> > And a follow up; I'm trying to understand what will happen in the case
> > of say gcc 4.9 here.  Were any of these supported between gcc 4.6 and
> > 5.0?  If so, then this code will not use them.  It's simpler than
> > explicit version checks, but it won't use features that are supported.
> >
>
> I addressed that in the email I sent afterwards:
>
> """
> Note that:
>   - assume_aligned came with gcc 4.9
>   - no_sanitize_address came with gcc 4.8
>
> So if we feel it is important to have them there (before gcc 5), we
> would need here a quick version check here.
> """
>
> The idea is that, in the future, whenever gcc 5 or later is the
> minimum version, we just get rid of the #ifdef block without touching
> the rest of the code :-)

So if __has_attribute came with gcc 5, then that means that this patch
will break assume_aligned for gcc-4.9 users and no_sanitize_address
for gcc-4.8 and gcc-4.9 users?  The slab allocator uses
assume_aligned, and no_sanitize_address for CONFIG_KASAN.  Should this
patch ever come back through stable, Android and ChromeOS
gcc-4.9/KASAN builds will break.

I don't think we should leave that for a follow up; I would like to
see it as part of this patch.  It's ok to have explicit version checks
for those 2 attributes since it's not possible to feature detect them
for the versions of gcc that we support in this code base.  I think
you should add them in a v2 of this patch.  Then we can point to this
commit as the *shining example* of how to do proper feature detection,
falling back to version checks only as a last resort.

-- 
Thanks,
~Nick Desaulniers


Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes

2018-08-28 Thread Nick Desaulniers
On Tue, Aug 28, 2018 at 8:10 AM Miguel Ojeda
 wrote:
>
> Hi Nick,
>
> On Mon, Aug 27, 2018 at 7:48 PM, Nick Desaulniers
>  wrote:
> > On Mon, Aug 27, 2018 at 10:43 AM Nick Desaulniers
> >> > +
> >> > +/*
> >> > + * Optional attributes: your compiler may or may not support them.
> >> > + *
> >> > + * To check for them, we use __has_attribute, which is supported on gcc 
> >> > >= 5,
> >> > + * clang >= 2.9 and icc >= 17. In the meantime, to support 4.6 <= gcc < 
> >> > 5,
> >> > + * we implement it by hand.
> >> > + */
> >> > +#ifndef __has_attribute
> >> > +#define __has_attribute(x) __GCC46_has_attribute_##x
> >> > +#define __GCC46_has_attribute_assume_aligned 0
> >> > +#define __GCC46_has_attribute_designated_init 0
> >> > +#define __GCC46_has_attribute_externally_visible 1
> >> > +#define __GCC46_has_attribute_noclone 1
> >> > +#define __GCC46_has_attribute_optimize 1
> >> > +#define __GCC46_has_attribute_no_sanitize_address 0
> >> > +#endif
> >
> > And a follow up; I'm trying to understand what will happen in the case
> > of say gcc 4.9 here.  Were any of these supported between gcc 4.6 and
> > 5.0?  If so, then this code will not use them.  It's simpler than
> > explicit version checks, but it won't use features that are supported.
> >
>
> I addressed that in the email I sent afterwards:
>
> """
> Note that:
>   - assume_aligned came with gcc 4.9
>   - no_sanitize_address came with gcc 4.8
>
> So if we feel it is important to have them there (before gcc 5), we
> would need here a quick version check here.
> """
>
> The idea is that, in the future, whenever gcc 5 or later is the
> minimum version, we just get rid of the #ifdef block without touching
> the rest of the code :-)

So if __has_attribute came with gcc 5, then that means that this patch
will break assume_aligned for gcc-4.9 users and no_sanitize_address
for gcc-4.8 and gcc-4.9 users?  The slab allocator uses
assume_aligned, and no_sanitize_address for CONFIG_KASAN.  Should this
patch ever come back through stable, Android and ChromeOS
gcc-4.9/KASAN builds will break.

I don't think we should leave that for a follow up; I would like to
see it as part of this patch.  It's ok to have explicit version checks
for those 2 attributes since it's not possible to feature detect them
for the versions of gcc that we support in this code base.  I think
you should add them in a v2 of this patch.  Then we can point to this
commit as the *shining example* of how to do proper feature detection,
falling back to version checks only as a last resort.

-- 
Thanks,
~Nick Desaulniers


Re: [PATCH] perf: Convert to using %pOFn instead of device_node.name

2018-08-28 Thread Will Deacon
On Mon, Aug 27, 2018 at 08:52:39PM -0500, Rob Herring wrote:
> In preparation to remove the node name pointer from struct device_node,
> convert printf users to use the %pOFn format specifier.
> 
> Cc: Will Deacon 
> Cc: Mark Rutland 
> Cc: linux-arm-ker...@lists.infradead.org
> Signed-off-by: Rob Herring 
> ---
>  drivers/perf/arm_pmu_platform.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)

Cheers, I can queue this up for 4.20. Let me know if you'd rather take it
along with the name pointer removal instead.

Will

> diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
> index 96075cecb0ae..933bd8410fc2 100644
> --- a/drivers/perf/arm_pmu_platform.c
> +++ b/drivers/perf/arm_pmu_platform.c
> @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node 
> *node, int i)
>  
>   dn = of_parse_phandle(node, "interrupt-affinity", i);
>   if (!dn) {
> - pr_warn("failed to parse interrupt-affinity[%d] for %s\n",
> - i, node->name);
> + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n",
> + i, node);
>   return -EINVAL;
>   }
>  
>   cpu = of_cpu_node_to_id(dn);
>   if (cpu < 0) {
> - pr_warn("failed to find logical CPU for %s\n", dn->name);
> + pr_warn("failed to find logical CPU for %pOFn\n", dn);
>   cpu = nr_cpu_ids;
>   }
>  
> -- 
> 2.17.1
> 


Re: [PATCH] perf: Convert to using %pOFn instead of device_node.name

2018-08-28 Thread Will Deacon
On Mon, Aug 27, 2018 at 08:52:39PM -0500, Rob Herring wrote:
> In preparation to remove the node name pointer from struct device_node,
> convert printf users to use the %pOFn format specifier.
> 
> Cc: Will Deacon 
> Cc: Mark Rutland 
> Cc: linux-arm-ker...@lists.infradead.org
> Signed-off-by: Rob Herring 
> ---
>  drivers/perf/arm_pmu_platform.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)

Cheers, I can queue this up for 4.20. Let me know if you'd rather take it
along with the name pointer removal instead.

Will

> diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
> index 96075cecb0ae..933bd8410fc2 100644
> --- a/drivers/perf/arm_pmu_platform.c
> +++ b/drivers/perf/arm_pmu_platform.c
> @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node 
> *node, int i)
>  
>   dn = of_parse_phandle(node, "interrupt-affinity", i);
>   if (!dn) {
> - pr_warn("failed to parse interrupt-affinity[%d] for %s\n",
> - i, node->name);
> + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n",
> + i, node);
>   return -EINVAL;
>   }
>  
>   cpu = of_cpu_node_to_id(dn);
>   if (cpu < 0) {
> - pr_warn("failed to find logical CPU for %s\n", dn->name);
> + pr_warn("failed to find logical CPU for %pOFn\n", dn);
>   cpu = nr_cpu_ids;
>   }
>  
> -- 
> 2.17.1
> 


Re: [PATCH v2] objtool: Support multiple rodata sections.

2018-08-28 Thread Josh Poimboeuf
On Fri, Aug 03, 2018 at 07:40:40PM +0100, Allan Xavier wrote:
> +static void mark_rodata(struct objtool_file *file)
> +{
> + struct section *sec;
> + bool found = false;
> + static const char *str1 = ".str1.";
> + const int str1len = strlen(str1) + 1;
> +

A comment here would help, explaining that this looks for both .rodata
and .rodata.func_name sections (when using -fdata-sections).

> + for_each_sec(file, sec) {
> + if (strstr(sec->name, ".rodata") == sec->name) {
> + char *str1pos = sec->name + strlen(sec->name) - str1len;
> +
> + /* Skips over .rodata.str1.1 and .rodata.str.1.8 */
> + if (strstr(sec->name, str1) != str1pos) {
> + sec->rodata = true;
> + found = true;
> + }
> + }
> + }


This could be simplified and made more readable with something like:

for_each_sec(file, sec) {
if (!strncmp(sec->name, ".rodata", 7) &&
!strstr(sec->name, ".str1.") {
sec->rodata = true;
found = true;
}
}

-- 
Josh


Re: [PATCH v2] objtool: Support multiple rodata sections.

2018-08-28 Thread Josh Poimboeuf
On Fri, Aug 03, 2018 at 07:40:40PM +0100, Allan Xavier wrote:
> +static void mark_rodata(struct objtool_file *file)
> +{
> + struct section *sec;
> + bool found = false;
> + static const char *str1 = ".str1.";
> + const int str1len = strlen(str1) + 1;
> +

A comment here would help, explaining that this looks for both .rodata
and .rodata.func_name sections (when using -fdata-sections).

> + for_each_sec(file, sec) {
> + if (strstr(sec->name, ".rodata") == sec->name) {
> + char *str1pos = sec->name + strlen(sec->name) - str1len;
> +
> + /* Skips over .rodata.str1.1 and .rodata.str.1.8 */
> + if (strstr(sec->name, str1) != str1pos) {
> + sec->rodata = true;
> + found = true;
> + }
> + }
> + }


This could be simplified and made more readable with something like:

for_each_sec(file, sec) {
if (!strncmp(sec->name, ".rodata", 7) &&
!strstr(sec->name, ".str1.") {
sec->rodata = true;
found = true;
}
}

-- 
Josh


Re: [PATCH v2 2/6] reset: qcom: PDC Global (Power Domain Controller) reset controller

2018-08-28 Thread Matthias Kaehlcke
On Mon, Aug 27, 2018 at 08:02:53PM -0700, Bjorn Andersson wrote:
> On Mon 27 Aug 17:22 PDT 2018, Matthias Kaehlcke wrote:
> > On Fri, Aug 24, 2018 at 06:48:56PM +0530, Sibi Sankar wrote:
> > > diff --git a/drivers/reset/reset-qcom-pdc.c 
> > > b/drivers/reset/reset-qcom-pdc.c
> [..]
> > > +struct qcom_pdc_desc {
> > > + const struct regmap_config *config;
> > > + const struct qcom_pdc_reset_map *resets;
> > > + size_t num_resets;
> > > +};
> > 
> > Not sure if this structure adds much value or just a layer of
> > indirection:
> > 
> > - .config is only accessed in _probe(), sdm845_pdc_regmap_config could
> >   be used directly
> > - .resets is used in _(de)assert(), sdm845_pdc_resets could be used
> >   directly
> > - .num_resets is only accessed in _probe(),
> >   ARRAY_SIZE(sdm845_pdc_resets) could be used instead
> > 
> > It probably makes sense if it is planned to support reset controllers
> > of other SoCs with this driver.
> > 
> 
> I like this suggestion, once we need the configurability we can add the
> type for it. It also shows that only .resets would need to be referenced
> by qcom_pdc_reset_data.
> 
> > > +struct qcom_pdc_reset_data {
> > > + struct reset_controller_dev rcdev;
> > > + struct regmap *regmap;
> > > + const struct qcom_pdc_desc *desc;
> > > +};
> [..]
> > > +static int qcom_pdc_reset_probe(struct platform_device *pdev)
> > > +{
> [..]
> > > + data->regmap = devm_regmap_init_mmio(dev, base, desc->config);
> > > + if (IS_ERR(data->regmap)) {
> > > + dev_err(dev, "Unable to get pdc-global regmap");
> > 
> > Add missing '\n'
> > 
> > Say 'pdc-reset' instead of 'pdc-global'? (see also my other comment
> > below).
> > 
> 
> This regmap is created out of the single anonymous "reg", so I think the
> error should be reduced to "Unable to initialize regmap\n".
> 
> [..]
> > > +static const struct of_device_id qcom_pdc_reset_of_match[] = {
> > > + { .compatible = "qcom,sdm845-pdc-global", .data = _pdc_desc },
> > 
> > Should this be 'qcom,sdm845-pdc-reset' which is more specific than
> > 'global' and in line with the name and purpose of the driver?
> > Obviously this would require to adjust the bindings doc too.
> > 
> 
> No, the binding describes the hardware block named "PDC Global",
> currently implemented as a reset controller. The reason for doing this
> is so that we one day can expose additional interfaces in a backwards
> compatible fashion.

Sounds reasonable, thanks for the clarification.


Re: [PATCH v2 2/6] reset: qcom: PDC Global (Power Domain Controller) reset controller

2018-08-28 Thread Matthias Kaehlcke
On Mon, Aug 27, 2018 at 08:02:53PM -0700, Bjorn Andersson wrote:
> On Mon 27 Aug 17:22 PDT 2018, Matthias Kaehlcke wrote:
> > On Fri, Aug 24, 2018 at 06:48:56PM +0530, Sibi Sankar wrote:
> > > diff --git a/drivers/reset/reset-qcom-pdc.c 
> > > b/drivers/reset/reset-qcom-pdc.c
> [..]
> > > +struct qcom_pdc_desc {
> > > + const struct regmap_config *config;
> > > + const struct qcom_pdc_reset_map *resets;
> > > + size_t num_resets;
> > > +};
> > 
> > Not sure if this structure adds much value or just a layer of
> > indirection:
> > 
> > - .config is only accessed in _probe(), sdm845_pdc_regmap_config could
> >   be used directly
> > - .resets is used in _(de)assert(), sdm845_pdc_resets could be used
> >   directly
> > - .num_resets is only accessed in _probe(),
> >   ARRAY_SIZE(sdm845_pdc_resets) could be used instead
> > 
> > It probably makes sense if it is planned to support reset controllers
> > of other SoCs with this driver.
> > 
> 
> I like this suggestion, once we need the configurability we can add the
> type for it. It also shows that only .resets would need to be referenced
> by qcom_pdc_reset_data.
> 
> > > +struct qcom_pdc_reset_data {
> > > + struct reset_controller_dev rcdev;
> > > + struct regmap *regmap;
> > > + const struct qcom_pdc_desc *desc;
> > > +};
> [..]
> > > +static int qcom_pdc_reset_probe(struct platform_device *pdev)
> > > +{
> [..]
> > > + data->regmap = devm_regmap_init_mmio(dev, base, desc->config);
> > > + if (IS_ERR(data->regmap)) {
> > > + dev_err(dev, "Unable to get pdc-global regmap");
> > 
> > Add missing '\n'
> > 
> > Say 'pdc-reset' instead of 'pdc-global'? (see also my other comment
> > below).
> > 
> 
> This regmap is created out of the single anonymous "reg", so I think the
> error should be reduced to "Unable to initialize regmap\n".
> 
> [..]
> > > +static const struct of_device_id qcom_pdc_reset_of_match[] = {
> > > + { .compatible = "qcom,sdm845-pdc-global", .data = _pdc_desc },
> > 
> > Should this be 'qcom,sdm845-pdc-reset' which is more specific than
> > 'global' and in line with the name and purpose of the driver?
> > Obviously this would require to adjust the bindings doc too.
> > 
> 
> No, the binding describes the hardware block named "PDC Global",
> currently implemented as a reset controller. The reason for doing this
> is so that we one day can expose additional interfaces in a backwards
> compatible fashion.

Sounds reasonable, thanks for the clarification.


Re: [PATCH] MIPS: Convert to using %pOFn instead of device_node.name

2018-08-28 Thread Paul Burton
Hi Rob,

On Mon, Aug 27, 2018 at 08:52:05PM -0500, Rob Herring wrote:
> In preparation to remove the node name pointer from struct device_node,
> convert printf users to use the %pOFn format specifier.
> 
> Cc: Ralf Baechle 
> Cc: Paul Burton 
> Cc: James Hogan 
> Cc: John Crispin 
> Cc: linux-m...@linux-mips.org
> Signed-off-by: Rob Herring 
> ---
>  arch/mips/cavium-octeon/octeon-irq.c | 16 
>  arch/mips/netlogic/common/irq.c  | 14 +++---
>  arch/mips/ralink/cevt-rt3352.c   |  6 +++---
>  arch/mips/ralink/ill_acc.c   |  2 +-
>  4 files changed, 19 insertions(+), 19 deletions(-)

Thanks - applied to mips-next for 4.20.

Paul


Re: [PATCH] MIPS: Convert to using %pOFn instead of device_node.name

2018-08-28 Thread Paul Burton
Hi Rob,

On Mon, Aug 27, 2018 at 08:52:05PM -0500, Rob Herring wrote:
> In preparation to remove the node name pointer from struct device_node,
> convert printf users to use the %pOFn format specifier.
> 
> Cc: Ralf Baechle 
> Cc: Paul Burton 
> Cc: James Hogan 
> Cc: John Crispin 
> Cc: linux-m...@linux-mips.org
> Signed-off-by: Rob Herring 
> ---
>  arch/mips/cavium-octeon/octeon-irq.c | 16 
>  arch/mips/netlogic/common/irq.c  | 14 +++---
>  arch/mips/ralink/cevt-rt3352.c   |  6 +++---
>  arch/mips/ralink/ill_acc.c   |  2 +-
>  4 files changed, 19 insertions(+), 19 deletions(-)

Thanks - applied to mips-next for 4.20.

Paul


Re: [PATCH v13 07/13] x86/sgx: Add data structures for tracking the EPC pages

2018-08-28 Thread Dave Hansen
>>>  extern bool sgx_enabled;
>>>  extern bool sgx_lc_enabled;
>>> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS];
>>> +
>>> +/*
>>> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc
>>
>> Why are you bothering packing these bits?  This seems a rather
>> convoluted way to store two integers.
> 
> To keep struct sgx_epc_page 64 bytes.

It's a list_head and a ulong now.  That doesn't add up to 64.

If you properly describe the bounds and limits of banks we can possibly
help you find a nice solution.  As it stands, they are totally opaque
and we have no idea what is going on.

>>> +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long 
>>> index,
>>> +   struct sgx_epc_bank *bank)
>>> +{
>>> +   unsigned long nr_pages = size >> PAGE_SHIFT;
>>> +   struct sgx_epc_page *pages_data;
>>> +   unsigned long i;
>>> +   void *va;
>>> +
>>> +   va = ioremap_cache(addr, size);
>>> +   if (!va)
>>> +   return -ENOMEM;
>>> +
>>> +   pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL);
>>> +   if (!pages_data)
>>> +   goto out_iomap;
>>
>> This looks like you're roughly limited by the page allocator to a bank
>> size of ~1.4GB which seems kinda small.  Is this really OK?
> 
> Where does this limitation come from?

The page allocator can only do 4MB at a time.  Using your 64 byte
numbers: 4MB/64 = 64k sgx_epc_pages.  64k*PAGE_SIZE = 256MB.  So you can
only handle 256MB banks with this code.

BTW, if you only have 64k worth of pages, you can use a u16 for the index.

>>> +   u32 eax, ebx, ecx, edx;
>>> +   u64 pa, size;
>>> +   int ret;
>>> +   int i;
>>> +
>>> +   for (i = 0; i < SGX_MAX_EPC_BANKS; i++) {
>>> +   cpuid_count(SGX_CPUID, 2 + i, , , , );
>>> +   if (!(eax & 0xF))
>>> +   break;
>>
>> So, we have random data coming out of a random CPUID leaf being called
>> 'eax' and then being tested against a random hard-coded mask.  This
>> seems rather unfortunate for someone trying to understand the code.  Can
>> we do better?
> 
> Should probably do something along the lines:
> 
> #define SGX_CPUID_SECTION(i) (2 + (i))
> 
> enum sgx_section {
>   SGX_CPUID_SECTION_INVALID   = 0x00,
>   SGX_CPUID_SECTION_VALID = 0x1B,
>   SGX_CPUID_SECTION_MASK  = 0xFF,
> };

Plus comments, that would be nice.

>>> +   sgx_nr_epc_banks++;
>>> +   }
>>> +
>>> +   if (!sgx_nr_epc_banks) {
>>> +   pr_err("There are zero EPC banks.\n");
>>> +   return -ENODEV;
>>> +   }
>>> +
>>> +   return 0;
>>> +}
>>
>> Does this support hot-addition of a bank?  If not, why not?
...
> I'm not aware that we would have an ACPI specification for SGX so this
> is all I have at the moment (does not show any ACPI event for
> hotplugging).

So you're saying the one platform you looked at don't support hotplug.
I was looking for a more broad statement about SGX.


[PATCH v5 2/3] overlayfs: check CAP_MKNOD before issuing vfs_whiteout

2018-08-28 Thread Mark Salyzyn
Assumption never checked, should fail if the mounter creds are not
sufficient.

Signed-off-by: Mark Salyzyn 
Cc: Miklos Szeredi 
Cc: Jonathan Corbet 
Cc: Vivek Goyal 
Cc: Eric W. Biederman 
Cc: Amir Goldstein 
Cc: Randy Dunlap 
Cc: Stephen Smalley 
Cc: linux-unio...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

v5
- dependency of "overlayfs: override_creds=off option bypass creator_cred"
---
 fs/overlayfs/overlayfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7538b9b56237..bf3a80157d42 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -176,7 +176,7 @@ static inline int ovl_do_rename(struct inode *olddir, 
struct dentry *olddentry,
 
 static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
 {
-   int err = vfs_whiteout(dir, dentry);
+   int err = capable(CAP_MKNOD) ? vfs_whiteout(dir, dentry) : -EPERM;
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
 }
-- 
2.19.0.rc0.228.g281dcd1b4d0-goog



Re: [PATCH v13 07/13] x86/sgx: Add data structures for tracking the EPC pages

2018-08-28 Thread Dave Hansen
>>>  extern bool sgx_enabled;
>>>  extern bool sgx_lc_enabled;
>>> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS];
>>> +
>>> +/*
>>> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc
>>
>> Why are you bothering packing these bits?  This seems a rather
>> convoluted way to store two integers.
> 
> To keep struct sgx_epc_page 64 bytes.

It's a list_head and a ulong now.  That doesn't add up to 64.

If you properly describe the bounds and limits of banks we can possibly
help you find a nice solution.  As it stands, they are totally opaque
and we have no idea what is going on.

>>> +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long 
>>> index,
>>> +   struct sgx_epc_bank *bank)
>>> +{
>>> +   unsigned long nr_pages = size >> PAGE_SHIFT;
>>> +   struct sgx_epc_page *pages_data;
>>> +   unsigned long i;
>>> +   void *va;
>>> +
>>> +   va = ioremap_cache(addr, size);
>>> +   if (!va)
>>> +   return -ENOMEM;
>>> +
>>> +   pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL);
>>> +   if (!pages_data)
>>> +   goto out_iomap;
>>
>> This looks like you're roughly limited by the page allocator to a bank
>> size of ~1.4GB which seems kinda small.  Is this really OK?
> 
> Where does this limitation come from?

The page allocator can only do 4MB at a time.  Using your 64 byte
numbers: 4MB/64 = 64k sgx_epc_pages.  64k*PAGE_SIZE = 256MB.  So you can
only handle 256MB banks with this code.

BTW, if you only have 64k worth of pages, you can use a u16 for the index.

>>> +   u32 eax, ebx, ecx, edx;
>>> +   u64 pa, size;
>>> +   int ret;
>>> +   int i;
>>> +
>>> +   for (i = 0; i < SGX_MAX_EPC_BANKS; i++) {
>>> +   cpuid_count(SGX_CPUID, 2 + i, , , , );
>>> +   if (!(eax & 0xF))
>>> +   break;
>>
>> So, we have random data coming out of a random CPUID leaf being called
>> 'eax' and then being tested against a random hard-coded mask.  This
>> seems rather unfortunate for someone trying to understand the code.  Can
>> we do better?
> 
> Should probably do something along the lines:
> 
> #define SGX_CPUID_SECTION(i) (2 + (i))
> 
> enum sgx_section {
>   SGX_CPUID_SECTION_INVALID   = 0x00,
>   SGX_CPUID_SECTION_VALID = 0x1B,
>   SGX_CPUID_SECTION_MASK  = 0xFF,
> };

Plus comments, that would be nice.

>>> +   sgx_nr_epc_banks++;
>>> +   }
>>> +
>>> +   if (!sgx_nr_epc_banks) {
>>> +   pr_err("There are zero EPC banks.\n");
>>> +   return -ENODEV;
>>> +   }
>>> +
>>> +   return 0;
>>> +}
>>
>> Does this support hot-addition of a bank?  If not, why not?
...
> I'm not aware that we would have an ACPI specification for SGX so this
> is all I have at the moment (does not show any ACPI event for
> hotplugging).

So you're saying the one platform you looked at don't support hotplug.
I was looking for a more broad statement about SGX.


[PATCH v5 2/3] overlayfs: check CAP_MKNOD before issuing vfs_whiteout

2018-08-28 Thread Mark Salyzyn
Assumption never checked, should fail if the mounter creds are not
sufficient.

Signed-off-by: Mark Salyzyn 
Cc: Miklos Szeredi 
Cc: Jonathan Corbet 
Cc: Vivek Goyal 
Cc: Eric W. Biederman 
Cc: Amir Goldstein 
Cc: Randy Dunlap 
Cc: Stephen Smalley 
Cc: linux-unio...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

v5
- dependency of "overlayfs: override_creds=off option bypass creator_cred"
---
 fs/overlayfs/overlayfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7538b9b56237..bf3a80157d42 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -176,7 +176,7 @@ static inline int ovl_do_rename(struct inode *olddir, 
struct dentry *olddentry,
 
 static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
 {
-   int err = vfs_whiteout(dir, dentry);
+   int err = capable(CAP_MKNOD) ? vfs_whiteout(dir, dentry) : -EPERM;
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
 }
-- 
2.19.0.rc0.228.g281dcd1b4d0-goog



Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes

2018-08-28 Thread Nick Desaulniers
On Tue, Aug 28, 2018 at 8:04 AM Miguel Ojeda
 wrote:
>
> Hi Nick,
>
> On Mon, Aug 27, 2018 at 7:43 PM, Nick Desaulniers
>  wrote:
> > On Sun, Aug 26, 2018 at 10:58 AM Miguel Ojeda
> >  wrote:
> >>
> >> Instead of using version checks per-compiler to define (or not) each 
> >> attribute,
> >> use __has_attribute to test for them, following the cleanup started with
> >> commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h 
> >> mutually exclusive").
> >>
> >> All the attributes that are fairly common/standard (i.e. those that do not
> >> require extra logic to define them) have been moved to a new file
> >> include/linux/compiler_attributes.h. The attributes have been sorted
> >> and divided between "required" and "optional".
> >
> > Nice! Thanks Miguel.  Regarding sorting, I'm happy with that.  In
> > fact, some of the comments can be removed IMO, as the attributes have
> > common definitions in the docs (maybe an added link to the gcc and
> > clang attribute docs at the top of the file rather than per attribute
> > comments).
>
> Thanks for the review!
>
> I thought about that, although there isn't a single page with them in
> GCC (we could group them by type though: function ones, variable
> ones... and then link to those).
> On the other hand, maybe writing a
> Doc/ file is better and allows us to write as much as one would like
> about each of them (and a link to each page compiler's page about it,
> etc.). I think in the end the Doc/ file might be the best, in order
> not to crowd the header.

A comment is closer to the source, but I guess that's bytes for each
inclusion for every file.  I don't feel passionate about this point
one way or the other.

>
> >
> >>
> >> Further, attributes that are already supported in gcc >= 4.6 and recent 
> >> clang
> >> were simply made to be required (instead of testing for them):
> >>   * always_inline
> >>   * const (pure was already "required", by the way)
> >>   * gnu_inline
> >
> > There's an important test for gnu_inline that isn't checking that it's
> > supported, but rather what the implicit behavior is depending on which
> > C standard is being used.  It's important not to remove that.
>
> Hm... I actually thought it was not available at some point before 4.6
> and removed the #ifdef. The comment even says it is featuring
> detecting it so that the old GCC inlining is used; but it shouldn't
> matter if you always use it, no?

Good point.  Rather than defining it only if GNU inline is not the
current behavior is a bit more verbose than just always defining it.
This seems to confirm that that should work:
https://godbolt.org/z/igwh32.

>
> I just went looking for more info in d03db2bc2 ("compiler-gcc.h: Add
> __attribute__((gnu_inline)) to all inline declarations") and if I
> understood the commit message, the problem is compiling with implicit
> new standard in newer compilers which trigger the C90 behavior, while
> we need the old one --- but if we use gnu_inline, we are getting it
> regardless.
>
> I am sure I am missing something, but I think a clarification is
> needed (and in the code comment as well) -- a bit off-topic, anyway.
>
> [Also, I wouldn't define an attribute or not depending on some other
> condition. I would, instead, define something some other symbol with
> that logic (i.e. instead of using "__gnu_inline", because that is
> lying -- it is not using the attribute even if the compiler supports
> it).]
>
> >
> >>
> >> Finally, some other bits were cleaned up in the process:
> >>   * __optimize: removed (unused in the whole kernel tree)
> >
> > A+ for removing dead code.  I also don't see it used anywhere.
> >
> >>   * __must_be_array: removed from -gcc and -clang (identical), moved to 
> >> _types
> >
> > Yep, uses a builtin (we should add guards for that, later, in a
> > similar style change that guards the use of builtins). Looks good.
> >
> >> (it depends on the unconditionally used  __builtin_types_compatible_p
> >>   * Removes unneeded underscores on the attributes' names
> >
> > That doesn't sound right, but lets see what you mean by that.
>
> Some attributes used the __name__ syntax (i.e. inside the double
> parenthesis), others didn't. I simplified by removing all the extra
> underscores.

A+

>
> >
> >>
> >> There are some things that can be further cleaned up afterwards:
> >>   * __attribute_const__: rename to __const
> >
> > This doesn't look correct to me; the kernel is full of call sites for
> > __attribute_const__. You can't rename the definition without renaming
>
> Of course it is full of use sites! That is why I said it is a possible
> cleanup for *afterwards* this patch :-)
>
> > all of the call sites (and that would be too big a change for this
> > patch, IMO).  Skip the rename, and it also looks like you just removed
> > it outright (Oops).
>
> Not sure what you mean by this (?). The attribute is still there unchanged.

Nevermind, I misinterpretered this part of the patch.

>
> >
> >>   * __noretpoline: 

Re: [PATCH] include/linux/compiler*.h: Use feature checking instead of version checks for attributes

2018-08-28 Thread Nick Desaulniers
On Tue, Aug 28, 2018 at 8:04 AM Miguel Ojeda
 wrote:
>
> Hi Nick,
>
> On Mon, Aug 27, 2018 at 7:43 PM, Nick Desaulniers
>  wrote:
> > On Sun, Aug 26, 2018 at 10:58 AM Miguel Ojeda
> >  wrote:
> >>
> >> Instead of using version checks per-compiler to define (or not) each 
> >> attribute,
> >> use __has_attribute to test for them, following the cleanup started with
> >> commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h 
> >> mutually exclusive").
> >>
> >> All the attributes that are fairly common/standard (i.e. those that do not
> >> require extra logic to define them) have been moved to a new file
> >> include/linux/compiler_attributes.h. The attributes have been sorted
> >> and divided between "required" and "optional".
> >
> > Nice! Thanks Miguel.  Regarding sorting, I'm happy with that.  In
> > fact, some of the comments can be removed IMO, as the attributes have
> > common definitions in the docs (maybe an added link to the gcc and
> > clang attribute docs at the top of the file rather than per attribute
> > comments).
>
> Thanks for the review!
>
> I thought about that, although there isn't a single page with them in
> GCC (we could group them by type though: function ones, variable
> ones... and then link to those).
> On the other hand, maybe writing a
> Doc/ file is better and allows us to write as much as one would like
> about each of them (and a link to each page compiler's page about it,
> etc.). I think in the end the Doc/ file might be the best, in order
> not to crowd the header.

A comment is closer to the source, but I guess that's bytes for each
inclusion for every file.  I don't feel passionate about this point
one way or the other.

>
> >
> >>
> >> Further, attributes that are already supported in gcc >= 4.6 and recent 
> >> clang
> >> were simply made to be required (instead of testing for them):
> >>   * always_inline
> >>   * const (pure was already "required", by the way)
> >>   * gnu_inline
> >
> > There's an important test for gnu_inline that isn't checking that it's
> > supported, but rather what the implicit behavior is depending on which
> > C standard is being used.  It's important not to remove that.
>
> Hm... I actually thought it was not available at some point before 4.6
> and removed the #ifdef. The comment even says it is featuring
> detecting it so that the old GCC inlining is used; but it shouldn't
> matter if you always use it, no?

Good point.  Rather than defining it only if GNU inline is not the
current behavior is a bit more verbose than just always defining it.
This seems to confirm that that should work:
https://godbolt.org/z/igwh32.

>
> I just went looking for more info in d03db2bc2 ("compiler-gcc.h: Add
> __attribute__((gnu_inline)) to all inline declarations") and if I
> understood the commit message, the problem is compiling with implicit
> new standard in newer compilers which trigger the C90 behavior, while
> we need the old one --- but if we use gnu_inline, we are getting it
> regardless.
>
> I am sure I am missing something, but I think a clarification is
> needed (and in the code comment as well) -- a bit off-topic, anyway.
>
> [Also, I wouldn't define an attribute or not depending on some other
> condition. I would, instead, define something some other symbol with
> that logic (i.e. instead of using "__gnu_inline", because that is
> lying -- it is not using the attribute even if the compiler supports
> it).]
>
> >
> >>
> >> Finally, some other bits were cleaned up in the process:
> >>   * __optimize: removed (unused in the whole kernel tree)
> >
> > A+ for removing dead code.  I also don't see it used anywhere.
> >
> >>   * __must_be_array: removed from -gcc and -clang (identical), moved to 
> >> _types
> >
> > Yep, uses a builtin (we should add guards for that, later, in a
> > similar style change that guards the use of builtins). Looks good.
> >
> >> (it depends on the unconditionally used  __builtin_types_compatible_p
> >>   * Removes unneeded underscores on the attributes' names
> >
> > That doesn't sound right, but lets see what you mean by that.
>
> Some attributes used the __name__ syntax (i.e. inside the double
> parenthesis), others didn't. I simplified by removing all the extra
> underscores.

A+

>
> >
> >>
> >> There are some things that can be further cleaned up afterwards:
> >>   * __attribute_const__: rename to __const
> >
> > This doesn't look correct to me; the kernel is full of call sites for
> > __attribute_const__. You can't rename the definition without renaming
>
> Of course it is full of use sites! That is why I said it is a possible
> cleanup for *afterwards* this patch :-)
>
> > all of the call sites (and that would be too big a change for this
> > patch, IMO).  Skip the rename, and it also looks like you just removed
> > it outright (Oops).
>
> Not sure what you mean by this (?). The attribute is still there unchanged.

Nevermind, I misinterpretered this part of the patch.

>
> >
> >>   * __noretpoline: 

[PATCH v5 0/3]

2018-08-28 Thread Mark Salyzyn
overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh
overlayfs: check CAP_MKNOD before issuing vfs_whiteout

Assumption never checked, should fail if the mounter creds are not
sufficient.

overlayfs: override_creds=off option bypass creator_cred

By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials.  The incoming accesses are
checked against the caller's credentials.

If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem.  For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.

We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials.  This option default is set in the CONFIG
OVERLAY_FS_OVERRIDE_CREDS or in the module option override_creds.

The module boolean parameter and mount option override_creds is also
added as a presence check for this "feature" by checking existence of
/sys/module/overlay/parameters/overlay_creds.  This will allow user
space to determine if the option can be supplied successfully to the
mount(2) operation.

Signed-off-by: Mark Salyzyn 
Cc: Miklos Szeredi 
Cc: Jonathan Corbet 
Cc: Vivek Goyal 
Cc: Eric W. Biederman 
Cc: Amir Goldstein 
Cc: Randy Dunlap 
Cc: Stephen Smalley 
Cc: linux-unio...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

v2:
- Forward port changed attr to stat, resulting in a build error.
- altered commit message.

v3:
- Change name from caller_credentials / creator_credentials to the
  boolean override_creds.
- Changed from creator to mounter credentials.
- Updated and fortified the documentation.
- Added CONFIG_OVERLAY_FS_OVERRIDE_CREDS

v4:
- spelling and grammar errors in text

v5:
- beefed up the caveats in the Documentation
- Is dependent on
  "overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh"
  "overlayfs: check CAP_MKNOD before issuing vfs_whiteout"
- Added prwarn when override_creds=off


Re: [PATCH V2] xen: export device state to sysfs

2018-08-28 Thread Boris Ostrovsky
On 08/28/2018 10:56 AM, Joe Jin wrote:
> Export device state to sysfs to allow for easier get device state.
>
> Signed-off-by: Joe Jin 
> Cc: Boris Ostrovsky 
> Cc: Juergen Gross 
> Cc: Konrad Rzeszutek Wilk 
> ---
>  Documentation/ABI/stable/sysfs-bus-xen-backend | 9 +
>  drivers/xen/xenbus/xenbus_probe.c  | 9 +
>  2 files changed, 18 insertions(+)
>
> diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend 
> b/Documentation/ABI/stable/sysfs-bus-xen-backend
> index 3d5951c8bf5f..e8b60bd766f7 100644
> --- a/Documentation/ABI/stable/sysfs-bus-xen-backend


Won't this show up in the frontend as well?

-boris

> +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
> @@ -73,3 +73,12 @@ KernelVersion: 3.0
>  Contact: Konrad Rzeszutek Wilk 
>  Description:
>  Number of sectors written by the frontend.
> +
> +What:/sys/bus/xen-backend/devices/*/state
> +Date:August 2018
> +KernelVersion:   4.19
> +Contact: Joe Jin 
> +Description:
> +The state of the device. One of: 'Unknown',
> +'Initialising', 'Initialised', 'Connected', 'Closing',
> +'Closed', 'Reconfiguring', 'Reconfigured'.
> diff --git a/drivers/xen/xenbus/xenbus_probe.c 
> b/drivers/xen/xenbus/xenbus_probe.c
> index f2088838f690..5b471889d723 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RO(modalias);
>  
> +static ssize_t state_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%s\n",
> + xenbus_strstate(to_xenbus_device(dev)->state));
> +}
> +static DEVICE_ATTR_RO(state);
> +
>  static struct attribute *xenbus_dev_attrs[] = {
>   _attr_nodename.attr,
>   _attr_devtype.attr,
>   _attr_modalias.attr,
> + _attr_state.attr,
>   NULL,
>  };
>  



[PATCH v5 0/3]

2018-08-28 Thread Mark Salyzyn
overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh
overlayfs: check CAP_MKNOD before issuing vfs_whiteout

Assumption never checked, should fail if the mounter creds are not
sufficient.

overlayfs: override_creds=off option bypass creator_cred

By default, all access to the upper, lower and work directories is the
recorded mounter's MAC and DAC credentials.  The incoming accesses are
checked against the caller's credentials.

If the principles of least privilege are applied, the mounter's
credentials might not overlap the credentials of the caller's when
accessing the overlayfs filesystem.  For example, a file that a lower
DAC privileged caller can execute, is MAC denied to the generally
higher DAC privileged mounter, to prevent an attack vector.

We add the option to turn off override_creds in the mount options; all
subsequent operations after mount on the filesystem will be only the
caller's credentials.  This option default is set in the CONFIG
OVERLAY_FS_OVERRIDE_CREDS or in the module option override_creds.

The module boolean parameter and mount option override_creds is also
added as a presence check for this "feature" by checking existence of
/sys/module/overlay/parameters/overlay_creds.  This will allow user
space to determine if the option can be supplied successfully to the
mount(2) operation.

Signed-off-by: Mark Salyzyn 
Cc: Miklos Szeredi 
Cc: Jonathan Corbet 
Cc: Vivek Goyal 
Cc: Eric W. Biederman 
Cc: Amir Goldstein 
Cc: Randy Dunlap 
Cc: Stephen Smalley 
Cc: linux-unio...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

v2:
- Forward port changed attr to stat, resulting in a build error.
- altered commit message.

v3:
- Change name from caller_credentials / creator_credentials to the
  boolean override_creds.
- Changed from creator to mounter credentials.
- Updated and fortified the documentation.
- Added CONFIG_OVERLAY_FS_OVERRIDE_CREDS

v4:
- spelling and grammar errors in text

v5:
- beefed up the caveats in the Documentation
- Is dependent on
  "overlayfs: check CAP_DAC_READ_SEARCH before issuing exportfs_decode_fh"
  "overlayfs: check CAP_MKNOD before issuing vfs_whiteout"
- Added prwarn when override_creds=off


Re: [PATCH V2] xen: export device state to sysfs

2018-08-28 Thread Boris Ostrovsky
On 08/28/2018 10:56 AM, Joe Jin wrote:
> Export device state to sysfs to allow for easier get device state.
>
> Signed-off-by: Joe Jin 
> Cc: Boris Ostrovsky 
> Cc: Juergen Gross 
> Cc: Konrad Rzeszutek Wilk 
> ---
>  Documentation/ABI/stable/sysfs-bus-xen-backend | 9 +
>  drivers/xen/xenbus/xenbus_probe.c  | 9 +
>  2 files changed, 18 insertions(+)
>
> diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend 
> b/Documentation/ABI/stable/sysfs-bus-xen-backend
> index 3d5951c8bf5f..e8b60bd766f7 100644
> --- a/Documentation/ABI/stable/sysfs-bus-xen-backend


Won't this show up in the frontend as well?

-boris

> +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
> @@ -73,3 +73,12 @@ KernelVersion: 3.0
>  Contact: Konrad Rzeszutek Wilk 
>  Description:
>  Number of sectors written by the frontend.
> +
> +What:/sys/bus/xen-backend/devices/*/state
> +Date:August 2018
> +KernelVersion:   4.19
> +Contact: Joe Jin 
> +Description:
> +The state of the device. One of: 'Unknown',
> +'Initialising', 'Initialised', 'Connected', 'Closing',
> +'Closed', 'Reconfiguring', 'Reconfigured'.
> diff --git a/drivers/xen/xenbus/xenbus_probe.c 
> b/drivers/xen/xenbus/xenbus_probe.c
> index f2088838f690..5b471889d723 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RO(modalias);
>  
> +static ssize_t state_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%s\n",
> + xenbus_strstate(to_xenbus_device(dev)->state));
> +}
> +static DEVICE_ATTR_RO(state);
> +
>  static struct attribute *xenbus_dev_attrs[] = {
>   _attr_nodename.attr,
>   _attr_devtype.attr,
>   _attr_modalias.attr,
> + _attr_state.attr,
>   NULL,
>  };
>  



[PATCH net-next] genetlink: constify genl_err_attr() argument

2018-08-28 Thread Michal Kubecek
genl_err_attr() sets netlink_ext_ack::bad_attr which is a pointer to const
struct nlattr so make the attr argument also const.

Signed-off-by: Michal Kubecek 
---
 include/net/genetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index decf6012a401..aa2e5888f18d 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info 
*info, struct net *net)
 #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
 
 static inline int genl_err_attr(struct genl_info *info, int err,
-   struct nlattr *attr)
+   const struct nlattr *attr)
 {
info->extack->bad_attr = attr;
 
-- 
2.18.0



[PATCH net-next] genetlink: constify genl_err_attr() argument

2018-08-28 Thread Michal Kubecek
genl_err_attr() sets netlink_ext_ack::bad_attr which is a pointer to const
struct nlattr so make the attr argument also const.

Signed-off-by: Michal Kubecek 
---
 include/net/genetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index decf6012a401..aa2e5888f18d 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info 
*info, struct net *net)
 #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
 
 static inline int genl_err_attr(struct genl_info *info, int err,
-   struct nlattr *attr)
+   const struct nlattr *attr)
 {
info->extack->bad_attr = attr;
 
-- 
2.18.0



Re: [PATCH] RISC-V: Mask out the F extension on systems without D

2018-08-28 Thread Palmer Dabbelt

On Tue, 28 Aug 2018 00:10:32 PDT (-0700), alan...@andestech.com wrote:

Hi Palmer,

On Mon, Aug 27, 2018 at 03:03:52PM -0700, Palmer Dabbelt wrote:

The RISC-V Linux port doesn't support systems that have the F extension
but don't have the D extension -- we actually don't support systems
without D either, but Alan's patch set is rectifying that soon.  For now
I think we can leave this in a semi-broken state and just wait for
Alan's patch set to get merged for proper non-FPU support -- the patch
set is starting to look good, so doing something in-between doesn't seem
like it's worth the work.

I don't think it's worth fretting about support for systems with F but
not D for now: our glibc ABIs are IMAC and IMAFDC so they probably won't
end up being popular.  We can always extend this in the future.

CC: Alan Kao 
Signed-off-by: Palmer Dabbelt 
---
 arch/riscv/kernel/cpufeature.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 17011a870044..652d102ffa06 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -57,5 +57,12 @@ void riscv_fill_hwcap(void)
for (i = 0; i < strlen(isa); ++i)
elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])];

+   /* We don't support systems with F but without D, so mask those out
+* here. */
+   if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & 
COMPAT_HWCAP_ISA_D)) {
+   pr_info("This kernel does not support systems with F but not 
D");
+   elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
+   }
+


The commit message does address the problem and this patch does provide checks
and helpful information to users, but I wonder if we really need this patch, for
two reasons:

* Just as you mentioned, current glibc ABI does not support such a thing as
  IMAFC, so probably no one has had trouble with this.  To be honest, I suppose
  that anybody (RISC-V enthusiasts or vendors) who really need F-only support
  in kernel should get themself involved in the development by sending patches
  to improve.

* There are corner cases to let a F-only machine to pass the check in this
  patch.  For instance, a vendor decides to name her extension ISA as doom,
  and supports single-precision FP only, so her ISA string would be

IMAFCXdoom.

  The variable elf_hwcap is calculated at the loop in line 57,58, the 'd'
  from Xdoom would bypass the check, while the underlying machine does not
  support double-precision FP.


Ah, yes, that makes sense.  I'd go the other way here and just be strict about 
parsing the ISA string: it's defined to be listed in a particular order, so we 
should really only be accepting legal ISA strings.


I'll submit a second patch to fix this behavior.




pr_info("elf_hwcap is 0x%lx", elf_hwcap);
 }
--
2.16.4



I don't know if the reasons make sense to you, but anyway that's all I
would like to say about this patch.

Alan


Re: [PATCH] RISC-V: Mask out the F extension on systems without D

2018-08-28 Thread Palmer Dabbelt

On Tue, 28 Aug 2018 00:10:32 PDT (-0700), alan...@andestech.com wrote:

Hi Palmer,

On Mon, Aug 27, 2018 at 03:03:52PM -0700, Palmer Dabbelt wrote:

The RISC-V Linux port doesn't support systems that have the F extension
but don't have the D extension -- we actually don't support systems
without D either, but Alan's patch set is rectifying that soon.  For now
I think we can leave this in a semi-broken state and just wait for
Alan's patch set to get merged for proper non-FPU support -- the patch
set is starting to look good, so doing something in-between doesn't seem
like it's worth the work.

I don't think it's worth fretting about support for systems with F but
not D for now: our glibc ABIs are IMAC and IMAFDC so they probably won't
end up being popular.  We can always extend this in the future.

CC: Alan Kao 
Signed-off-by: Palmer Dabbelt 
---
 arch/riscv/kernel/cpufeature.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 17011a870044..652d102ffa06 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -57,5 +57,12 @@ void riscv_fill_hwcap(void)
for (i = 0; i < strlen(isa); ++i)
elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])];

+   /* We don't support systems with F but without D, so mask those out
+* here. */
+   if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & 
COMPAT_HWCAP_ISA_D)) {
+   pr_info("This kernel does not support systems with F but not 
D");
+   elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
+   }
+


The commit message does address the problem and this patch does provide checks
and helpful information to users, but I wonder if we really need this patch, for
two reasons:

* Just as you mentioned, current glibc ABI does not support such a thing as
  IMAFC, so probably no one has had trouble with this.  To be honest, I suppose
  that anybody (RISC-V enthusiasts or vendors) who really need F-only support
  in kernel should get themself involved in the development by sending patches
  to improve.

* There are corner cases to let a F-only machine to pass the check in this
  patch.  For instance, a vendor decides to name her extension ISA as doom,
  and supports single-precision FP only, so her ISA string would be

IMAFCXdoom.

  The variable elf_hwcap is calculated at the loop in line 57,58, the 'd'
  from Xdoom would bypass the check, while the underlying machine does not
  support double-precision FP.


Ah, yes, that makes sense.  I'd go the other way here and just be strict about 
parsing the ISA string: it's defined to be listed in a particular order, so we 
should really only be accepting legal ISA strings.


I'll submit a second patch to fix this behavior.




pr_info("elf_hwcap is 0x%lx", elf_hwcap);
 }
--
2.16.4



I don't know if the reasons make sense to you, but anyway that's all I
would like to say about this patch.

Alan


Re: [PATCH 1/2] arm64: PCI: Remove node-local allocations when initialising host controller

2018-08-28 Thread Will Deacon
On Tue, Aug 28, 2018 at 04:05:12PM +0100, Punit Agrawal wrote:
> Memory for host controller data structures is allocated local to the
> node to which the controller is associated with. This has been the
> behaviour since support for ACPI was added in
> commit 0cb0786bac15 ("ARM64: PCI: Support ACPI-based PCI host controller").
> 
> Drop the node local allocation as there is no benefit from doing so -
> the usage of these structures is independent from where the controller
> is located.
> 
> Signed-off-by: Punit Agrawal 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Lorenzo Pieralisi 
> Cc: linux-arm-ker...@lists.infradead.org
> ---
>  arch/arm64/kernel/pci.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)

Acked-by: Will Deacon 

Will

> diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> index 0e2ea1c78542..bb85e2f4603f 100644
> --- a/arch/arm64/kernel/pci.c
> +++ b/arch/arm64/kernel/pci.c
> @@ -165,16 +165,15 @@ static void pci_acpi_generic_release_info(struct 
> acpi_pci_root_info *ci)
>  /* Interface called from ACPI code to setup PCI host controller */
>  struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
>  {
> - int node = acpi_get_node(root->device->handle);
>   struct acpi_pci_generic_root_info *ri;
>   struct pci_bus *bus, *child;
>   struct acpi_pci_root_ops *root_ops;
>  
> - ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node);
> + ri = kzalloc(sizeof(*ri), GFP_KERNEL);
>   if (!ri)
>   return NULL;
>  
> - root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
> + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
>   if (!root_ops) {
>   kfree(ri);
>   return NULL;
> -- 
> 2.18.0
> 


Re: [PATCH 1/2] arm64: PCI: Remove node-local allocations when initialising host controller

2018-08-28 Thread Will Deacon
On Tue, Aug 28, 2018 at 04:05:12PM +0100, Punit Agrawal wrote:
> Memory for host controller data structures is allocated local to the
> node to which the controller is associated with. This has been the
> behaviour since support for ACPI was added in
> commit 0cb0786bac15 ("ARM64: PCI: Support ACPI-based PCI host controller").
> 
> Drop the node local allocation as there is no benefit from doing so -
> the usage of these structures is independent from where the controller
> is located.
> 
> Signed-off-by: Punit Agrawal 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Lorenzo Pieralisi 
> Cc: linux-arm-ker...@lists.infradead.org
> ---
>  arch/arm64/kernel/pci.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)

Acked-by: Will Deacon 

Will

> diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> index 0e2ea1c78542..bb85e2f4603f 100644
> --- a/arch/arm64/kernel/pci.c
> +++ b/arch/arm64/kernel/pci.c
> @@ -165,16 +165,15 @@ static void pci_acpi_generic_release_info(struct 
> acpi_pci_root_info *ci)
>  /* Interface called from ACPI code to setup PCI host controller */
>  struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
>  {
> - int node = acpi_get_node(root->device->handle);
>   struct acpi_pci_generic_root_info *ri;
>   struct pci_bus *bus, *child;
>   struct acpi_pci_root_ops *root_ops;
>  
> - ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node);
> + ri = kzalloc(sizeof(*ri), GFP_KERNEL);
>   if (!ri)
>   return NULL;
>  
> - root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
> + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
>   if (!root_ops) {
>   kfree(ri);
>   return NULL;
> -- 
> 2.18.0
> 


Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.

2018-08-28 Thread Anshuman Gupta
Is this patch not in consideration, there are no review comment for it. 
this patch fixes an issue with hdac hdmi codec driver.

On one of our platform HD audio controller takes arounf 300ms.
Below are the snippet of dmesg log.

[ 3778.461888] calling  :00:0e.0+ @ 3667, parent: pci:00, cb: 
pci_pm_resume
[ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs

When HD audio controller is in runtime suspend state, 
with direct complete, we can improve overall system wide resume latency.
 
On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote:
> Keep hdac hdmi codec to be in runtime suspended while entering to
> system wide suspend. Currently hdac hdmi codec driver using its
> suspend and resume operation in prepare and complete PM callbacks,
> and it resumes the hd audio controller (parent of self) from runtime
> suspend and blocks the direct complete for hd audio controller.
>
> If hdac-codec is already in runtime suspend state skip its power down
> sequence in prepare, power up sequence in complete phase. It enables
> direct complete path for hdac-codec and hd audio controller
> PCI device.
>
> Signed-off-by: Anshuman Gupta 
> ---
> Changes in v2
> - Changed the commit message.
> - Using pm_runtime_suspended instead of pm_runtime_status_suspended
>   in order to handle any race condition.
>
> sound/soc/codecs/hdac_hdmi.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
> index 84f7a7a..e965338 100644
> --- a/sound/soc/codecs/hdac_hdmi.c
> +++ b/sound/soc/codecs/hdac_hdmi.c
> @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev)
>   struct hdac_ext_device *edev = to_hda_ext_device(dev);
>   struct hdac_device *hdev = >hdev;
>
> + if (pm_runtime_suspended(dev))
> + return 1;
> +
>   pm_runtime_get_sync(>hdev.dev);
>
>   /*
> @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev)
>   struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev);
>   struct hdac_device *hdev = >hdev;
>
> + if (pm_runtime_suspended(dev))
> + return;
> +
>   /* Power up afg */
>   snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE,
>   AC_PWRST_D0);
> --
> 2.7.4
>

--
Thanks,
Anshuman.
 


Re: [PATCH v2 1/1] hdac-codec runtime suspended at PM:Suspend.

2018-08-28 Thread Anshuman Gupta
Is this patch not in consideration, there are no review comment for it. 
this patch fixes an issue with hdac hdmi codec driver.

On one of our platform HD audio controller takes arounf 300ms.
Below are the snippet of dmesg log.

[ 3778.461888] calling  :00:0e.0+ @ 3667, parent: pci:00, cb: 
pci_pm_resume
[ 3778.775273] call :00:0e.0+ returned 0 after 306016 usecs

When HD audio controller is in runtime suspend state, 
with direct complete, we can improve overall system wide resume latency.
 
On Sat, Aug 18, 2018 at 11:42:03PM +0530, Anshuman Gupta wrote:
> Keep hdac hdmi codec to be in runtime suspended while entering to
> system wide suspend. Currently hdac hdmi codec driver using its
> suspend and resume operation in prepare and complete PM callbacks,
> and it resumes the hd audio controller (parent of self) from runtime
> suspend and blocks the direct complete for hd audio controller.
>
> If hdac-codec is already in runtime suspend state skip its power down
> sequence in prepare, power up sequence in complete phase. It enables
> direct complete path for hdac-codec and hd audio controller
> PCI device.
>
> Signed-off-by: Anshuman Gupta 
> ---
> Changes in v2
> - Changed the commit message.
> - Using pm_runtime_suspended instead of pm_runtime_status_suspended
>   in order to handle any race condition.
>
> sound/soc/codecs/hdac_hdmi.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
> index 84f7a7a..e965338 100644
> --- a/sound/soc/codecs/hdac_hdmi.c
> +++ b/sound/soc/codecs/hdac_hdmi.c
> @@ -1859,6 +1859,9 @@ static int hdmi_codec_prepare(struct device *dev)
>   struct hdac_ext_device *edev = to_hda_ext_device(dev);
>   struct hdac_device *hdev = >hdev;
>
> + if (pm_runtime_suspended(dev))
> + return 1;
> +
>   pm_runtime_get_sync(>hdev.dev);
>
>   /*
> @@ -1880,6 +1883,9 @@ static void hdmi_codec_complete(struct device *dev)
>   struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(>hdev);
>   struct hdac_device *hdev = >hdev;
>
> + if (pm_runtime_suspended(dev))
> + return;
> +
>   /* Power up afg */
>   snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE,
>   AC_PWRST_D0);
> --
> 2.7.4
>

--
Thanks,
Anshuman.
 


Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP

2018-08-28 Thread Jann Horn
On Tue, Aug 28, 2018 at 6:25 PM Borislav Petkov  wrote:
>
> On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote:
> > show_opcodes() is used both for dumping kernel instructions and for dumping
> > user instructions. If userspace causes #PF by jumping to a kernel address,
> > show_opcodes() can be reached with regs->ip controlled by the user,
> > pointing to kernel code.
>
> Yap, and people keep asking how to dump the running kernel, after
> patching and jump labels and stuff... Here's how!
>
> :-
>
> > Make sure that userspace can't trick us into
> > dumping kernel memory into dmesg.
> >
> > Cc: sta...@vger.kernel.org
> > Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function")
>
> I think this one is more likely:
>
>   ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults")
>
> as it added the dumping of user opcode bytes.

No, you can also get user opcode bytes printed by WARN() and friends.
When you add a WARN() in the pagefault handler, you get something like
this. The first "Code:" line is from ba54d856a9d8, but the second one
further down is from before that.

[  125.564041] segfault[1602]: segfault at 854340c0 ip
854340c0 sp 7ffd4cc7a568 error 15
[  125.569923] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00
[  125.576859] [ cut here ]
[  125.578406] TESTING WARN()
[  125.578439] WARNING: CPU: 6 PID: 1602 at arch/x86/mm/fault.c:894
__bad_area_nosemaphore+0x147/0x270
[  125.582172] Modules linked in: bpfilter
[  125.583394] CPU: 6 PID: 1602 Comm: segfault Tainted: GW
4.18.0+ #108
[  125.585811] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.10.2-1 04/01/2014
[  125.588410] RIP: 0010:__bad_area_nosemaphore+0x147/0x270
[  125.590078] Code: 48 89 d9 48 89 ea 44 89 e6 48 c7 83 30 0b 00 00
0e 00 00 00 bf 0b 00 00 00 e8 f5 eb ff ff 48 c7 c7 00 61 66 84 e8 79
11 05 00 <0f> 0b 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 83 c4
28 4c
[  125.595779] RSP: 0018:8801cb3b7e18 EFLAGS: 00010286
[  125.597426] RAX:  RBX: 8801cbb9e000 RCX: 
[  125.599605] RDX: 0001 RSI: dc00 RDI: 86678ea0
[  125.601800] RBP: 854340c0 R08: ed003d873ed5 R09: ed003d873ed5
[  125.603935] R10: 0001 R11: ed003d873ed4 R12: 0001
[  125.606113] R13:  R14: 0015 R15: 8801cb3b7f58
[  125.608250] FS:  7fe30d518700() GS:8801ec38()
knlGS:
[  125.610608] CS:  0010 DS:  ES:  CR0: 80050033
[  125.612331] CR2: 854340c0 CR3: 0001d563e001 CR4: 003606e0
[  125.614470] DR0:  DR1:  DR2: 
[  125.616607] DR3:  DR6: fffe0ff0 DR7: 0400
[  125.618736] Call Trace:
[  125.619475]  __do_page_fault+0x133/0x780
[  125.620646]  ? mm_fault_error+0x1b0/0x1b0
[  125.622236]  ? async_page_fault+0x8/0x30
[  125.623388]  async_page_fault+0x1e/0x30
[  125.624526] RIP: 0033:core_pattern+0x0/0x880
[  125.625786] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00
[  125.631208] RSP: 002b:7ffd4cc7a568 EFLAGS: 00010202
[  125.632737] RAX: 854340c0 RBX:  RCX: 
[  125.635039] RDX: 7ffd4cc7a678 RSI: 7ffd4cc7a668 RDI: 0001
[  125.637088] RBP: 7ffd4cc7a580 R08: 562d395106f0 R09: 7fe30d323cb0
[  125.639153] R10:  R11: 7fe30d0d23c0 R12: 562d39510530
[  125.641183] R13: 7ffd4cc7a660 R14:  R15: 
[  125.643221] ---[ end trace fb20716f9d6369bd ]---

> > Reviewed-by: Kees Cook 
> > Signed-off-by: Jann Horn 
> > ---
> > v2: Andy pointed out that I probably shouldn't be doing wrapping
> > arithmetic on pointers.
> >
> >  arch/x86/include/asm/stacktrace.h |  2 +-
> >  arch/x86/kernel/dumpstack.c   | 13 ++---
> >  arch/x86/mm/fault.c   |  2 +-
> >  3 files changed, 12 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/stacktrace.h 
> > b/arch/x86/include/asm/stacktrace.h
> > index b6dc698f992a..f335aad404a4 100644
> > --- a/arch/x86/include/asm/stacktrace.h
> > +++ b/arch/x86/include/asm/stacktrace.h
> > @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
> >   return (unsigned long)frame;
> >  }
> >
> > -void show_opcodes(u8 *rip, const char *loglvl);
> > +void show_opcodes(struct pt_regs *regs, const char *loglvl);
> >  void show_ip(struct pt_regs *regs, const char *loglvl);
> >  #endif /* _ASM_X86_STACKTRACE_H */
> > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
> > 

Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP

2018-08-28 Thread Jann Horn
On Tue, Aug 28, 2018 at 6:25 PM Borislav Petkov  wrote:
>
> On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote:
> > show_opcodes() is used both for dumping kernel instructions and for dumping
> > user instructions. If userspace causes #PF by jumping to a kernel address,
> > show_opcodes() can be reached with regs->ip controlled by the user,
> > pointing to kernel code.
>
> Yap, and people keep asking how to dump the running kernel, after
> patching and jump labels and stuff... Here's how!
>
> :-
>
> > Make sure that userspace can't trick us into
> > dumping kernel memory into dmesg.
> >
> > Cc: sta...@vger.kernel.org
> > Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function")
>
> I think this one is more likely:
>
>   ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults")
>
> as it added the dumping of user opcode bytes.

No, you can also get user opcode bytes printed by WARN() and friends.
When you add a WARN() in the pagefault handler, you get something like
this. The first "Code:" line is from ba54d856a9d8, but the second one
further down is from before that.

[  125.564041] segfault[1602]: segfault at 854340c0 ip
854340c0 sp 7ffd4cc7a568 error 15
[  125.569923] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00
[  125.576859] [ cut here ]
[  125.578406] TESTING WARN()
[  125.578439] WARNING: CPU: 6 PID: 1602 at arch/x86/mm/fault.c:894
__bad_area_nosemaphore+0x147/0x270
[  125.582172] Modules linked in: bpfilter
[  125.583394] CPU: 6 PID: 1602 Comm: segfault Tainted: GW
4.18.0+ #108
[  125.585811] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.10.2-1 04/01/2014
[  125.588410] RIP: 0010:__bad_area_nosemaphore+0x147/0x270
[  125.590078] Code: 48 89 d9 48 89 ea 44 89 e6 48 c7 83 30 0b 00 00
0e 00 00 00 bf 0b 00 00 00 e8 f5 eb ff ff 48 c7 c7 00 61 66 84 e8 79
11 05 00 <0f> 0b 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 83 c4
28 4c
[  125.595779] RSP: 0018:8801cb3b7e18 EFLAGS: 00010286
[  125.597426] RAX:  RBX: 8801cbb9e000 RCX: 
[  125.599605] RDX: 0001 RSI: dc00 RDI: 86678ea0
[  125.601800] RBP: 854340c0 R08: ed003d873ed5 R09: ed003d873ed5
[  125.603935] R10: 0001 R11: ed003d873ed4 R12: 0001
[  125.606113] R13:  R14: 0015 R15: 8801cb3b7f58
[  125.608250] FS:  7fe30d518700() GS:8801ec38()
knlGS:
[  125.610608] CS:  0010 DS:  ES:  CR0: 80050033
[  125.612331] CR2: 854340c0 CR3: 0001d563e001 CR4: 003606e0
[  125.614470] DR0:  DR1:  DR2: 
[  125.616607] DR3:  DR6: fffe0ff0 DR7: 0400
[  125.618736] Call Trace:
[  125.619475]  __do_page_fault+0x133/0x780
[  125.620646]  ? mm_fault_error+0x1b0/0x1b0
[  125.622236]  ? async_page_fault+0x8/0x30
[  125.623388]  async_page_fault+0x1e/0x30
[  125.624526] RIP: 0033:core_pattern+0x0/0x880
[  125.625786] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 <63> 6f 72 65 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00
[  125.631208] RSP: 002b:7ffd4cc7a568 EFLAGS: 00010202
[  125.632737] RAX: 854340c0 RBX:  RCX: 
[  125.635039] RDX: 7ffd4cc7a678 RSI: 7ffd4cc7a668 RDI: 0001
[  125.637088] RBP: 7ffd4cc7a580 R08: 562d395106f0 R09: 7fe30d323cb0
[  125.639153] R10:  R11: 7fe30d0d23c0 R12: 562d39510530
[  125.641183] R13: 7ffd4cc7a660 R14:  R15: 
[  125.643221] ---[ end trace fb20716f9d6369bd ]---

> > Reviewed-by: Kees Cook 
> > Signed-off-by: Jann Horn 
> > ---
> > v2: Andy pointed out that I probably shouldn't be doing wrapping
> > arithmetic on pointers.
> >
> >  arch/x86/include/asm/stacktrace.h |  2 +-
> >  arch/x86/kernel/dumpstack.c   | 13 ++---
> >  arch/x86/mm/fault.c   |  2 +-
> >  3 files changed, 12 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/stacktrace.h 
> > b/arch/x86/include/asm/stacktrace.h
> > index b6dc698f992a..f335aad404a4 100644
> > --- a/arch/x86/include/asm/stacktrace.h
> > +++ b/arch/x86/include/asm/stacktrace.h
> > @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
> >   return (unsigned long)frame;
> >  }
> >
> > -void show_opcodes(u8 *rip, const char *loglvl);
> > +void show_opcodes(struct pt_regs *regs, const char *loglvl);
> >  void show_ip(struct pt_regs *regs, const char *loglvl);
> >  #endif /* _ASM_X86_STACKTRACE_H */
> > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
> > 

[PATCH] power: supply: remove unused pointer 'dev'

2018-08-28 Thread Colin King
From: Colin Ian King 

Pointer 'dev' is being assigned but is never used hence it is
redundant and can be removed.

Cleans up clang warning:
variable 'dev' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King 
---
 drivers/power/supply/cros_usbpd-charger.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/power/supply/cros_usbpd-charger.c 
b/drivers/power/supply/cros_usbpd-charger.c
index 688a16bacfbb..f2b8de502b82 100644
--- a/drivers/power/supply/cros_usbpd-charger.c
+++ b/drivers/power/supply/cros_usbpd-charger.c
@@ -378,12 +378,10 @@ static int cros_usbpd_charger_ec_event(struct 
notifier_block *nb,
 {
struct cros_ec_device *ec_device;
struct charger_data *charger;
-   struct device *dev;
u32 host_event;
 
charger = container_of(nb, struct charger_data, notifier);
ec_device = charger->ec_device;
-   dev = charger->dev;
 
host_event = cros_ec_get_host_event(ec_device);
if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU)) {
-- 
2.17.1



[PATCH] power: supply: remove unused pointer 'dev'

2018-08-28 Thread Colin King
From: Colin Ian King 

Pointer 'dev' is being assigned but is never used hence it is
redundant and can be removed.

Cleans up clang warning:
variable 'dev' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King 
---
 drivers/power/supply/cros_usbpd-charger.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/power/supply/cros_usbpd-charger.c 
b/drivers/power/supply/cros_usbpd-charger.c
index 688a16bacfbb..f2b8de502b82 100644
--- a/drivers/power/supply/cros_usbpd-charger.c
+++ b/drivers/power/supply/cros_usbpd-charger.c
@@ -378,12 +378,10 @@ static int cros_usbpd_charger_ec_event(struct 
notifier_block *nb,
 {
struct cros_ec_device *ec_device;
struct charger_data *charger;
-   struct device *dev;
u32 host_event;
 
charger = container_of(nb, struct charger_data, notifier);
ec_device = charger->ec_device;
-   dev = charger->dev;
 
host_event = cros_ec_get_host_event(ec_device);
if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU)) {
-- 
2.17.1



Re: [PATCH v2] objtool: Support multiple rodata sections.

2018-08-28 Thread Allan Xavier
Ping... are there any comments on this?

On 03/08/18 19:40, Allan Xavier wrote:
> This commit adds support for processing switch jump tables in objects
> with multiple .rodata sections, such as those created when using
> -ffunction-sections and -fdata-sections.  Currently, objtool always
> looks in .rodata for jump table information which results in many
> "sibling call from callable instruction with modified stack frame"
> warnings with objects compiled using those flags.
> 
> The fix is comprised of three parts:
> 
> 1. Flagging all .rodata sections when importing elf information for
> easier checking later.
> 
> 2. Keeping a reference to the section each relocation is from in order
> to get the list_head for the other relocations in that section.
> 
> 3. Finding jump tables by following relocations to .rodata sections,
> rather than always referencing a single global .rodata section.
> 
> The patch has been tested without data sections enabled and no
> differences in the resulting orc unwind information were seen.
> 
> Note that as objtool adds terminators to end of each .text section the
> unwind information generated between a function+data sections build and
> a normal build aren't directly comparable. Manual inspection suggests
> that objtool is now generating the correct information, or at least
> making more of an effort to do so than it did previously.
> 
> Signed-off-by: Allan Xavier 
> ---
> Changes since v1:
>  - Cleaned up section symbol check.
>  - Fixed brackets.
>  - Moved mark_rodata to decode_sections().
>  - Excluded *.str1.[18] sections from rodata sections.
> 
>  tools/objtool/check.c | 39 +--
>  tools/objtool/check.h |  4 ++--
>  tools/objtool/elf.c   |  1 +
>  tools/objtool/elf.h   |  3 ++-
>  4 files changed, 38 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> index f4a25bd1871fb..e3a5d53c4b83b 100644
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, 
> struct instruction *insn,
>   struct symbol *pfunc = insn->func->pfunc;
>   unsigned int prev_offset = 0;
>  
> - list_for_each_entry_from(rela, >rodata->rela->rela_list, list) {
> + list_for_each_entry_from(rela, >rela_sec->rela_list, list) {
>   if (rela == next_table)
>   break;
>  
> @@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file 
> *file,
>  {
>   struct rela *text_rela, *rodata_rela;
>   struct instruction *orig_insn = insn;
> + struct section *rodata_sec;
>   unsigned long table_offset;
>  
>   /*
> @@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct 
> objtool_file *file,
>   /* look for a relocation which references .rodata */
>   text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
>   insn->len);
> - if (!text_rela || text_rela->sym != file->rodata->sym)
> + if (!text_rela || text_rela->sym->type != STT_SECTION ||
> + !text_rela->sym->sec->rodata)
>   continue;
>  
>   table_offset = text_rela->addend;
> + rodata_sec = text_rela->sym->sec;
> +
>   if (text_rela->type == R_X86_64_PC32)
>   table_offset += 4;
>  
> @@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct 
> objtool_file *file,
>* Make sure the .rodata address isn't associated with a
>* symbol.  gcc jump tables are anonymous data.
>*/
> - if (find_symbol_containing(file->rodata, table_offset))
> + if (find_symbol_containing(rodata_sec, table_offset))
>   continue;
>  
> - rodata_rela = find_rela_by_dest(file->rodata, table_offset);
> + rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
>   if (rodata_rela) {
>   /*
>* Use of RIP-relative switch jumps is quite rare, and
> @@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file 
> *file)
>   struct symbol *func;
>   int ret;
>  
> - if (!file->rodata || !file->rodata->rela)
> + if (!file->rodata)
>   return 0;
>  
>   for_each_sec(file, sec) {
> @@ -1197,10 +1201,34 @@ static int read_retpoline_hints(struct objtool_file 
> *file)
>   return 0;
>  }
>  
> +static void mark_rodata(struct objtool_file *file)
> +{
> + struct section *sec;
> + bool found = false;
> + static const char *str1 = ".str1.";
> + const int str1len = strlen(str1) + 1;
> +
> + for_each_sec(file, sec) {
> + if (strstr(sec->name, ".rodata") == sec->name) {
> + char *str1pos = sec->name + strlen(sec->name) - str1len;
> +
> + /* Skips over .rodata.str1.1 and 

Re: [PATCH v2] objtool: Support multiple rodata sections.

2018-08-28 Thread Allan Xavier
Ping... are there any comments on this?

On 03/08/18 19:40, Allan Xavier wrote:
> This commit adds support for processing switch jump tables in objects
> with multiple .rodata sections, such as those created when using
> -ffunction-sections and -fdata-sections.  Currently, objtool always
> looks in .rodata for jump table information which results in many
> "sibling call from callable instruction with modified stack frame"
> warnings with objects compiled using those flags.
> 
> The fix is comprised of three parts:
> 
> 1. Flagging all .rodata sections when importing elf information for
> easier checking later.
> 
> 2. Keeping a reference to the section each relocation is from in order
> to get the list_head for the other relocations in that section.
> 
> 3. Finding jump tables by following relocations to .rodata sections,
> rather than always referencing a single global .rodata section.
> 
> The patch has been tested without data sections enabled and no
> differences in the resulting orc unwind information were seen.
> 
> Note that as objtool adds terminators to end of each .text section the
> unwind information generated between a function+data sections build and
> a normal build aren't directly comparable. Manual inspection suggests
> that objtool is now generating the correct information, or at least
> making more of an effort to do so than it did previously.
> 
> Signed-off-by: Allan Xavier 
> ---
> Changes since v1:
>  - Cleaned up section symbol check.
>  - Fixed brackets.
>  - Moved mark_rodata to decode_sections().
>  - Excluded *.str1.[18] sections from rodata sections.
> 
>  tools/objtool/check.c | 39 +--
>  tools/objtool/check.h |  4 ++--
>  tools/objtool/elf.c   |  1 +
>  tools/objtool/elf.h   |  3 ++-
>  4 files changed, 38 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> index f4a25bd1871fb..e3a5d53c4b83b 100644
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, 
> struct instruction *insn,
>   struct symbol *pfunc = insn->func->pfunc;
>   unsigned int prev_offset = 0;
>  
> - list_for_each_entry_from(rela, >rodata->rela->rela_list, list) {
> + list_for_each_entry_from(rela, >rela_sec->rela_list, list) {
>   if (rela == next_table)
>   break;
>  
> @@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file 
> *file,
>  {
>   struct rela *text_rela, *rodata_rela;
>   struct instruction *orig_insn = insn;
> + struct section *rodata_sec;
>   unsigned long table_offset;
>  
>   /*
> @@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct 
> objtool_file *file,
>   /* look for a relocation which references .rodata */
>   text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
>   insn->len);
> - if (!text_rela || text_rela->sym != file->rodata->sym)
> + if (!text_rela || text_rela->sym->type != STT_SECTION ||
> + !text_rela->sym->sec->rodata)
>   continue;
>  
>   table_offset = text_rela->addend;
> + rodata_sec = text_rela->sym->sec;
> +
>   if (text_rela->type == R_X86_64_PC32)
>   table_offset += 4;
>  
> @@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct 
> objtool_file *file,
>* Make sure the .rodata address isn't associated with a
>* symbol.  gcc jump tables are anonymous data.
>*/
> - if (find_symbol_containing(file->rodata, table_offset))
> + if (find_symbol_containing(rodata_sec, table_offset))
>   continue;
>  
> - rodata_rela = find_rela_by_dest(file->rodata, table_offset);
> + rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
>   if (rodata_rela) {
>   /*
>* Use of RIP-relative switch jumps is quite rare, and
> @@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file 
> *file)
>   struct symbol *func;
>   int ret;
>  
> - if (!file->rodata || !file->rodata->rela)
> + if (!file->rodata)
>   return 0;
>  
>   for_each_sec(file, sec) {
> @@ -1197,10 +1201,34 @@ static int read_retpoline_hints(struct objtool_file 
> *file)
>   return 0;
>  }
>  
> +static void mark_rodata(struct objtool_file *file)
> +{
> + struct section *sec;
> + bool found = false;
> + static const char *str1 = ".str1.";
> + const int str1len = strlen(str1) + 1;
> +
> + for_each_sec(file, sec) {
> + if (strstr(sec->name, ".rodata") == sec->name) {
> + char *str1pos = sec->name + strlen(sec->name) - str1len;
> +
> + /* Skips over .rodata.str1.1 and 

Re: [PATCH] arm64: dts: ti: k3-am65: Change #address-cells and #size-cells of interconnect to 2

2018-08-28 Thread Tony Lindgren
* Kishon Vijay Abraham I  [180828 10:31]:
> AM65 has two PCIe controllers and each PCIe controller has '2' address
> spaces one within the 4GB address space of the SoC and the other above
> the 4GB address space of the SoC in addition to the register space. The
> size of the address space above the 4GB SoC address space is 4GB. These
> address ranges will be used by CPU/DMA to access the PCIe address space.
> In order to represent the address space above the 4GB SoC address space
> and to represent the size of this address space as 4GB, change
> address-cells and size-cells of interconnect to 2.
...
>   cbass_mcu: interconnect@2838 {
>   compatible = "simple-bus";
>   #address-cells = <1>;
>   #size-cells = <1>;

Yup great, the interconnect instances that don't need above 4GB
address space should stay this way.

Acked-by: Tony Lindgren 


Re: [PATCH] arm64: dts: ti: k3-am65: Change #address-cells and #size-cells of interconnect to 2

2018-08-28 Thread Tony Lindgren
* Kishon Vijay Abraham I  [180828 10:31]:
> AM65 has two PCIe controllers and each PCIe controller has '2' address
> spaces one within the 4GB address space of the SoC and the other above
> the 4GB address space of the SoC in addition to the register space. The
> size of the address space above the 4GB SoC address space is 4GB. These
> address ranges will be used by CPU/DMA to access the PCIe address space.
> In order to represent the address space above the 4GB SoC address space
> and to represent the size of this address space as 4GB, change
> address-cells and size-cells of interconnect to 2.
...
>   cbass_mcu: interconnect@2838 {
>   compatible = "simple-bus";
>   #address-cells = <1>;
>   #size-cells = <1>;

Yup great, the interconnect instances that don't need above 4GB
address space should stay this way.

Acked-by: Tony Lindgren 


Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP

2018-08-28 Thread Borislav Petkov
On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote:
> show_opcodes() is used both for dumping kernel instructions and for dumping
> user instructions. If userspace causes #PF by jumping to a kernel address,
> show_opcodes() can be reached with regs->ip controlled by the user,
> pointing to kernel code.

Yap, and people keep asking how to dump the running kernel, after
patching and jump labels and stuff... Here's how!

:-

> Make sure that userspace can't trick us into
> dumping kernel memory into dmesg.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function")

I think this one is more likely:

  ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults")

as it added the dumping of user opcode bytes.

> Reviewed-by: Kees Cook 
> Signed-off-by: Jann Horn 
> ---
> v2: Andy pointed out that I probably shouldn't be doing wrapping
> arithmetic on pointers.
> 
>  arch/x86/include/asm/stacktrace.h |  2 +-
>  arch/x86/kernel/dumpstack.c   | 13 ++---
>  arch/x86/mm/fault.c   |  2 +-
>  3 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/stacktrace.h 
> b/arch/x86/include/asm/stacktrace.h
> index b6dc698f992a..f335aad404a4 100644
> --- a/arch/x86/include/asm/stacktrace.h
> +++ b/arch/x86/include/asm/stacktrace.h
> @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
>   return (unsigned long)frame;
>  }
>  
> -void show_opcodes(u8 *rip, const char *loglvl);
> +void show_opcodes(struct pt_regs *regs, const char *loglvl);
>  void show_ip(struct pt_regs *regs, const char *loglvl);
>  #endif /* _ASM_X86_STACKTRACE_H */
> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
> index 9c8652974f8e..14b337582b6f 100644
> --- a/arch/x86/kernel/dumpstack.c
> +++ b/arch/x86/kernel/dumpstack.c
> @@ -89,14 +89,21 @@ static void printk_stack_address(unsigned long address, 
> int reliable,
>   * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
>   * guesstimate in attempt to achieve all of the above.
>   */
> -void show_opcodes(u8 *rip, const char *loglvl)
> +void show_opcodes(struct pt_regs *regs, const char *loglvl)
>  {
>  #define PROLOGUE_SIZE 42
>  #define EPILOGUE_SIZE 21
>  #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
>   u8 opcodes[OPCODE_BUFSIZE];
> + u8 *prologue = (u8 *)(regs->ip - PROLOGUE_SIZE);
> + /*
> +  * Make sure userspace isn't trying to trick us into dumping kernel
> +  * memory by pointing the userspace instruction pointer at it.
> +  */
> + bool bad_ip = user_mode(regs) &&
> +   __range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
>  

Ok, can we pls move the sole dumping of opcodes in a helper called,
__show_opcodes(), for example, which the checking wrapper show_opcodes()
- without the "__" prefix - calls?

So that show_signal_msg() can call the checking variant - show_opcodes()
- as userspace might be doing monkey business there and we definitely
wanna check first but __show_regs() can call the non-checking variant
__show_opcodes() because there we wanna dump whatever rIP points to
because we wanna know if the machine has gone off into the weeds etc,
when staring at splats.

Or am I missing a security aspect here?

Thx.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v2] x86/dumpstack: don't dump kernel memory based on usermode RIP

2018-08-28 Thread Borislav Petkov
On Tue, Aug 28, 2018 at 05:49:01PM +0200, Jann Horn wrote:
> show_opcodes() is used both for dumping kernel instructions and for dumping
> user instructions. If userspace causes #PF by jumping to a kernel address,
> show_opcodes() can be reached with regs->ip controlled by the user,
> pointing to kernel code.

Yap, and people keep asking how to dump the running kernel, after
patching and jump labels and stuff... Here's how!

:-

> Make sure that userspace can't trick us into
> dumping kernel memory into dmesg.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function")

I think this one is more likely:

  ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal faults")

as it added the dumping of user opcode bytes.

> Reviewed-by: Kees Cook 
> Signed-off-by: Jann Horn 
> ---
> v2: Andy pointed out that I probably shouldn't be doing wrapping
> arithmetic on pointers.
> 
>  arch/x86/include/asm/stacktrace.h |  2 +-
>  arch/x86/kernel/dumpstack.c   | 13 ++---
>  arch/x86/mm/fault.c   |  2 +-
>  3 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/stacktrace.h 
> b/arch/x86/include/asm/stacktrace.h
> index b6dc698f992a..f335aad404a4 100644
> --- a/arch/x86/include/asm/stacktrace.h
> +++ b/arch/x86/include/asm/stacktrace.h
> @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
>   return (unsigned long)frame;
>  }
>  
> -void show_opcodes(u8 *rip, const char *loglvl);
> +void show_opcodes(struct pt_regs *regs, const char *loglvl);
>  void show_ip(struct pt_regs *regs, const char *loglvl);
>  #endif /* _ASM_X86_STACKTRACE_H */
> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
> index 9c8652974f8e..14b337582b6f 100644
> --- a/arch/x86/kernel/dumpstack.c
> +++ b/arch/x86/kernel/dumpstack.c
> @@ -89,14 +89,21 @@ static void printk_stack_address(unsigned long address, 
> int reliable,
>   * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
>   * guesstimate in attempt to achieve all of the above.
>   */
> -void show_opcodes(u8 *rip, const char *loglvl)
> +void show_opcodes(struct pt_regs *regs, const char *loglvl)
>  {
>  #define PROLOGUE_SIZE 42
>  #define EPILOGUE_SIZE 21
>  #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
>   u8 opcodes[OPCODE_BUFSIZE];
> + u8 *prologue = (u8 *)(regs->ip - PROLOGUE_SIZE);
> + /*
> +  * Make sure userspace isn't trying to trick us into dumping kernel
> +  * memory by pointing the userspace instruction pointer at it.
> +  */
> + bool bad_ip = user_mode(regs) &&
> +   __range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
>  

Ok, can we pls move the sole dumping of opcodes in a helper called,
__show_opcodes(), for example, which the checking wrapper show_opcodes()
- without the "__" prefix - calls?

So that show_signal_msg() can call the checking variant - show_opcodes()
- as userspace might be doing monkey business there and we definitely
wanna check first but __show_regs() can call the non-checking variant
__show_opcodes() because there we wanna dump whatever rIP points to
because we wanna know if the machine has gone off into the weeds etc,
when staring at splats.

Or am I missing a security aspect here?

Thx.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH] serial: 8250_omap: Make 8250_omap driver driver depend on ARCH_K3

2018-08-28 Thread Tony Lindgren
* Nishanth Menon  [180828 01:07]:
> From: Lokesh Vutla 
> 
> Allow 8250 omap serial driver to be used for K3 platforms.
> 
> Signed-off-by: Lokesh Vutla 
> Signed-off-by: Nishanth Menon 
> ---
> 
> Now that we have the device tree support merged integrated AND we have 
> ARCH_K3,
> Lets enable the 820 OMAP Driver to build for ARCH_K3 and make it operational.

Acked-by: Tony Lindgren 


Re: [PATCH] serial: 8250_omap: Make 8250_omap driver driver depend on ARCH_K3

2018-08-28 Thread Tony Lindgren
* Nishanth Menon  [180828 01:07]:
> From: Lokesh Vutla 
> 
> Allow 8250 omap serial driver to be used for K3 platforms.
> 
> Signed-off-by: Lokesh Vutla 
> Signed-off-by: Nishanth Menon 
> ---
> 
> Now that we have the device tree support merged integrated AND we have 
> ARCH_K3,
> Lets enable the 820 OMAP Driver to build for ARCH_K3 and make it operational.

Acked-by: Tony Lindgren 


Re: [PATCH] arm64: defconfig: Enable TI's AM6 SoC platform

2018-08-28 Thread Tony Lindgren
* Nishanth Menon  [180828 00:50]:
> Enable K3 SoC platform for TI's AM6 SoC.
> 
> Signed-off-by: Nishanth Menon 
> ---
> Ref: https://www.spinics.net/lists/arm-kernel/msg667805.html
> 
> Unfortunately, Kconfig changes did'nt hit v4.19-rc1 window.
> So, as promised, patch based off v4.19-rc1 tag.

Acked-by: Tony Lindgren 


Re: [PATCH] arm64: defconfig: Enable TI's AM6 SoC platform

2018-08-28 Thread Tony Lindgren
* Nishanth Menon  [180828 00:50]:
> Enable K3 SoC platform for TI's AM6 SoC.
> 
> Signed-off-by: Nishanth Menon 
> ---
> Ref: https://www.spinics.net/lists/arm-kernel/msg667805.html
> 
> Unfortunately, Kconfig changes did'nt hit v4.19-rc1 window.
> So, as promised, patch based off v4.19-rc1 tag.

Acked-by: Tony Lindgren 


[ftrace/kprobes PATCH 3/3] tracing/kprobes: Allow kprobe-events to record module symbol

2018-08-28 Thread Masami Hiramatsu
Allow kprobe-events to record module symbols.

Since data symbols in a non-loaded module doesn't exist, it fails to
define such symbol as an argument of kprobe-event. But if the kprobe
event is defined on that module, we can defer to resolve the symbol
address.

Note that if given symbol is not found, the event is kept unavailable.
User can enable it but the event is not recorded.

Signed-off-by: Masami Hiramatsu 
---
 kernel/trace/trace_kprobe.c |   12 
 kernel/trace/trace_probe.c  |   62 +--
 kernel/trace/trace_probe.h  |4 ++-
 3 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index fbf609cbeac2..920985fae8c0 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -366,7 +366,7 @@ static bool within_notrace_func(struct trace_kprobe *tk)
 /* Internal register function - just handle k*probes and flags */
 static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
-   int ret;
+   int i, ret;
 
if (trace_probe_is_registered(>tp))
return -EINVAL;
@@ -377,6 +377,12 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
return -EINVAL;
}
 
+   for (i = 0; i < tk->tp.nr_args; i++) {
+   ret = traceprobe_update_arg(>tp.args[i]);
+   if (ret)
+   return ret;
+   }
+
/* Set/clear disabled flag according to tp->flag */
if (trace_probe_is_enabled(>tp))
tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
@@ -925,6 +931,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs 
*regs, void *dest,
 {
unsigned long val;
 
+retry:
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_REG:
@@ -950,6 +957,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs 
*regs, void *dest,
val = regs_get_kernel_argument(regs, code->param);
break;
 #endif
+   case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
+   code++;
+   goto retry;
default:
return -EILSEQ;
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 333cda6d2633..5b3d573b3dcf 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -251,16 +251,16 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
if (!(flags & TPARG_FL_KERNEL))
return -EINVAL;
 
-   ret = traceprobe_split_symbol_offset(arg + 1, );
-   if (ret)
-   break;
+   /* Preserve symbol for updating */
+   code->op = FETCH_NOP_SYMBOL;
+   code->data = kstrdup(arg + 1, GFP_KERNEL);
+   if (!code->data)
+   return -ENOMEM;
+   if (++code == end)
+   return -E2BIG;
 
code->op = FETCH_OP_IMM;
-   code->immediate =
-   (unsigned long)kallsyms_lookup_name(arg + 1);
-   if (!code->immediate)
-   return -ENOENT;
-   code->immediate += offset;
+   code->immediate = 0;
}
/* These are fetching from memory */
if (++code == end)
@@ -480,6 +480,11 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1));
 
 fail:
+   if (ret) {
+   for (code = tmp; code < tmp + FETCH_INSN_MAX; code++)
+   if (code->op == FETCH_NOP_SYMBOL)
+   kfree(code->data);
+   }
kfree(tmp);
 
return ret;
@@ -504,12 +509,53 @@ int traceprobe_conflict_field_name(const char *name,
 
 void traceprobe_free_probe_arg(struct probe_arg *arg)
 {
+   struct fetch_insn *code = arg->code;
+
+   while (code && code->op != FETCH_OP_END) {
+   if (code->op == FETCH_NOP_SYMBOL)
+   kfree(code->data);
+   code++;
+   }
kfree(arg->code);
kfree(arg->name);
kfree(arg->comm);
kfree(arg->fmt);
 }
 
+int traceprobe_update_arg(struct probe_arg *arg)
+{
+   struct fetch_insn *code = arg->code;
+   long offset;
+   char *tmp;
+   char c;
+   int ret = 0;
+
+   while (code && code->op != FETCH_OP_END) {
+   if (code->op == FETCH_NOP_SYMBOL) {
+   if (code[1].op != FETCH_OP_IMM)
+   return -EINVAL;
+
+   tmp = strpbrk("+-", code->data);
+   if (tmp)
+   c = *tmp;
+   ret = 

[ftrace/kprobes PATCH 3/3] tracing/kprobes: Allow kprobe-events to record module symbol

2018-08-28 Thread Masami Hiramatsu
Allow kprobe-events to record module symbols.

Since data symbols in a non-loaded module doesn't exist, it fails to
define such symbol as an argument of kprobe-event. But if the kprobe
event is defined on that module, we can defer to resolve the symbol
address.

Note that if given symbol is not found, the event is kept unavailable.
User can enable it but the event is not recorded.

Signed-off-by: Masami Hiramatsu 
---
 kernel/trace/trace_kprobe.c |   12 
 kernel/trace/trace_probe.c  |   62 +--
 kernel/trace/trace_probe.h  |4 ++-
 3 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index fbf609cbeac2..920985fae8c0 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -366,7 +366,7 @@ static bool within_notrace_func(struct trace_kprobe *tk)
 /* Internal register function - just handle k*probes and flags */
 static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
-   int ret;
+   int i, ret;
 
if (trace_probe_is_registered(>tp))
return -EINVAL;
@@ -377,6 +377,12 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
return -EINVAL;
}
 
+   for (i = 0; i < tk->tp.nr_args; i++) {
+   ret = traceprobe_update_arg(>tp.args[i]);
+   if (ret)
+   return ret;
+   }
+
/* Set/clear disabled flag according to tp->flag */
if (trace_probe_is_enabled(>tp))
tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
@@ -925,6 +931,7 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs 
*regs, void *dest,
 {
unsigned long val;
 
+retry:
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_REG:
@@ -950,6 +957,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs 
*regs, void *dest,
val = regs_get_kernel_argument(regs, code->param);
break;
 #endif
+   case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
+   code++;
+   goto retry;
default:
return -EILSEQ;
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 333cda6d2633..5b3d573b3dcf 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -251,16 +251,16 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
if (!(flags & TPARG_FL_KERNEL))
return -EINVAL;
 
-   ret = traceprobe_split_symbol_offset(arg + 1, );
-   if (ret)
-   break;
+   /* Preserve symbol for updating */
+   code->op = FETCH_NOP_SYMBOL;
+   code->data = kstrdup(arg + 1, GFP_KERNEL);
+   if (!code->data)
+   return -ENOMEM;
+   if (++code == end)
+   return -E2BIG;
 
code->op = FETCH_OP_IMM;
-   code->immediate =
-   (unsigned long)kallsyms_lookup_name(arg + 1);
-   if (!code->immediate)
-   return -ENOENT;
-   code->immediate += offset;
+   code->immediate = 0;
}
/* These are fetching from memory */
if (++code == end)
@@ -480,6 +480,11 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1));
 
 fail:
+   if (ret) {
+   for (code = tmp; code < tmp + FETCH_INSN_MAX; code++)
+   if (code->op == FETCH_NOP_SYMBOL)
+   kfree(code->data);
+   }
kfree(tmp);
 
return ret;
@@ -504,12 +509,53 @@ int traceprobe_conflict_field_name(const char *name,
 
 void traceprobe_free_probe_arg(struct probe_arg *arg)
 {
+   struct fetch_insn *code = arg->code;
+
+   while (code && code->op != FETCH_OP_END) {
+   if (code->op == FETCH_NOP_SYMBOL)
+   kfree(code->data);
+   code++;
+   }
kfree(arg->code);
kfree(arg->name);
kfree(arg->comm);
kfree(arg->fmt);
 }
 
+int traceprobe_update_arg(struct probe_arg *arg)
+{
+   struct fetch_insn *code = arg->code;
+   long offset;
+   char *tmp;
+   char c;
+   int ret = 0;
+
+   while (code && code->op != FETCH_OP_END) {
+   if (code->op == FETCH_NOP_SYMBOL) {
+   if (code[1].op != FETCH_OP_IMM)
+   return -EINVAL;
+
+   tmp = strpbrk("+-", code->data);
+   if (tmp)
+   c = *tmp;
+   ret = 

Re: [PATCH] perf: Support for Arm A32/T32 instruction sets in CoreSight trace

2018-08-28 Thread Mathieu Poirier
Hi Robert,

Your patch landed in the middle of the merge window and will have to be sent
again rebased on 4.19-rc1 and long with minor fix found below.

Regards,
Mathieu 

On Wed, Aug 22, 2018 at 05:03:57PM +0100, Robert Walker wrote:
> This patch adds support for generating instruction samples from trace of
> AArch32 programs using the A32 and T32 instruction sets.
> 
> T32 has variable 2 or 4 byte instruction size, so the conversion between
> addresses and instruction counts requires extra information from the trace
> decoder, requiring version 0.9.1 of OpenCSD.  A check for the new struct
> member has been added to the feature check for OpenCSD.
> 
> Signed-off-by: Robert Walker 
> ---
>  tools/build/feature/test-libopencsd.c   |  7 +++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 27 ++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 
>  tools/perf/util/cs-etm.c| 71 
> +++--
>  4 files changed, 75 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/build/feature/test-libopencsd.c 
> b/tools/build/feature/test-libopencsd.c
> index 5ff1246..d96b2df 100644
> --- a/tools/build/feature/test-libopencsd.c
> +++ b/tools/build/feature/test-libopencsd.c
> @@ -3,6 +3,13 @@
>  
>  int main(void)
>  {
> + /*
> +  * Requires ocsd_generic_trace_elem.num_instr_range introduced in
> +  * OpenCSD 0.9
> +  */
> + ocsd_generic_trace_elem elem;
> + (void)elem.num_instr_range;
> +

I really like this - simple and efficient.

>   (void)ocsd_get_version();
>   return 0;
>  }
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 
> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 938def6..73d8384 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct 
> cs_etm_decoder *decoder)
>   decoder->tail = 0;
>   decoder->packet_count = 0;
>   for (i = 0; i < MAX_BUFFER; i++) {
> + decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
>   decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
>   decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
> + decoder->packet_buffer[i].instr_count = 0;
>   decoder->packet_buffer[i].last_instr_taken_branch = false;
> + decoder->packet_buffer[i].last_instr_size = 0;
>   decoder->packet_buffer[i].exc = false;
>   decoder->packet_buffer[i].exc_ret = false;
>   decoder->packet_buffer[i].cpu = INT_MIN;
> @@ -294,11 +297,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder 
> *decoder,
>   decoder->packet_count++;
>  
>   decoder->packet_buffer[et].sample_type = sample_type;
> + decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
>   decoder->packet_buffer[et].exc = false;
>   decoder->packet_buffer[et].exc_ret = false;
>   decoder->packet_buffer[et].cpu = *((int *)inode->priv);
>   decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
>   decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
> + decoder->packet_buffer[et].instr_count = 0;
>  
>   if (decoder->packet_count == MAX_BUFFER - 1)
>   return OCSD_RESP_WAIT;
> @@ -321,8 +326,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder 
> *decoder,
>  
>   packet = >packet_buffer[decoder->tail];
>  
> + switch (elem->isa) {
> + case ocsd_isa_aarch64:
> + packet->isa = CS_ETM_ISA_A64;
> + break;
> + case ocsd_isa_arm:
> + packet->isa = CS_ETM_ISA_A32;
> + break;
> + case ocsd_isa_thumb2:
> + packet->isa = CS_ETM_ISA_T32;
> + break;
> + case ocsd_isa_tee:
> + case ocsd_isa_jazelle:
> + case ocsd_isa_custom:
> + case ocsd_isa_unknown:
> + default:
> + packet->isa = CS_ETM_ISA_UNKNOWN;
> + }
> +
>   packet->start_addr = elem->st_addr;
>   packet->end_addr = elem->en_addr;
> + packet->instr_count = elem->num_instr_range;
> +
>   switch (elem->last_i_type) {
>   case OCSD_INSTR_BR:
>   case OCSD_INSTR_BR_INDIRECT:
> @@ -336,6 +361,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder 
> *decoder,
>   break;
>   }
>  
> + packet->last_instr_size = elem->last_instr_sz;
> +
>   return ret;
>  }
>  
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h 
> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 612b575..9a10eda 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -28,11 +28,21 @@ enum cs_etm_sample_type {
>   CS_ETM_TRACE_ON = 1 << 1,
>  };
>  
> +enum cs_etm_isa {
> + CS_ETM_ISA_UNKNOWN,
> + CS_ETM_ISA_A64,
> + CS_ETM_ISA_A32,
> + CS_ETM_ISA_T32,
> +};
> +
>  struct cs_etm_packet {
>   enum cs_etm_sample_type 

[ftrace/kprobes PATCH 2/3] tracing/kprobes: Check the probe on unloaded module correctly

2018-08-28 Thread Masami Hiramatsu
Current kprobe event doesn't checks correctly whether the
given event is on unloaded module or not. It just checks
the event has ":" in the name.

That is not enough because if we define a probe on non-exist
symbol on loaded module, it allows to define that (with
warning message)

To ensure it correctly, this searches the module name on
loaded module list and only if there is not, it allows to
define it. (this event will be available when the target
module is loaded)

Signed-off-by: Masami Hiramatsu 
---
 kernel/trace/trace_kprobe.c |   39 ++-
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b006aaeceb92..fbf609cbeac2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -62,9 +62,23 @@ static nokprobe_inline bool 
trace_kprobe_within_module(struct trace_kprobe *tk,
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
 }
 
-static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
 {
-   return !!strchr(trace_kprobe_symbol(tk), ':');
+   char *p;
+   bool ret;
+
+   if (!tk->symbol)
+   return false;
+   p = strchr(tk->symbol, ':');
+   if (!p)
+   return true;
+   *p = '\0';
+   mutex_lock(_mutex);
+   ret = !!find_module(tk->symbol);
+   mutex_unlock(_mutex);
+   *p = ':';
+
+   return ret;
 }
 
 static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -374,19 +388,13 @@ static int __register_trace_kprobe(struct trace_kprobe 
*tk)
else
ret = register_kprobe(>rp.kp);
 
-   if (ret == 0)
+   if (ret == 0) {
tk->tp.flags |= TP_FLAG_REGISTERED;
-   else {
-   if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
-   pr_warn("This probe might be able to register after 
target module is loaded. Continue.\n");
-   ret = 0;
-   } else if (ret == -EILSEQ) {
-   pr_warn("Probing address(0x%p) is not an instruction 
boundary.\n",
-   tk->rp.kp.addr);
-   ret = -EINVAL;
-   }
+   } else if (ret == -EILSEQ) {
+   pr_warn("Probing address(0x%p) is not an instruction 
boundary.\n",
+   tk->rp.kp.addr);
+   ret = -EINVAL;
}
-
return ret;
 }
 
@@ -449,6 +457,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 
/* Register k*probe */
ret = __register_trace_kprobe(tk);
+   if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+   pr_warn("This probe might be able to register after target 
module is loaded. Continue.\n");
+   ret = 0;
+   }
+
if (ret < 0)
unregister_kprobe_event(tk);
else



[ftrace/kprobes PATCH 1/3] tracing/uprobes: Fix to return -EFAULT if copy_from_user failed

2018-08-28 Thread Masami Hiramatsu
Fix probe_mem_read() to return -EFAULT if copy_from_user()
failed. The copy_from_user() returns remaining bytes
when it failed, but probe_mem_read() caller expects it
returns error code like as probe_kernel_read().

Reported-by: Dan Carpenter 
Signed-off-by: Masami Hiramatsu 
---
 kernel/trace/trace_uprobe.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 12bdbdf772ed..63e99ee716fe 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -104,7 +104,7 @@ probe_mem_read(void *dest, void *src, size_t size)
 {
void __user *vaddr = (void __force __user *)src;
 
-   return copy_from_user(dest, vaddr, size);
+   return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
 }
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max



Re: [PATCH] perf: Support for Arm A32/T32 instruction sets in CoreSight trace

2018-08-28 Thread Mathieu Poirier
Hi Robert,

Your patch landed in the middle of the merge window and will have to be sent
again rebased on 4.19-rc1 and long with minor fix found below.

Regards,
Mathieu 

On Wed, Aug 22, 2018 at 05:03:57PM +0100, Robert Walker wrote:
> This patch adds support for generating instruction samples from trace of
> AArch32 programs using the A32 and T32 instruction sets.
> 
> T32 has variable 2 or 4 byte instruction size, so the conversion between
> addresses and instruction counts requires extra information from the trace
> decoder, requiring version 0.9.1 of OpenCSD.  A check for the new struct
> member has been added to the feature check for OpenCSD.
> 
> Signed-off-by: Robert Walker 
> ---
>  tools/build/feature/test-libopencsd.c   |  7 +++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 27 ++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 
>  tools/perf/util/cs-etm.c| 71 
> +++--
>  4 files changed, 75 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/build/feature/test-libopencsd.c 
> b/tools/build/feature/test-libopencsd.c
> index 5ff1246..d96b2df 100644
> --- a/tools/build/feature/test-libopencsd.c
> +++ b/tools/build/feature/test-libopencsd.c
> @@ -3,6 +3,13 @@
>  
>  int main(void)
>  {
> + /*
> +  * Requires ocsd_generic_trace_elem.num_instr_range introduced in
> +  * OpenCSD 0.9
> +  */
> + ocsd_generic_trace_elem elem;
> + (void)elem.num_instr_range;
> +

I really like this - simple and efficient.

>   (void)ocsd_get_version();
>   return 0;
>  }
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 
> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 938def6..73d8384 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct 
> cs_etm_decoder *decoder)
>   decoder->tail = 0;
>   decoder->packet_count = 0;
>   for (i = 0; i < MAX_BUFFER; i++) {
> + decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
>   decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
>   decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
> + decoder->packet_buffer[i].instr_count = 0;
>   decoder->packet_buffer[i].last_instr_taken_branch = false;
> + decoder->packet_buffer[i].last_instr_size = 0;
>   decoder->packet_buffer[i].exc = false;
>   decoder->packet_buffer[i].exc_ret = false;
>   decoder->packet_buffer[i].cpu = INT_MIN;
> @@ -294,11 +297,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder 
> *decoder,
>   decoder->packet_count++;
>  
>   decoder->packet_buffer[et].sample_type = sample_type;
> + decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
>   decoder->packet_buffer[et].exc = false;
>   decoder->packet_buffer[et].exc_ret = false;
>   decoder->packet_buffer[et].cpu = *((int *)inode->priv);
>   decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
>   decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
> + decoder->packet_buffer[et].instr_count = 0;
>  
>   if (decoder->packet_count == MAX_BUFFER - 1)
>   return OCSD_RESP_WAIT;
> @@ -321,8 +326,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder 
> *decoder,
>  
>   packet = >packet_buffer[decoder->tail];
>  
> + switch (elem->isa) {
> + case ocsd_isa_aarch64:
> + packet->isa = CS_ETM_ISA_A64;
> + break;
> + case ocsd_isa_arm:
> + packet->isa = CS_ETM_ISA_A32;
> + break;
> + case ocsd_isa_thumb2:
> + packet->isa = CS_ETM_ISA_T32;
> + break;
> + case ocsd_isa_tee:
> + case ocsd_isa_jazelle:
> + case ocsd_isa_custom:
> + case ocsd_isa_unknown:
> + default:
> + packet->isa = CS_ETM_ISA_UNKNOWN;
> + }
> +
>   packet->start_addr = elem->st_addr;
>   packet->end_addr = elem->en_addr;
> + packet->instr_count = elem->num_instr_range;
> +
>   switch (elem->last_i_type) {
>   case OCSD_INSTR_BR:
>   case OCSD_INSTR_BR_INDIRECT:
> @@ -336,6 +361,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder 
> *decoder,
>   break;
>   }
>  
> + packet->last_instr_size = elem->last_instr_sz;
> +
>   return ret;
>  }
>  
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h 
> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 612b575..9a10eda 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -28,11 +28,21 @@ enum cs_etm_sample_type {
>   CS_ETM_TRACE_ON = 1 << 1,
>  };
>  
> +enum cs_etm_isa {
> + CS_ETM_ISA_UNKNOWN,
> + CS_ETM_ISA_A64,
> + CS_ETM_ISA_A32,
> + CS_ETM_ISA_T32,
> +};
> +
>  struct cs_etm_packet {
>   enum cs_etm_sample_type 

[ftrace/kprobes PATCH 2/3] tracing/kprobes: Check the probe on unloaded module correctly

2018-08-28 Thread Masami Hiramatsu
Current kprobe event doesn't checks correctly whether the
given event is on unloaded module or not. It just checks
the event has ":" in the name.

That is not enough because if we define a probe on non-exist
symbol on loaded module, it allows to define that (with
warning message)

To ensure it correctly, this searches the module name on
loaded module list and only if there is not, it allows to
define it. (this event will be available when the target
module is loaded)

Signed-off-by: Masami Hiramatsu 
---
 kernel/trace/trace_kprobe.c |   39 ++-
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b006aaeceb92..fbf609cbeac2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -62,9 +62,23 @@ static nokprobe_inline bool 
trace_kprobe_within_module(struct trace_kprobe *tk,
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
 }
 
-static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
 {
-   return !!strchr(trace_kprobe_symbol(tk), ':');
+   char *p;
+   bool ret;
+
+   if (!tk->symbol)
+   return false;
+   p = strchr(tk->symbol, ':');
+   if (!p)
+   return true;
+   *p = '\0';
+   mutex_lock(_mutex);
+   ret = !!find_module(tk->symbol);
+   mutex_unlock(_mutex);
+   *p = ':';
+
+   return ret;
 }
 
 static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -374,19 +388,13 @@ static int __register_trace_kprobe(struct trace_kprobe 
*tk)
else
ret = register_kprobe(>rp.kp);
 
-   if (ret == 0)
+   if (ret == 0) {
tk->tp.flags |= TP_FLAG_REGISTERED;
-   else {
-   if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
-   pr_warn("This probe might be able to register after 
target module is loaded. Continue.\n");
-   ret = 0;
-   } else if (ret == -EILSEQ) {
-   pr_warn("Probing address(0x%p) is not an instruction 
boundary.\n",
-   tk->rp.kp.addr);
-   ret = -EINVAL;
-   }
+   } else if (ret == -EILSEQ) {
+   pr_warn("Probing address(0x%p) is not an instruction 
boundary.\n",
+   tk->rp.kp.addr);
+   ret = -EINVAL;
}
-
return ret;
 }
 
@@ -449,6 +457,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 
/* Register k*probe */
ret = __register_trace_kprobe(tk);
+   if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+   pr_warn("This probe might be able to register after target 
module is loaded. Continue.\n");
+   ret = 0;
+   }
+
if (ret < 0)
unregister_kprobe_event(tk);
else



[ftrace/kprobes PATCH 1/3] tracing/uprobes: Fix to return -EFAULT if copy_from_user failed

2018-08-28 Thread Masami Hiramatsu
Fix probe_mem_read() to return -EFAULT if copy_from_user()
failed. The copy_from_user() returns remaining bytes
when it failed, but probe_mem_read() caller expects it
returns error code like as probe_kernel_read().

Reported-by: Dan Carpenter 
Signed-off-by: Masami Hiramatsu 
---
 kernel/trace/trace_uprobe.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 12bdbdf772ed..63e99ee716fe 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -104,7 +104,7 @@ probe_mem_read(void *dest, void *src, size_t size)
 {
void __user *vaddr = (void __force __user *)src;
 
-   return copy_from_user(dest, vaddr, size);
+   return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
 }
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max



[ftrace/kprobes PATCH 0/3] tracing: probeevent: Fix module symbol probing

2018-08-28 Thread Masami Hiramatsu
Hi,

This series is for fixing some bugs in Steve's ftrace/kprobes branch.

git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git

Which is based on my fetcharg improvement series.

https://lkml.org/lkml/2018/4/25/601

This includes following fixes.

- Fix copy_from_user() misusing which Dan was reported.
- Fix to reject incorrect probeevent on loaded kernel module.
- Fix to update symbol-based argument on module.
  This also checks the symbol-based argument is correct or not when
  target module is loaded. If it is not correct, the event is kept
  unavailable.

Thank you,

---

Masami Hiramatsu (3):
  tracing/uprobes: Fix to return -EFAULT if copy_from_user failed
  tracing/kprobes: Check the probe on unloaded module correctly
  tracing/kprobes: Allow kprobe-events to record module symbol


 kernel/trace/trace_kprobe.c |   51 ++-
 kernel/trace/trace_probe.c  |   62 +--
 kernel/trace/trace_probe.h  |4 ++-
 kernel/trace/trace_uprobe.c |2 +
 4 files changed, 95 insertions(+), 24 deletions(-)

--
Masami Hiramatsu (Linaro) 


[ftrace/kprobes PATCH 0/3] tracing: probeevent: Fix module symbol probing

2018-08-28 Thread Masami Hiramatsu
Hi,

This series is for fixing some bugs in Steve's ftrace/kprobes branch.

git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git

Which is based on my fetcharg improvement series.

https://lkml.org/lkml/2018/4/25/601

This includes following fixes.

- Fix copy_from_user() misusing which Dan was reported.
- Fix to reject incorrect probeevent on loaded kernel module.
- Fix to update symbol-based argument on module.
  This also checks the symbol-based argument is correct or not when
  target module is loaded. If it is not correct, the event is kept
  unavailable.

Thank you,

---

Masami Hiramatsu (3):
  tracing/uprobes: Fix to return -EFAULT if copy_from_user failed
  tracing/kprobes: Check the probe on unloaded module correctly
  tracing/kprobes: Allow kprobe-events to record module symbol


 kernel/trace/trace_kprobe.c |   51 ++-
 kernel/trace/trace_probe.c  |   62 +--
 kernel/trace/trace_probe.h  |4 ++-
 kernel/trace/trace_uprobe.c |2 +
 4 files changed, 95 insertions(+), 24 deletions(-)

--
Masami Hiramatsu (Linaro) 


[PATCH] [v2] HID: add support for Apple Magic Trackpad 2

2018-08-28 Thread Sean O'Brien
USB device
Vendor 05ac (Apple)
Device 0265 (Magic Trackpad 2)
Bluetooth device
Vendor 004c (Apple)
Device 0265 (Magic Trackpad 2)

Add support for Apple Magic Trackpad 2 over USB and bluetooth, putting
the device in multi-touch mode.

Signed-off-by: Claudio Mettler 
Signed-off-by: Marek Wyborski 
Signed-off-by: Sean O'Brien 
---

 drivers/hid/hid-ids.h|   2 +
 drivers/hid/hid-magicmouse.c | 184 ---
 2 files changed, 149 insertions(+), 37 deletions(-)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 79bdf0c7e351..d6d0b20cc015 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -88,9 +88,11 @@
 #define USB_DEVICE_ID_ANTON_TOUCH_PAD  0x3101
 
 #define USB_VENDOR_ID_APPLE0x05ac
+#define BT_VENDOR_ID_APPLE 0x004c
 #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE0x0304
 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d
 #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD  0x030e
+#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 0x0265
 #define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI  0x020e
 #define USB_DEVICE_ID_APPLE_FOUNTAIN_ISO   0x020f
 #define USB_DEVICE_ID_APPLE_GEYSER_ANSI0x0214
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index b454c4386157..7f14866ea3c7 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -54,6 +54,8 @@ module_param(report_undeciphered, bool, 0644);
 MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state 
field using a MSC_RAW event");
 
 #define TRACKPAD_REPORT_ID 0x28
+#define TRACKPAD2_USB_REPORT_ID 0x02
+#define TRACKPAD2_BT_REPORT_ID 0x31
 #define MOUSE_REPORT_ID0x29
 #define DOUBLE_REPORT_ID   0xf7
 /* These definitions are not precise, but they're close enough.  (Bits
@@ -91,6 +93,19 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered 
multi-touch state fie
 #define TRACKPAD_RES_Y \
((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100))
 
+#define TRACKPAD2_DIMENSION_X (float)16000
+#define TRACKPAD2_MIN_X -3678
+#define TRACKPAD2_MAX_X 3934
+#define TRACKPAD2_RES_X \
+   ((TRACKPAD2_MAX_X - TRACKPAD2_MIN_X) / (TRACKPAD2_DIMENSION_X / 100))
+#define TRACKPAD2_DIMENSION_Y (float)11490
+#define TRACKPAD2_MIN_Y -2478
+#define TRACKPAD2_MAX_Y 2587
+#define TRACKPAD2_RES_Y \
+   ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100))
+
+#define MAX_TOUCHES16
+
 /**
  * struct magicmouse_sc - Tracks Magic Mouse-specific data.
  * @input: Input device through which we report events.
@@ -115,8 +130,8 @@ struct magicmouse_sc {
short scroll_x;
short scroll_y;
u8 size;
-   } touches[16];
-   int tracking_ids[16];
+   } touches[MAX_TOUCHES];
+   int tracking_ids[MAX_TOUCHES];
 };
 
 static int magicmouse_firm_touch(struct magicmouse_sc *msc)
@@ -183,6 +198,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
 {
struct input_dev *input = msc->input;
int id, x, y, size, orientation, touch_major, touch_minor, state, down;
+   int pressure = 0;
 
if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf;
@@ -194,7 +210,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
touch_minor = tdata[4];
state = tdata[7] & TOUCH_STATE_MASK;
down = state != TOUCH_STATE_NONE;
-   } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+   } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD) {
id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf;
x = (tdata[1] << 27 | tdata[0] << 19) >> 19;
y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19);
@@ -204,6 +220,18 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
touch_minor = tdata[5];
state = tdata[8] & TOUCH_STATE_MASK;
down = state != TOUCH_STATE_NONE;
+   } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 */
+   id = tdata[8] & 0xf;
+   x = (tdata[1] << 27 | tdata[0] << 19) >> 19;
+   y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19);
+   size = tdata[6];
+   orientation = (tdata[8] >> 5) - 4;
+   touch_major = tdata[4];
+   touch_minor = tdata[5];
+   /* Prevent zero and low pressure values */
+   pressure = tdata[7] + 30;
+   state = tdata[3] & 0xC0;
+   down = state == 0x80;
}
 
/* Store tracking ID and other fields. */
@@ -215,7 +243,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
/* If requested, emulate a scroll wheel by detecting small
 * vertical touch motions.
  

[PATCH] [v2] HID: add support for Apple Magic Trackpad 2

2018-08-28 Thread Sean O'Brien
USB device
Vendor 05ac (Apple)
Device 0265 (Magic Trackpad 2)
Bluetooth device
Vendor 004c (Apple)
Device 0265 (Magic Trackpad 2)

Add support for Apple Magic Trackpad 2 over USB and bluetooth, putting
the device in multi-touch mode.

Signed-off-by: Claudio Mettler 
Signed-off-by: Marek Wyborski 
Signed-off-by: Sean O'Brien 
---

 drivers/hid/hid-ids.h|   2 +
 drivers/hid/hid-magicmouse.c | 184 ---
 2 files changed, 149 insertions(+), 37 deletions(-)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 79bdf0c7e351..d6d0b20cc015 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -88,9 +88,11 @@
 #define USB_DEVICE_ID_ANTON_TOUCH_PAD  0x3101
 
 #define USB_VENDOR_ID_APPLE0x05ac
+#define BT_VENDOR_ID_APPLE 0x004c
 #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE0x0304
 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d
 #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD  0x030e
+#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 0x0265
 #define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI  0x020e
 #define USB_DEVICE_ID_APPLE_FOUNTAIN_ISO   0x020f
 #define USB_DEVICE_ID_APPLE_GEYSER_ANSI0x0214
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index b454c4386157..7f14866ea3c7 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -54,6 +54,8 @@ module_param(report_undeciphered, bool, 0644);
 MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state 
field using a MSC_RAW event");
 
 #define TRACKPAD_REPORT_ID 0x28
+#define TRACKPAD2_USB_REPORT_ID 0x02
+#define TRACKPAD2_BT_REPORT_ID 0x31
 #define MOUSE_REPORT_ID0x29
 #define DOUBLE_REPORT_ID   0xf7
 /* These definitions are not precise, but they're close enough.  (Bits
@@ -91,6 +93,19 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered 
multi-touch state fie
 #define TRACKPAD_RES_Y \
((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100))
 
+#define TRACKPAD2_DIMENSION_X (float)16000
+#define TRACKPAD2_MIN_X -3678
+#define TRACKPAD2_MAX_X 3934
+#define TRACKPAD2_RES_X \
+   ((TRACKPAD2_MAX_X - TRACKPAD2_MIN_X) / (TRACKPAD2_DIMENSION_X / 100))
+#define TRACKPAD2_DIMENSION_Y (float)11490
+#define TRACKPAD2_MIN_Y -2478
+#define TRACKPAD2_MAX_Y 2587
+#define TRACKPAD2_RES_Y \
+   ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100))
+
+#define MAX_TOUCHES16
+
 /**
  * struct magicmouse_sc - Tracks Magic Mouse-specific data.
  * @input: Input device through which we report events.
@@ -115,8 +130,8 @@ struct magicmouse_sc {
short scroll_x;
short scroll_y;
u8 size;
-   } touches[16];
-   int tracking_ids[16];
+   } touches[MAX_TOUCHES];
+   int tracking_ids[MAX_TOUCHES];
 };
 
 static int magicmouse_firm_touch(struct magicmouse_sc *msc)
@@ -183,6 +198,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
 {
struct input_dev *input = msc->input;
int id, x, y, size, orientation, touch_major, touch_minor, state, down;
+   int pressure = 0;
 
if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf;
@@ -194,7 +210,7 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
touch_minor = tdata[4];
state = tdata[7] & TOUCH_STATE_MASK;
down = state != TOUCH_STATE_NONE;
-   } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+   } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD) {
id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf;
x = (tdata[1] << 27 | tdata[0] << 19) >> 19;
y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19);
@@ -204,6 +220,18 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
touch_minor = tdata[5];
state = tdata[8] & TOUCH_STATE_MASK;
down = state != TOUCH_STATE_NONE;
+   } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 */
+   id = tdata[8] & 0xf;
+   x = (tdata[1] << 27 | tdata[0] << 19) >> 19;
+   y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19);
+   size = tdata[6];
+   orientation = (tdata[8] >> 5) - 4;
+   touch_major = tdata[4];
+   touch_minor = tdata[5];
+   /* Prevent zero and low pressure values */
+   pressure = tdata[7] + 30;
+   state = tdata[3] & 0xC0;
+   down = state == 0x80;
}
 
/* Store tracking ID and other fields. */
@@ -215,7 +243,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc 
*msc, int raw_id, u8 *tda
/* If requested, emulate a scroll wheel by detecting small
 * vertical touch motions.
  

Your reply

2018-08-28 Thread Ruby

We provide photoshop services to some of the companies from around the
world.
We have worked on tons of images ever since our team establishment in 2009.

Many online retail companies use our services for retouching electronics,
jewelry, apparels, furniture
etc. by getting the images of their products enhanced.

Here are the details of what we provide:
Clipping path;
Deep etch process
Image masking
Remove background
Portrait retouching
Jewelry retouching
Fashion retouching

Please reply back for further info.
We can provide testing for your photos if needed.

Thanks,
Ruby



Your reply

2018-08-28 Thread Ruby

We provide photoshop services to some of the companies from around the
world.
We have worked on tons of images ever since our team establishment in 2009.

Many online retail companies use our services for retouching electronics,
jewelry, apparels, furniture
etc. by getting the images of their products enhanced.

Here are the details of what we provide:
Clipping path;
Deep etch process
Image masking
Remove background
Portrait retouching
Jewelry retouching
Fashion retouching

Please reply back for further info.
We can provide testing for your photos if needed.

Thanks,
Ruby



[PATCH] MIPS: Fix computation on entry point

2018-08-28 Thread Philippe Reynes
Since commit 27c524d17430 ("MIPS: Use the entry point from the ELF
file header"), the kernel entry point is computed with a grep on
"start address" on the output of objdump. It works fine when the
default language is english but it may fail on other language (for
example in French, the grep should be done on "adresse de départ").
To fix this computation on most machine, I propose to force the
language to english with "LC_ALL=C".

Fixes: 27c524d17430 ("MIPS: Use the entry point from the ELF file header")

Signed-off-by: Philippe Reynes 
---
 arch/mips/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index d74b374..835aa8f 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -258,7 +258,7 @@ load-y  = 
$(CONFIG_PHYSICAL_START)
 endif
 
 # Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
-entry-y= $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
+entry-y= $(shell LC_ALL=C $(OBJDUMP) -f vmlinux 2>/dev/null \
| sed -n '/^start address / { \
s/^.* //; \
s/0x\([0-7]...\)$$/0x\1/; \
-- 
2.7.4



[PATCH] MIPS: Fix computation on entry point

2018-08-28 Thread Philippe Reynes
Since commit 27c524d17430 ("MIPS: Use the entry point from the ELF
file header"), the kernel entry point is computed with a grep on
"start address" on the output of objdump. It works fine when the
default language is english but it may fail on other language (for
example in French, the grep should be done on "adresse de départ").
To fix this computation on most machine, I propose to force the
language to english with "LC_ALL=C".

Fixes: 27c524d17430 ("MIPS: Use the entry point from the ELF file header")

Signed-off-by: Philippe Reynes 
---
 arch/mips/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index d74b374..835aa8f 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -258,7 +258,7 @@ load-y  = 
$(CONFIG_PHYSICAL_START)
 endif
 
 # Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
-entry-y= $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
+entry-y= $(shell LC_ALL=C $(OBJDUMP) -f vmlinux 2>/dev/null \
| sed -n '/^start address / { \
s/^.* //; \
s/0x\([0-7]...\)$$/0x\1/; \
-- 
2.7.4



Re: [PATCH 4/7] mm/hmm: properly handle migration pmd

2018-08-28 Thread Michal Hocko
On Tue 28-08-18 11:54:33, Zi Yan wrote:
> Hi Michal,
> 
> On 28 Aug 2018, at 11:45, Michal Hocko wrote:
> 
> > On Tue 28-08-18 17:42:06, Michal Hocko wrote:
> >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote:
> >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote:
>  On Fri 24-08-18 20:05:46, Zi Yan wrote:
>  [...]
> >> +  if (!pmd_present(pmd)) {
> >> +  swp_entry_t entry = pmd_to_swp_entry(pmd);
> >> +
> >> +  if (is_migration_entry(entry)) {
> >
> > I think you should check thp_migration_supported() here, since PMD 
> > migration is only enabled in x86_64 systems.
> > Other architectures should treat PMD migration entries as bad.
> 
>  How can we have a migration pmd entry when the migration is not
>  supported?
> >>>
> >>> Not sure i follow here, migration can happen anywhere (assuming
> >>> that something like compaction is active or numa or ...). So this
> >>> code can face pmd migration entry on architecture that support
> >>> it. What is missing here is thp_migration_supported() call to
> >>> protect the is_migration_entry() to avoid false positive on arch
> >>> which do not support thp migration.
> >>
> >> I mean that architectures which do not support THP migration shouldn't
> >> ever see any migration entry. So is_migration_entry should be always
> >> false. Or do I miss something?
> >
> > And just to be clear. thp_migration_supported should be checked only
> > when we actually _do_ the migration or evaluate migratability of the
> > page. We definitely do want to sprinkle this check to all places where
> > is_migration_entry is checked.
> 
> is_migration_entry() is a general check for swp_entry_t, so it can return
> true even if THP migration is not enabled. is_pmd_migration_entry() always
> returns false when THP migration is not enabled.
> 
> So the code can be changed in two ways, either replacing is_migration_entry()
> with is_pmd_migration_entry() or adding thp_migration_supported() check
> like Jerome did.
> 
> Does this clarify your question?

Not really. IIUC the code checks for the pmd. So even though
is_migration_entry is a more generic check it should never return true
for thp_migration_supported() == F because we simply never have those
unless I am missing something.

is_pmd_migration_entry is much more readable of course and I suspect it
can save few cycles as well.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH 4/7] mm/hmm: properly handle migration pmd

2018-08-28 Thread Michal Hocko
On Tue 28-08-18 11:54:33, Zi Yan wrote:
> Hi Michal,
> 
> On 28 Aug 2018, at 11:45, Michal Hocko wrote:
> 
> > On Tue 28-08-18 17:42:06, Michal Hocko wrote:
> >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote:
> >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote:
>  On Fri 24-08-18 20:05:46, Zi Yan wrote:
>  [...]
> >> +  if (!pmd_present(pmd)) {
> >> +  swp_entry_t entry = pmd_to_swp_entry(pmd);
> >> +
> >> +  if (is_migration_entry(entry)) {
> >
> > I think you should check thp_migration_supported() here, since PMD 
> > migration is only enabled in x86_64 systems.
> > Other architectures should treat PMD migration entries as bad.
> 
>  How can we have a migration pmd entry when the migration is not
>  supported?
> >>>
> >>> Not sure i follow here, migration can happen anywhere (assuming
> >>> that something like compaction is active or numa or ...). So this
> >>> code can face pmd migration entry on architecture that support
> >>> it. What is missing here is thp_migration_supported() call to
> >>> protect the is_migration_entry() to avoid false positive on arch
> >>> which do not support thp migration.
> >>
> >> I mean that architectures which do not support THP migration shouldn't
> >> ever see any migration entry. So is_migration_entry should be always
> >> false. Or do I miss something?
> >
> > And just to be clear. thp_migration_supported should be checked only
> > when we actually _do_ the migration or evaluate migratability of the
> > page. We definitely do want to sprinkle this check to all places where
> > is_migration_entry is checked.
> 
> is_migration_entry() is a general check for swp_entry_t, so it can return
> true even if THP migration is not enabled. is_pmd_migration_entry() always
> returns false when THP migration is not enabled.
> 
> So the code can be changed in two ways, either replacing is_migration_entry()
> with is_pmd_migration_entry() or adding thp_migration_supported() check
> like Jerome did.
> 
> Does this clarify your question?

Not really. IIUC the code checks for the pmd. So even though
is_migration_entry is a more generic check it should never return true
for thp_migration_supported() == F because we simply never have those
unless I am missing something.

is_pmd_migration_entry is much more readable of course and I suspect it
can save few cycles as well.
-- 
Michal Hocko
SUSE Labs


Waiting for

2018-08-28 Thread Ruby

We provide photoshop services to some of the companies from around the
world.
We have worked on tons of images ever since our team establishment in 2009.

Many online retail companies use our services for retouching electronics,
jewelry, apparels, furniture
etc. by getting the images of their products enhanced.

Here are the details of what we provide:
Clipping path;
Deep etch process
Image masking
Remove background
Portrait retouching
Jewelry retouching
Fashion retouching

Please reply back for further info.
We can provide testing for your photos if needed.

Thanks,
Ruby



Waiting for

2018-08-28 Thread Ruby

We provide photoshop services to some of the companies from around the
world.
We have worked on tons of images ever since our team establishment in 2009.

Many online retail companies use our services for retouching electronics,
jewelry, apparels, furniture
etc. by getting the images of their products enhanced.

Here are the details of what we provide:
Clipping path;
Deep etch process
Image masking
Remove background
Portrait retouching
Jewelry retouching
Fashion retouching

Please reply back for further info.
We can provide testing for your photos if needed.

Thanks,
Ruby



Re: [PATCH 4/7] mm/hmm: properly handle migration pmd

2018-08-28 Thread Jerome Glisse
On Tue, Aug 28, 2018 at 11:54:33AM -0400, Zi Yan wrote:
> Hi Michal,
> 
> On 28 Aug 2018, at 11:45, Michal Hocko wrote:
> 
> > On Tue 28-08-18 17:42:06, Michal Hocko wrote:
> >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote:
> >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote:
>  On Fri 24-08-18 20:05:46, Zi Yan wrote:
>  [...]
> >> +  if (!pmd_present(pmd)) {
> >> +  swp_entry_t entry = pmd_to_swp_entry(pmd);
> >> +
> >> +  if (is_migration_entry(entry)) {
> >
> > I think you should check thp_migration_supported() here, since PMD 
> > migration is only enabled in x86_64 systems.
> > Other architectures should treat PMD migration entries as bad.
> 
>  How can we have a migration pmd entry when the migration is not
>  supported?
> >>>
> >>> Not sure i follow here, migration can happen anywhere (assuming
> >>> that something like compaction is active or numa or ...). So this
> >>> code can face pmd migration entry on architecture that support
> >>> it. What is missing here is thp_migration_supported() call to
> >>> protect the is_migration_entry() to avoid false positive on arch
> >>> which do not support thp migration.
> >>
> >> I mean that architectures which do not support THP migration shouldn't
> >> ever see any migration entry. So is_migration_entry should be always
> >> false. Or do I miss something?
> >
> > And just to be clear. thp_migration_supported should be checked only
> > when we actually _do_ the migration or evaluate migratability of the
> > page. We definitely do want to sprinkle this check to all places where
> > is_migration_entry is checked.
> 
> is_migration_entry() is a general check for swp_entry_t, so it can return
> true even if THP migration is not enabled. is_pmd_migration_entry() always
> returns false when THP migration is not enabled.
> 
> So the code can be changed in two ways, either replacing is_migration_entry()
> with is_pmd_migration_entry() or adding thp_migration_supported() check
> like Jerome did.
> 
> Does this clarify your question?
> 

Well looking back at code is_migration_entry() will return false on arch
which do not have thp migration because pmd_to_swp_entry() will return
swp_entry(0,0) which is can not be a valid migration entry.

Maybe using is_pmd_migration_entry() would be better here ? It seems
that is_pmd_migration_entry() is more common then the open coded
thp_migration_supported() && is_migration_entry()

Cheers,
Jérôme


Re: [PATCH 4/7] mm/hmm: properly handle migration pmd

2018-08-28 Thread Jerome Glisse
On Tue, Aug 28, 2018 at 11:54:33AM -0400, Zi Yan wrote:
> Hi Michal,
> 
> On 28 Aug 2018, at 11:45, Michal Hocko wrote:
> 
> > On Tue 28-08-18 17:42:06, Michal Hocko wrote:
> >> On Tue 28-08-18 11:36:59, Jerome Glisse wrote:
> >>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote:
>  On Fri 24-08-18 20:05:46, Zi Yan wrote:
>  [...]
> >> +  if (!pmd_present(pmd)) {
> >> +  swp_entry_t entry = pmd_to_swp_entry(pmd);
> >> +
> >> +  if (is_migration_entry(entry)) {
> >
> > I think you should check thp_migration_supported() here, since PMD 
> > migration is only enabled in x86_64 systems.
> > Other architectures should treat PMD migration entries as bad.
> 
>  How can we have a migration pmd entry when the migration is not
>  supported?
> >>>
> >>> Not sure i follow here, migration can happen anywhere (assuming
> >>> that something like compaction is active or numa or ...). So this
> >>> code can face pmd migration entry on architecture that support
> >>> it. What is missing here is thp_migration_supported() call to
> >>> protect the is_migration_entry() to avoid false positive on arch
> >>> which do not support thp migration.
> >>
> >> I mean that architectures which do not support THP migration shouldn't
> >> ever see any migration entry. So is_migration_entry should be always
> >> false. Or do I miss something?
> >
> > And just to be clear. thp_migration_supported should be checked only
> > when we actually _do_ the migration or evaluate migratability of the
> > page. We definitely do want to sprinkle this check to all places where
> > is_migration_entry is checked.
> 
> is_migration_entry() is a general check for swp_entry_t, so it can return
> true even if THP migration is not enabled. is_pmd_migration_entry() always
> returns false when THP migration is not enabled.
> 
> So the code can be changed in two ways, either replacing is_migration_entry()
> with is_pmd_migration_entry() or adding thp_migration_supported() check
> like Jerome did.
> 
> Does this clarify your question?
> 

Well looking back at code is_migration_entry() will return false on arch
which do not have thp migration because pmd_to_swp_entry() will return
swp_entry(0,0) which is can not be a valid migration entry.

Maybe using is_pmd_migration_entry() would be better here ? It seems
that is_pmd_migration_entry() is more common then the open coded
thp_migration_supported() && is_migration_entry()

Cheers,
Jérôme


VDSO and dcache aliasing

2018-08-28 Thread Alexandre Belloni
Hello James,

A year ago, you wrote that patch:

https://www.linux-mips.org/archives/linux-mips/2017-06/msg00658.html

You called it a hack but it has been used since then. As you will
certainly realize by now, Ocelot is one of the affected SoC so we would
pretty much like to see this going upstream.

What would be the way forward?

Regards,

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


VDSO and dcache aliasing

2018-08-28 Thread Alexandre Belloni
Hello James,

A year ago, you wrote that patch:

https://www.linux-mips.org/archives/linux-mips/2017-06/msg00658.html

You called it a hack but it has been used since then. As you will
certainly realize by now, Ocelot is one of the affected SoC so we would
pretty much like to see this going upstream.

What would be the way forward?

Regards,

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


Re: [RFC PATCH v2 2/3] pstore: Add register read/write{b,w,l,q} tracing support

2018-08-28 Thread Steven Rostedt
On Tue, 28 Aug 2018 18:47:33 +0530
Sai Prakash Ranjan  wrote:

> On 8/27/2018 9:45 PM, Steven Rostedt wrote:
> > On Sat, 25 Aug 2018 12:54:07 +0530
> > Sai Prakash Ranjan  wrote:
> > 
> >   
> >> Ftrace does not trace __raw{read,write}{b,l,w,q}() functions. I am not
> >> sure why and how it is filtered out because I do not see any notrace
> >> flag in those functions, maybe that whole directory is filtered out.
> >> So adding this functionality to ftrace would mean removing the notrace
> >> for these functions i.e., something like using
> >> __raw{read,write}{b,l,w,q}() as the available filter functions. Also
> >> pstore ftrace does not filter functions to trace I suppose?  
> > 
> > It's not traced because it is inlined. Simply make the __raw_read
> > function a normal function and it will be traced. And then you could
> > use ftrace to read the function.
> > 
> > If this has to be per arch, you can register your callback with the
> > REGS flags, and pt_regs will be passed to your callback function you
> > attached to __raw_read*() as if you inserted a break point at that
> > location, and you can get any reg data you want there.
> > 
> >  
> 
> Thank you very much for the information Steven. Ok so we can get 
> function parameters with pt_regs.

Yes.

> 
> >>
> >> Coming to the reason as to why it would be good to keep this separate
> >> from ftrace would be:
> >> * Ftrace can get ip and parent ip, but suppose we need extra data (field
> >> data) as in above sample output we would not be able to get through 
> >> ftrace.  
> > 
> > As mentioned above, you can get regs (and ftrace is being expanded now
> > to get parameters of functions).
> >   
> You mean there is another way to get parameters other than regs?

No, but you could register a callback function to be called when a
function is hit, and the pt_regs are passed to it. We are working on
getting parameters from the pt_regs (see this patch:
 http://lkml.kernel.org/r/152465885737.26224.2822487520472783854.stgit@devbox)

> 
> >>
> >> * Although this patch is for tracing register read/write, we can easily
> >> add more functionality since we have dynamic_rtb api which can be hooked
> >> to functions and start tracing events(IRQ, Context ID) something similar
> >> to tracepoints.
> >> Initially thought of having tracepoints for logging register read/write
> >> but I do not know if we can export tracepoint data to pstore since the
> >> main usecase is to debug unknown resets and hangs.  
> > 
> > I don't know why not? We have read/write tracepoints for
> > read/write_msr() calls in x86.
> > 
> > Anything can add a hook to the callback of the tracepoints, and use
> > that infrastructure instead of creating yet another dynamic code
> > modification infrastructure.
> >   
> Thanks for pointing out to read/write_msr, I checked it and was able to 
> implement something similar for arm64. But still can we export 
> tracepoint data to pstore because we need to debug reset cases and for 
> that pstore is of real importance?. If so then it would be great to have 
> various events logged into pstore which can be a lot of help for debugging.
> 
> Also with the current dynamic filtering of read/write(PATCH 3/3), it is 
> a lot easier to filter register read/write since we use dynamic debug 
> framework which provides file, function and line level filtering 
> capacity. Maybe if we can add something like this to trace events it 
> would be better?

I would recommend using the tracepoint infrastructure. Note,
tracepoints and trace events are two different things. Trace events use
tracepoints, and you use trace events to create tracepoints, thus they
are tightly coupled. But once a tracepoint exists, anything can connect
to them without needing to use the trace event.

Let's look at the read_msr trace event. Because it is in a header, to
avoid "include hell" we open code some of it:

static inline unsigned long long native_read_msr(unsigned int msr)
{
unsigned long long val;

val = __rdmsr(msr);

if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_read_msr(msr, val, 0);

return val;
}

Where:

#ifdef CONFIG_TRACEPOINTS
#define msr_tracepoint_active(t) static_key_false(&(t).key)
#else 
#define msr_tracepoint_active(t) false
#endif

We have to open code the access to the tracepoint.key because msr.h is
used in a lot of critical headers, we couldn't use the normal
tracepoint.h header here.

The "static_key_false()" is a jump label that is just a nop. When the
static_key is enabled, the nop is converted to a static "jmp" to the
code that calls "do_trace_read_msr()". This is a function call to a
function defined in msr.c (where we can do proper includes), and all
that does is call the tracepoint "trace_read_msr()", which is also a
static key that, when enabled, will iterate over a list of functions it
should call with the defined parameters (msr, val, failed).

When defining the trace event for "read_msr", it creates 

Re: [RFC PATCH v2 2/3] pstore: Add register read/write{b,w,l,q} tracing support

2018-08-28 Thread Steven Rostedt
On Tue, 28 Aug 2018 18:47:33 +0530
Sai Prakash Ranjan  wrote:

> On 8/27/2018 9:45 PM, Steven Rostedt wrote:
> > On Sat, 25 Aug 2018 12:54:07 +0530
> > Sai Prakash Ranjan  wrote:
> > 
> >   
> >> Ftrace does not trace __raw{read,write}{b,l,w,q}() functions. I am not
> >> sure why and how it is filtered out because I do not see any notrace
> >> flag in those functions, maybe that whole directory is filtered out.
> >> So adding this functionality to ftrace would mean removing the notrace
> >> for these functions i.e., something like using
> >> __raw{read,write}{b,l,w,q}() as the available filter functions. Also
> >> pstore ftrace does not filter functions to trace I suppose?  
> > 
> > It's not traced because it is inlined. Simply make the __raw_read
> > function a normal function and it will be traced. And then you could
> > use ftrace to read the function.
> > 
> > If this has to be per arch, you can register your callback with the
> > REGS flags, and pt_regs will be passed to your callback function you
> > attached to __raw_read*() as if you inserted a break point at that
> > location, and you can get any reg data you want there.
> > 
> >  
> 
> Thank you very much for the information Steven. Ok so we can get 
> function parameters with pt_regs.

Yes.

> 
> >>
> >> Coming to the reason as to why it would be good to keep this separate
> >> from ftrace would be:
> >> * Ftrace can get ip and parent ip, but suppose we need extra data (field
> >> data) as in above sample output we would not be able to get through 
> >> ftrace.  
> > 
> > As mentioned above, you can get regs (and ftrace is being expanded now
> > to get parameters of functions).
> >   
> You mean there is another way to get parameters other than regs?

No, but you could register a callback function to be called when a
function is hit, and the pt_regs are passed to it. We are working on
getting parameters from the pt_regs (see this patch:
 http://lkml.kernel.org/r/152465885737.26224.2822487520472783854.stgit@devbox)

> 
> >>
> >> * Although this patch is for tracing register read/write, we can easily
> >> add more functionality since we have dynamic_rtb api which can be hooked
> >> to functions and start tracing events(IRQ, Context ID) something similar
> >> to tracepoints.
> >> Initially thought of having tracepoints for logging register read/write
> >> but I do not know if we can export tracepoint data to pstore since the
> >> main usecase is to debug unknown resets and hangs.  
> > 
> > I don't know why not? We have read/write tracepoints for
> > read/write_msr() calls in x86.
> > 
> > Anything can add a hook to the callback of the tracepoints, and use
> > that infrastructure instead of creating yet another dynamic code
> > modification infrastructure.
> >   
> Thanks for pointing out to read/write_msr, I checked it and was able to 
> implement something similar for arm64. But still can we export 
> tracepoint data to pstore because we need to debug reset cases and for 
> that pstore is of real importance?. If so then it would be great to have 
> various events logged into pstore which can be a lot of help for debugging.
> 
> Also with the current dynamic filtering of read/write(PATCH 3/3), it is 
> a lot easier to filter register read/write since we use dynamic debug 
> framework which provides file, function and line level filtering 
> capacity. Maybe if we can add something like this to trace events it 
> would be better?

I would recommend using the tracepoint infrastructure. Note,
tracepoints and trace events are two different things. Trace events use
tracepoints, and you use trace events to create tracepoints, thus they
are tightly coupled. But once a tracepoint exists, anything can connect
to them without needing to use the trace event.

Let's look at the read_msr trace event. Because it is in a header, to
avoid "include hell" we open code some of it:

static inline unsigned long long native_read_msr(unsigned int msr)
{
unsigned long long val;

val = __rdmsr(msr);

if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_read_msr(msr, val, 0);

return val;
}

Where:

#ifdef CONFIG_TRACEPOINTS
#define msr_tracepoint_active(t) static_key_false(&(t).key)
#else 
#define msr_tracepoint_active(t) false
#endif

We have to open code the access to the tracepoint.key because msr.h is
used in a lot of critical headers, we couldn't use the normal
tracepoint.h header here.

The "static_key_false()" is a jump label that is just a nop. When the
static_key is enabled, the nop is converted to a static "jmp" to the
code that calls "do_trace_read_msr()". This is a function call to a
function defined in msr.c (where we can do proper includes), and all
that does is call the tracepoint "trace_read_msr()", which is also a
static key that, when enabled, will iterate over a list of functions it
should call with the defined parameters (msr, val, failed).

When defining the trace event for "read_msr", it creates 

Re: Linux 4.19-rc1

2018-08-28 Thread Guenter Roeck
On Mon, Aug 27, 2018 at 02:56:32PM -0700, Linus Torvalds wrote:
> On Mon, Aug 27, 2018 at 6:45 AM Guenter Roeck  wrote:
> >
> > Build results:
> > total: 132 pass: 129 fail: 3
> 
> Thanks for running these. Looks like everything but the sparc thing is
> under control, and the sparc thing might be one of those "big builds
> don't work on sparc" ;(
> 

In general I would agree. On the other side, someone who knows sparc
assembler should be able to fix the problem. sparc is notorious for
failing allmodconfig builds, mostly due to its separate devicetree
implementation. On top of that, many allmodconfig builds already fail,
often for minor reasons such as duplicate symbols or missing exports.
Dropping sparc:allmodconfig will cause sparc builds to deteriorate,
and we'll lose valuable build feedback. On the plus side,
sparc64:allmodconfig still builds, but that doesn't cover 32/64 bit
differences.

I'll monitor the situation for a while and stop building sparc:allmodconfig
if the problem isn't fixed around -rc7.

Guenter


Re: Linux 4.19-rc1

2018-08-28 Thread Guenter Roeck
On Mon, Aug 27, 2018 at 02:56:32PM -0700, Linus Torvalds wrote:
> On Mon, Aug 27, 2018 at 6:45 AM Guenter Roeck  wrote:
> >
> > Build results:
> > total: 132 pass: 129 fail: 3
> 
> Thanks for running these. Looks like everything but the sparc thing is
> under control, and the sparc thing might be one of those "big builds
> don't work on sparc" ;(
> 

In general I would agree. On the other side, someone who knows sparc
assembler should be able to fix the problem. sparc is notorious for
failing allmodconfig builds, mostly due to its separate devicetree
implementation. On top of that, many allmodconfig builds already fail,
often for minor reasons such as duplicate symbols or missing exports.
Dropping sparc:allmodconfig will cause sparc builds to deteriorate,
and we'll lose valuable build feedback. On the plus side,
sparc64:allmodconfig still builds, but that doesn't cover 32/64 bit
differences.

I'll monitor the situation for a while and stop building sparc:allmodconfig
if the problem isn't fixed around -rc7.

Guenter


[PATCH] binder: use standard functions to allocate fds

2018-08-28 Thread Todd Kjos
Binder uses internal fs interfaces to allocate and install fds:

__alloc_fd
__fd_install
__close_fd
get_files_struct
put_files_struct

These were used to support the passing of fds between processes
as part of a transaction. The actual allocation and installation
of the fds in the target process was handled by the sending
process so the standard functions, alloc_fd() and fd_install()
which assume task==current couldn't be used.

This patch refactors this mechanism so that the fds are
allocated and installed by the target process allowing the
standard functions to be used.

The sender now creates a list of fd fixups that contains the
struct *file and the address to fixup with the new fd once
it is allocated. This list is processed by the target process
when the transaction is dequeued.

A new error case is introduced by this change. If an async
transaction with file descriptors cannot allocate new
fds in the target (probably due to out of file descriptors),
the transaction is discarded with a log message. In the old
implementation this would have been detected in the sender
context and failed prior to sending.

Signed-off-by: Todd Kjos 
---
 drivers/android/Kconfig|   2 +-
 drivers/android/binder.c   | 387 -
 drivers/android/binder_trace.h |  36 ++-
 3 files changed, 260 insertions(+), 165 deletions(-)

diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 432e9ad770703..51e8250d113fa 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -10,7 +10,7 @@ if ANDROID
 
 config ANDROID_BINDER_IPC
bool "Android Binder IPC Driver"
-   depends on MMU
+   depends on MMU && !CPU_CACHE_VIVT
default n
---help---
  Binder is used in Android for both communication between processes,
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d58763b6b0090..50856319bc7da 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -457,9 +458,8 @@ struct binder_ref {
 };
 
 enum binder_deferred_state {
-   BINDER_DEFERRED_PUT_FILES= 0x01,
-   BINDER_DEFERRED_FLUSH= 0x02,
-   BINDER_DEFERRED_RELEASE  = 0x04,
+   BINDER_DEFERRED_FLUSH= 0x01,
+   BINDER_DEFERRED_RELEASE  = 0x02,
 };
 
 /**
@@ -480,9 +480,6 @@ enum binder_deferred_state {
  *(invariant after initialized)
  * @tsk   task_struct for group_leader of process
  *(invariant after initialized)
- * @files files_struct for process
- *(protected by @files_lock)
- * @files_lockmutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *(protected by binder_deferred_lock)
  * @deferred_work:bitmap of deferred work to perform
@@ -527,8 +524,6 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
-   struct files_struct *files;
-   struct mutex files_lock;
struct hlist_node deferred_work_node;
int deferred_work;
bool is_dead;
@@ -611,6 +606,23 @@ struct binder_thread {
bool is_dead;
 };
 
+/**
+ * struct binder_txn_fd_fixup - transaction fd fixup list element
+ * @fixup_entry:  list entry
+ * @file: struct file to be associated with new fd
+ * @offset:   offset in buffer data to this fixup
+ *
+ * List element for fd fixups in a transaction. Since file
+ * descriptors need to be allocated in the context of the
+ * target process, we pass each fd to be processed in this
+ * struct.
+ */
+struct binder_txn_fd_fixup {
+   struct list_head fixup_entry;
+   struct file *file;
+   size_t offset;
+};
+
 struct binder_transaction {
int debug_id;
struct binder_work work;
@@ -628,6 +640,7 @@ struct binder_transaction {
longpriority;
longsaved_priority;
kuid_t  sender_euid;
+   struct list_head fd_fixups;
/**
 * @lock:  protects @from, @to_proc, and @to_thread
 *
@@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread 
*thread);
 static void binder_free_proc(struct binder_proc *proc);
 static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
-static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
-{
-   unsigned long rlim_cur;
-   unsigned long irqs;
-   int ret;
-
-   mutex_lock(>files_lock);
-   if (proc->files == NULL) {
-   ret = -ESRCH;
-   goto err;
-   }
-   if (!lock_task_sighand(proc->tsk, )) {
-   ret = -EMFILE;
-   goto err;
-   }
-   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
-   unlock_task_sighand(proc->tsk, );
-
-   ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
-err:

[PATCH] binder: use standard functions to allocate fds

2018-08-28 Thread Todd Kjos
Binder uses internal fs interfaces to allocate and install fds:

__alloc_fd
__fd_install
__close_fd
get_files_struct
put_files_struct

These were used to support the passing of fds between processes
as part of a transaction. The actual allocation and installation
of the fds in the target process was handled by the sending
process so the standard functions, alloc_fd() and fd_install()
which assume task==current couldn't be used.

This patch refactors this mechanism so that the fds are
allocated and installed by the target process allowing the
standard functions to be used.

The sender now creates a list of fd fixups that contains the
struct *file and the address to fixup with the new fd once
it is allocated. This list is processed by the target process
when the transaction is dequeued.

A new error case is introduced by this change. If an async
transaction with file descriptors cannot allocate new
fds in the target (probably due to out of file descriptors),
the transaction is discarded with a log message. In the old
implementation this would have been detected in the sender
context and failed prior to sending.

Signed-off-by: Todd Kjos 
---
 drivers/android/Kconfig|   2 +-
 drivers/android/binder.c   | 387 -
 drivers/android/binder_trace.h |  36 ++-
 3 files changed, 260 insertions(+), 165 deletions(-)

diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 432e9ad770703..51e8250d113fa 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -10,7 +10,7 @@ if ANDROID
 
 config ANDROID_BINDER_IPC
bool "Android Binder IPC Driver"
-   depends on MMU
+   depends on MMU && !CPU_CACHE_VIVT
default n
---help---
  Binder is used in Android for both communication between processes,
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d58763b6b0090..50856319bc7da 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -457,9 +458,8 @@ struct binder_ref {
 };
 
 enum binder_deferred_state {
-   BINDER_DEFERRED_PUT_FILES= 0x01,
-   BINDER_DEFERRED_FLUSH= 0x02,
-   BINDER_DEFERRED_RELEASE  = 0x04,
+   BINDER_DEFERRED_FLUSH= 0x01,
+   BINDER_DEFERRED_RELEASE  = 0x02,
 };
 
 /**
@@ -480,9 +480,6 @@ enum binder_deferred_state {
  *(invariant after initialized)
  * @tsk   task_struct for group_leader of process
  *(invariant after initialized)
- * @files files_struct for process
- *(protected by @files_lock)
- * @files_lockmutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *(protected by binder_deferred_lock)
  * @deferred_work:bitmap of deferred work to perform
@@ -527,8 +524,6 @@ struct binder_proc {
struct list_head waiting_threads;
int pid;
struct task_struct *tsk;
-   struct files_struct *files;
-   struct mutex files_lock;
struct hlist_node deferred_work_node;
int deferred_work;
bool is_dead;
@@ -611,6 +606,23 @@ struct binder_thread {
bool is_dead;
 };
 
+/**
+ * struct binder_txn_fd_fixup - transaction fd fixup list element
+ * @fixup_entry:  list entry
+ * @file: struct file to be associated with new fd
+ * @offset:   offset in buffer data to this fixup
+ *
+ * List element for fd fixups in a transaction. Since file
+ * descriptors need to be allocated in the context of the
+ * target process, we pass each fd to be processed in this
+ * struct.
+ */
+struct binder_txn_fd_fixup {
+   struct list_head fixup_entry;
+   struct file *file;
+   size_t offset;
+};
+
 struct binder_transaction {
int debug_id;
struct binder_work work;
@@ -628,6 +640,7 @@ struct binder_transaction {
longpriority;
longsaved_priority;
kuid_t  sender_euid;
+   struct list_head fd_fixups;
/**
 * @lock:  protects @from, @to_proc, and @to_thread
 *
@@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread 
*thread);
 static void binder_free_proc(struct binder_proc *proc);
 static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
-static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
-{
-   unsigned long rlim_cur;
-   unsigned long irqs;
-   int ret;
-
-   mutex_lock(>files_lock);
-   if (proc->files == NULL) {
-   ret = -ESRCH;
-   goto err;
-   }
-   if (!lock_task_sighand(proc->tsk, )) {
-   ret = -EMFILE;
-   goto err;
-   }
-   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
-   unlock_task_sighand(proc->tsk, );
-
-   ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
-err:

Re: [PATCH v2 0/4] Provide core API for NMIs

2018-08-28 Thread Julien Thierry




On 28/08/18 16:29, Julien Thierry wrote:

Hi,



[...]


I'll soon post a series making use of the API for Arm's GICv3.



Here is the series, NMI related patches are the last 4 in the series:

https://lkml.org/lkml/2018/8/28/693

--
Julien Thierry


Re: [PATCH v2 0/4] Provide core API for NMIs

2018-08-28 Thread Julien Thierry




On 28/08/18 16:29, Julien Thierry wrote:

Hi,



[...]


I'll soon post a series making use of the API for Arm's GICv3.



Here is the series, NMI related patches are the last 4 in the series:

https://lkml.org/lkml/2018/8/28/693

--
Julien Thierry


[PATCH v4 2/2]: perf record: enable asynchronous trace writing

2018-08-28 Thread Alexey Budankov


Trace file offset are linearly calculated by perf_mmap__push() code 
for the next possible write operation, but file position is updated by 
the kernel only in the second lseek() syscall after the loop. 
The first lseek() syscall reads that file position for 
the next loop iterations.

record__mmap_read_sync implements sort of a barrier between spilling 
ready profiling data to disk.

Signed-off-by: Alexey Budankov 
---
Changes in v4:
- converted void *bf to struct perf_mmap *md in signatures
- written comment in perf_mmap__push() just before perf_mmap__get();
- written comment in record__mmap_read_sync() on possible restarting 
  of aio_write() operation and releasing perf_mmap object after all;
- added perf_mmap__put() for the cases of failed aio_write();
Changes in v3:
- written comments about nanosleep(0.5ms) call prior aio_suspend()
  to cope with intrusiveness of its implementation in glibc;
- written comments about rationale behind coping profiling data 
  into mmap->data buffer;
---
 tools/perf/builtin-record.c | 136 +---
 tools/perf/util/mmap.c  |  43 ++
 tools/perf/util/mmap.h  |   2 +-
 3 files changed, 161 insertions(+), 20 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 22ebeb92ac51..7b628f9a7770 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -121,6 +121,23 @@ static int record__write(struct record *rec, void *bf, 
size_t size)
return 0;
 }
 
+static int record__aio_write(int trace_fd, struct aiocb *cblock,
+   void *buf, size_t size, off_t off)
+{
+   cblock->aio_fildes = trace_fd;
+   cblock->aio_buf= buf;
+   cblock->aio_nbytes = size;
+   cblock->aio_offset = off;
+   cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
+
+   if (aio_write(cblock) == -1) {
+   pr_err("failed to queue perf data, error: %m\n");
+   return -1;
+   }
+
+   return 0;
+}
+
 static int process_synthesized_event(struct perf_tool *tool,
 union perf_event *event,
 struct perf_sample *sample __maybe_unused,
@@ -130,12 +147,13 @@ static int process_synthesized_event(struct perf_tool 
*tool,
return record__write(rec, event, event->header.size);
 }
 
-static int record__pushfn(void *to, void *bf, size_t size)
+static int record__pushfn(void *to, struct perf_mmap *map, size_t size, off_t 
off)
 {
struct record *rec = to;
 
rec->samples++;
-   return record__write(rec, bf, size);
+   return record__aio_write(rec->session->data->file.fd, >cblock,
+   map->data, size, off);
 }
 
 static volatile int done;
@@ -510,13 +528,110 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
 };
 
+static int record__mmap_read_sync(int trace_fd, struct aiocb **cblocks,
+   int cblocks_size, struct record *rec)
+{
+   size_t rem;
+   ssize_t size;
+   off_t rem_off;
+   int i, aio_ret, aio_errno, do_suspend;
+   struct perf_mmap *md;
+   struct timespec timeout0 = { 0, 0 };
+   struct timespec timeoutS = { 0, 1000 * 500  * 1 }; // 0.5ms
+
+   if (!cblocks_size)
+   return 0;
+
+   do {
+   do_suspend = 0;
+   /* aio_suspend() implementation inside glibc (as of v2.27) is
+* intrusive and not just blocks waiting io requests completion
+* but polls requests queue inducing context switches in perf
+* tool process. When profiling in system wide mode with tracing
+* context switches the trace may be polluted by context 
switches
+* from the perf process and the trace size becomes about 3-5
+* times bigger than that of when writing the trace serially.
+* To limit the volume of context switches from perf tool
+* process nanosleep() call below is added prior aio_suspend()
+* calling till every half of the kernel timer tick which is
+* usually 1ms (depends on CONFIG_HZ value).
+*/
+   nanosleep(, NULL);
+   if (aio_suspend((const struct aiocb**)cblocks, cblocks_size, 
)) {
+   if (errno == EAGAIN || errno == EINTR) {
+   do_suspend = 1;
+   continue;
+   } else {
+   pr_err("failed to sync perf data, error: %m\n");
+   break;
+   }
+   }
+   for (i = 0; i < cblocks_size; i++) {
+   if (cblocks[i] == NULL) {
+   continue;
+   }
+   aio_errno = aio_error(cblocks[i]);
+   if (aio_errno == 

[PATCH v4 2/2]: perf record: enable asynchronous trace writing

2018-08-28 Thread Alexey Budankov


Trace file offset are linearly calculated by perf_mmap__push() code 
for the next possible write operation, but file position is updated by 
the kernel only in the second lseek() syscall after the loop. 
The first lseek() syscall reads that file position for 
the next loop iterations.

record__mmap_read_sync implements sort of a barrier between spilling 
ready profiling data to disk.

Signed-off-by: Alexey Budankov 
---
Changes in v4:
- converted void *bf to struct perf_mmap *md in signatures
- written comment in perf_mmap__push() just before perf_mmap__get();
- written comment in record__mmap_read_sync() on possible restarting 
  of aio_write() operation and releasing perf_mmap object after all;
- added perf_mmap__put() for the cases of failed aio_write();
Changes in v3:
- written comments about nanosleep(0.5ms) call prior aio_suspend()
  to cope with intrusiveness of its implementation in glibc;
- written comments about rationale behind coping profiling data 
  into mmap->data buffer;
---
 tools/perf/builtin-record.c | 136 +---
 tools/perf/util/mmap.c  |  43 ++
 tools/perf/util/mmap.h  |   2 +-
 3 files changed, 161 insertions(+), 20 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 22ebeb92ac51..7b628f9a7770 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -121,6 +121,23 @@ static int record__write(struct record *rec, void *bf, 
size_t size)
return 0;
 }
 
+static int record__aio_write(int trace_fd, struct aiocb *cblock,
+   void *buf, size_t size, off_t off)
+{
+   cblock->aio_fildes = trace_fd;
+   cblock->aio_buf= buf;
+   cblock->aio_nbytes = size;
+   cblock->aio_offset = off;
+   cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
+
+   if (aio_write(cblock) == -1) {
+   pr_err("failed to queue perf data, error: %m\n");
+   return -1;
+   }
+
+   return 0;
+}
+
 static int process_synthesized_event(struct perf_tool *tool,
 union perf_event *event,
 struct perf_sample *sample __maybe_unused,
@@ -130,12 +147,13 @@ static int process_synthesized_event(struct perf_tool 
*tool,
return record__write(rec, event, event->header.size);
 }
 
-static int record__pushfn(void *to, void *bf, size_t size)
+static int record__pushfn(void *to, struct perf_mmap *map, size_t size, off_t 
off)
 {
struct record *rec = to;
 
rec->samples++;
-   return record__write(rec, bf, size);
+   return record__aio_write(rec->session->data->file.fd, >cblock,
+   map->data, size, off);
 }
 
 static volatile int done;
@@ -510,13 +528,110 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
 };
 
+static int record__mmap_read_sync(int trace_fd, struct aiocb **cblocks,
+   int cblocks_size, struct record *rec)
+{
+   size_t rem;
+   ssize_t size;
+   off_t rem_off;
+   int i, aio_ret, aio_errno, do_suspend;
+   struct perf_mmap *md;
+   struct timespec timeout0 = { 0, 0 };
+   struct timespec timeoutS = { 0, 1000 * 500  * 1 }; // 0.5ms
+
+   if (!cblocks_size)
+   return 0;
+
+   do {
+   do_suspend = 0;
+   /* aio_suspend() implementation inside glibc (as of v2.27) is
+* intrusive and not just blocks waiting io requests completion
+* but polls requests queue inducing context switches in perf
+* tool process. When profiling in system wide mode with tracing
+* context switches the trace may be polluted by context 
switches
+* from the perf process and the trace size becomes about 3-5
+* times bigger than that of when writing the trace serially.
+* To limit the volume of context switches from perf tool
+* process nanosleep() call below is added prior aio_suspend()
+* calling till every half of the kernel timer tick which is
+* usually 1ms (depends on CONFIG_HZ value).
+*/
+   nanosleep(, NULL);
+   if (aio_suspend((const struct aiocb**)cblocks, cblocks_size, 
)) {
+   if (errno == EAGAIN || errno == EINTR) {
+   do_suspend = 1;
+   continue;
+   } else {
+   pr_err("failed to sync perf data, error: %m\n");
+   break;
+   }
+   }
+   for (i = 0; i < cblocks_size; i++) {
+   if (cblocks[i] == NULL) {
+   continue;
+   }
+   aio_errno = aio_error(cblocks[i]);
+   if (aio_errno == 

Re: [PATCH 4/7] mm/hmm: properly handle migration pmd

2018-08-28 Thread Zi Yan
Hi Michal,

On 28 Aug 2018, at 11:45, Michal Hocko wrote:

> On Tue 28-08-18 17:42:06, Michal Hocko wrote:
>> On Tue 28-08-18 11:36:59, Jerome Glisse wrote:
>>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote:
 On Fri 24-08-18 20:05:46, Zi Yan wrote:
 [...]
>> +if (!pmd_present(pmd)) {
>> +swp_entry_t entry = pmd_to_swp_entry(pmd);
>> +
>> +if (is_migration_entry(entry)) {
>
> I think you should check thp_migration_supported() here, since PMD 
> migration is only enabled in x86_64 systems.
> Other architectures should treat PMD migration entries as bad.

 How can we have a migration pmd entry when the migration is not
 supported?
>>>
>>> Not sure i follow here, migration can happen anywhere (assuming
>>> that something like compaction is active or numa or ...). So this
>>> code can face pmd migration entry on architecture that support
>>> it. What is missing here is thp_migration_supported() call to
>>> protect the is_migration_entry() to avoid false positive on arch
>>> which do not support thp migration.
>>
>> I mean that architectures which do not support THP migration shouldn't
>> ever see any migration entry. So is_migration_entry should be always
>> false. Or do I miss something?
>
> And just to be clear. thp_migration_supported should be checked only
> when we actually _do_ the migration or evaluate migratability of the
> page. We definitely do want to sprinkle this check to all places where
> is_migration_entry is checked.

is_migration_entry() is a general check for swp_entry_t, so it can return
true even if THP migration is not enabled. is_pmd_migration_entry() always
returns false when THP migration is not enabled.

So the code can be changed in two ways, either replacing is_migration_entry()
with is_pmd_migration_entry() or adding thp_migration_supported() check
like Jerome did.

Does this clarify your question?

—
Best Regards,
Yan Zi


signature.asc
Description: OpenPGP digital signature


Re: [PATCH 4/7] mm/hmm: properly handle migration pmd

2018-08-28 Thread Zi Yan
Hi Michal,

On 28 Aug 2018, at 11:45, Michal Hocko wrote:

> On Tue 28-08-18 17:42:06, Michal Hocko wrote:
>> On Tue 28-08-18 11:36:59, Jerome Glisse wrote:
>>> On Tue, Aug 28, 2018 at 05:24:14PM +0200, Michal Hocko wrote:
 On Fri 24-08-18 20:05:46, Zi Yan wrote:
 [...]
>> +if (!pmd_present(pmd)) {
>> +swp_entry_t entry = pmd_to_swp_entry(pmd);
>> +
>> +if (is_migration_entry(entry)) {
>
> I think you should check thp_migration_supported() here, since PMD 
> migration is only enabled in x86_64 systems.
> Other architectures should treat PMD migration entries as bad.

 How can we have a migration pmd entry when the migration is not
 supported?
>>>
>>> Not sure i follow here, migration can happen anywhere (assuming
>>> that something like compaction is active or numa or ...). So this
>>> code can face pmd migration entry on architecture that support
>>> it. What is missing here is thp_migration_supported() call to
>>> protect the is_migration_entry() to avoid false positive on arch
>>> which do not support thp migration.
>>
>> I mean that architectures which do not support THP migration shouldn't
>> ever see any migration entry. So is_migration_entry should be always
>> false. Or do I miss something?
>
> And just to be clear. thp_migration_supported should be checked only
> when we actually _do_ the migration or evaluate migratability of the
> page. We definitely do want to sprinkle this check to all places where
> is_migration_entry is checked.

is_migration_entry() is a general check for swp_entry_t, so it can return
true even if THP migration is not enabled. is_pmd_migration_entry() always
returns false when THP migration is not enabled.

So the code can be changed in two ways, either replacing is_migration_entry()
with is_pmd_migration_entry() or adding thp_migration_supported() check
like Jerome did.

Does this clarify your question?

—
Best Regards,
Yan Zi


signature.asc
Description: OpenPGP digital signature


[PATCH 1/4] of/unittest: remove use of node name pointer in overlay high level test

2018-08-28 Thread Rob Herring
In preparation for removing the node name pointer, it needs to be
removed from of_unittest_overlay_high_level. However, it's not really
correct to use the node name without the unit-address and we should use
the full node name. This most easily done by iterating over the child
nodes with for_each_child_of_node() which is what of_get_child_by_name()
does internally. While at it, we might as well convert the outer loop to
use for_each_child_of_node() too instead of open coding it.

Cc: Frank Rowand 
Signed-off-by: Rob Herring 
---
 drivers/of/unittest.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 722537e14848..69a522e48970 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -2347,10 +2347,12 @@ static __init void of_unittest_overlay_high_level(void)
}
}
 
-   for (np = overlay_base_root->child; np; np = np->sibling) {
-   if (of_get_child_by_name(of_root, np->name)) {
-   unittest(0, "illegal node name in overlay_base %s",
-   np->name);
+   for_each_child_of_node(overlay_base_root, np) {
+   struct device_node *base_child;
+   for_each_child_of_node(of_root, base_child) {
+   if (!strcmp(np->full_name, base_child->full_name))
+   unittest(0, "illegal node name in overlay_base 
%pOFn",
+   np);
return;
}
}
-- 
2.17.1



[PATCH 1/4] of/unittest: remove use of node name pointer in overlay high level test

2018-08-28 Thread Rob Herring
In preparation for removing the node name pointer, it needs to be
removed from of_unittest_overlay_high_level. However, it's not really
correct to use the node name without the unit-address and we should use
the full node name. This most easily done by iterating over the child
nodes with for_each_child_of_node() which is what of_get_child_by_name()
does internally. While at it, we might as well convert the outer loop to
use for_each_child_of_node() too instead of open coding it.

Cc: Frank Rowand 
Signed-off-by: Rob Herring 
---
 drivers/of/unittest.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 722537e14848..69a522e48970 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -2347,10 +2347,12 @@ static __init void of_unittest_overlay_high_level(void)
}
}
 
-   for (np = overlay_base_root->child; np; np = np->sibling) {
-   if (of_get_child_by_name(of_root, np->name)) {
-   unittest(0, "illegal node name in overlay_base %s",
-   np->name);
+   for_each_child_of_node(overlay_base_root, np) {
+   struct device_node *base_child;
+   for_each_child_of_node(of_root, base_child) {
+   if (!strcmp(np->full_name, base_child->full_name))
+   unittest(0, "illegal node name in overlay_base 
%pOFn",
+   np);
return;
}
}
-- 
2.17.1



[PATCH 2/4] of/unittest: add printf tests for node name

2018-08-28 Thread Rob Herring
Add some printf test for printing the node name (without the
unit-address).

Cc: Frank Rowand 
Signed-off-by: Rob Herring 
---
 drivers/of/unittest.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 69a522e48970..9def7be9c111 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -299,6 +299,10 @@ static void __init of_unittest_printf(void)
 
of_unittest_printf_one(np, "%pOF",  full_name);
of_unittest_printf_one(np, "%pOFf", full_name);
+   of_unittest_printf_one(np, "%pOFn", "dev");
+   of_unittest_printf_one(np, "%2pOFn", "dev");
+   of_unittest_printf_one(np, "%5pOFn", "  dev");
+   of_unittest_printf_one(np, "%pOFnc", "dev:test-sub-device");
of_unittest_printf_one(np, "%pOFp", phandle_str);
of_unittest_printf_one(np, "%pOFP", "dev@100");
of_unittest_printf_one(np, "ABC %pOFP ABC", "ABC dev@100 ABC");
-- 
2.17.1



[PATCH v5 26/27] irqchip/gic: Add functions to access irq priorities

2018-08-28 Thread Julien Thierry
Add accessors to the GIC distributor/redistributors priority registers.

Signed-off-by: Julien Thierry 
Cc: Thomas Gleixner 
Cc: Jason Cooper 
Cc: Marc Zyngier 
---
 drivers/irqchip/irq-gic-common.c | 10 ++
 drivers/irqchip/irq-gic-common.h |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 01e673c..910746f 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -98,6 +98,16 @@ int gic_configure_irq(unsigned int irq, unsigned int type,
return ret;
 }
 
+void gic_set_irq_prio(unsigned int irq, void __iomem *base, u8 prio)
+{
+   writeb_relaxed(prio, base + GIC_DIST_PRI + irq);
+}
+
+u8 gic_get_irq_prio(unsigned int irq, void __iomem *base)
+{
+   return readb_relaxed(base + GIC_DIST_PRI + irq);
+}
+
 void gic_dist_config(void __iomem *base, int gic_irqs,
 void (*sync_access)(void))
 {
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 3919cd7..1586dbd 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -35,6 +35,8 @@ void gic_dist_config(void __iomem *base, int gic_irqs,
 void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data);
+void gic_set_irq_prio(unsigned int irq, void __iomem *base, u8 prio);
+u8 gic_get_irq_prio(unsigned int irq, void __iomem *base);
 
 void gic_set_kvm_info(const struct gic_kvm_info *info);
 
-- 
1.9.1



<    1   2   3   4   5   6   7   8   9   10   >