[PATCH v8 10/14] PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

A Root Complex Event Collector provides support for
terminating error and PME messages from associated RCiEPs.

Make use of the RCEC Endpoint Association Extended Capability
to identify associated RCiEPs. Link the associated RCiEPs as
the RCECs are enumerated.

Co-developed-by: Qiuxu Zhuo 
Signed-off-by: Qiuxu Zhuo 
Signed-off-by: Sean V Kelley 
Reviewed-by: Jonathan Cameron 
---
 drivers/pci/pci.h  |  2 +
 drivers/pci/pcie/portdrv_pci.c |  3 ++
 drivers/pci/pcie/rcec.c| 91 ++
 include/linux/pci.h|  1 +
 4 files changed, 97 insertions(+)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 98ec87ef780d..ea5716d48b68 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -473,9 +473,11 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {}
 #ifdef CONFIG_PCIEPORTBUS
 int pci_rcec_init(struct pci_dev *dev);
 void pci_rcec_exit(struct pci_dev *dev);
+void pcie_link_rcec(struct pci_dev *rcec);
 #else
 static inline int pci_rcec_init(struct pci_dev *dev) { return 0; }
 static inline void pci_rcec_exit(struct pci_dev *dev) {}
+static inline void pcie_link_rcec(struct pci_dev *rcec) {}
 #endif
 
 #ifdef CONFIG_PCI_ATS
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 4d880679b9b1..dbeb0155c2c3 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -110,6 +110,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
 (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)))
return -ENODEV;
 
+   if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
+   pcie_link_rcec(dev);
+
status = pcie_port_device_register(dev);
if (status)
return status;
diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
index da02b0af442d..9ba74d8064e9 100644
--- a/drivers/pci/pcie/rcec.c
+++ b/drivers/pci/pcie/rcec.c
@@ -15,6 +15,97 @@
 
 #include "../pci.h"
 
+struct walk_rcec_data {
+   struct pci_dev *rcec;
+   int (*user_callback)(struct pci_dev *dev, void *data);
+   void *user_data;
+};
+
+static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep)
+{
+   unsigned long bitmap = rcec->rcec_ea->bitmap;
+   unsigned int devn;
+
+   /* An RCiEP found on a different bus in range */
+   if (rcec->bus->number != rciep->bus->number)
+   return true;
+
+   /* Same bus, so check bitmap */
+   for_each_set_bit(devn, &bitmap, 32)
+   if (devn == rciep->devfn)
+   return true;
+
+   return false;
+}
+
+static int link_rcec_helper(struct pci_dev *dev, void *data)
+{
+   struct walk_rcec_data *rcec_data = data;
+   struct pci_dev *rcec = rcec_data->rcec;
+
+   if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && 
rcec_assoc_rciep(rcec, dev)) {
+   dev->rcec = rcec;
+   pci_dbg(dev, "PME & error events reported via %s\n", 
pci_name(rcec));
+   }
+
+   return 0;
+}
+
+static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void 
*userdata)
+{
+   struct walk_rcec_data *rcec_data = userdata;
+   struct pci_dev *rcec = rcec_data->rcec;
+   u8 nextbusn, lastbusn;
+   struct pci_bus *bus;
+   unsigned int bnr;
+
+   if (!rcec->rcec_ea)
+   return;
+
+   /* Walk own bus for bitmap based association */
+   pci_walk_bus(rcec->bus, cb, rcec_data);
+
+   nextbusn = rcec->rcec_ea->nextbusn;
+   lastbusn = rcec->rcec_ea->lastbusn;
+
+   /* All RCiEP devices are on the same bus as the RCEC */
+   if (nextbusn == 0xff && lastbusn == 0x00)
+   return;
+
+   for (bnr = nextbusn; bnr <= lastbusn; bnr++) {
+   /* No association indicated (PCIe 5.0-1, 7.9.10.3) */
+   if (bnr == rcec->bus->number)
+   continue;
+
+   bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr);
+   if (!bus)
+   continue;
+
+   /* Find RCiEP devices on the given bus ranges */
+   pci_walk_bus(bus, cb, rcec_data);
+   }
+}
+
+/**
+ * pcie_link_rcec - Link RCiEP devices associating with RCEC.
+ * @rcec RCEC whose RCiEP devices should be linked.
+ *
+ * Link the given RCEC to each RCiEP device found.
+ */
+void pcie_link_rcec(struct pci_dev *rcec)
+{
+   struct walk_rcec_data rcec_data;
+
+   if (!rcec->rcec_ea)
+   return;
+
+   rcec_data.rcec = rcec;
+   rcec_data.user_callback = NULL;
+   rcec_data.user_data = NULL;
+
+   walk_rcec(link_rcec_helper, &rcec_data);
+}
+
 int pci_rcec_init(struct pci_dev *dev)
 {
struct rcec_ea *rcec_ea;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2290439e8bc0..e546b16b13c1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -330,6 +330,7 @@ struct pci_dev {
 #endif
 #ifdef CONFIG_PCIEPORTBUS
struct rcec_ea  *rcec_ea;   /* R

[PATCH v8 14/14] PCI/AER: Add RCEC AER error injection support

2020-10-02 Thread Sean V Kelley
From: Qiuxu Zhuo 

The Root Complex Event Collectors (RCEC) appear as peers to Root Ports
and also have the AER capability. So add RCEC support to the current AER
error injection driver.

Signed-off-by: Qiuxu Zhuo 
Co-developed-by: Sean V Kelley 
Signed-off-by: Sean V Kelley 
---
 drivers/pci/pcie/aer_inject.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c
index c2cbf425afc5..011a6c54b4e3 100644
--- a/drivers/pci/pcie/aer_inject.c
+++ b/drivers/pci/pcie/aer_inject.c
@@ -333,8 +333,11 @@ static int aer_inject(struct aer_error_inj *einj)
if (!dev)
return -ENODEV;
rpdev = pcie_find_root_port(dev);
+   /* If Root port not found, try to find an RCEC */
+   if (!rpdev)
+   rpdev = dev->rcec;
if (!rpdev) {
-   pci_err(dev, "Root port not found\n");
+   pci_err(dev, "Neither root port nor RCEC found\n");
ret = -ENODEV;
goto out_put;
}
-- 
2.28.0



Re: [PATCH v4 1/2] dt-bindings: usb: Add binding for discrete onboard USB hubs

2020-10-02 Thread Alan Stern
On Fri, Oct 02, 2020 at 09:08:47AM -0700, Matthias Kaehlcke wrote:
> On Thu, Oct 01, 2020 at 09:21:53PM -0400, Alan Stern wrote:
> > On Thu, Oct 01, 2020 at 02:54:12PM -0700, Matthias Kaehlcke wrote:
> > > Hi,
> > > 
> > > thanks for providing more insights on the USB hardware!
> > 
> > Sure.
> > 
> > > On Wed, Sep 30, 2020 at 09:24:13PM -0400, Alan Stern wrote:
> > > > A hub that attaches only to the USB-3 data wires in a cable is not USB
> > > > compliant.  A USB-2 device plugged into such a hub would not work.
> > > > 
> > > > But ports can be wired up in weird ways.  For example, it is possible
> > > > to have the USB-3 wires from a port going directly to the host
> > > > controller, while the USB-2 wires from the same port go through a
> > > > USB-2 hub which is then connected to a separate host controller.  (In
> > > > fact, my office computer has just such an arrangement.)
> > > 
> > > It's not clear to me how this case would be addressed when (some of) the
> > > handling is done in xhci-plat.c We have two host controllers now, which 
> > > one
> > > is supposed to be in charge? I guess the idea is to specify the hub only
> > > for one of the controllers?
> > 
> > I don't grasp the point of this question.  It doesn't seem to be
> > relevant to the case you're concerned about -- your board isn't going to
> > wire up the special hub in this weird way, is it?
> 
> When doing upstream development I try to look beyond my specific use case
> and aim for solutions that are generally useful.
> 
> I don't know how common a configuration like the one on your office computer
> is. If it isn't a fringe case it seems like we should support it if feasible.

It isn't very common.  I think it was probably adopted as a stopgap kind 
of approach at a time when USB-3 was still relatively new and the 
chipsets didn't yet have full support for it.  On the other hand, it 
certainly isn't unheard of and it is compliant with the spec.

Of course, on any system that does this, the designers will be aware of 
it and could add the appropriate description (whatever it turns out to 
be) to DT.

> > _All_ of the handling could be done by xhci-plat.  Since the xHCI
> > controller is the parent of both the USB-2 and USB-3 incarnations of
> > the special hub, it won't get suspended until they are both in
> > suspend, and it will get resumed before either of them.  Similarly,
> > the power to the special hub could be switched on as part of the host
> > controller's probe routine and switched off during the host
> > controller's remove routine.
> > 
> > Using xhci-plat in this way would be better than a dedicated driver in
> > the sense that it wouldn't then be necessary to make up a fictitious
> > platform device and somehow describe it in DT.
> > 
> > The disadvantage is that we would end up with a driver that's
> > nominally meant to handle host controllers but now also manages (at
> > least in part) hubs.  A not-so-clean separation of functions.  But
> > that's not terribly different from the way your current patch works,
> > right?
> 
> Yes, this muddling of the xhci-plat code with the handling of hubs was
> one of my concerns, but who am I to argue if you as USB maintainer see
> that preferable over a dedicated driver. I suppose you are taking into
> account that there will be a need for code for different hub models that
> has to live somewhere (could be a dedicated file or directory).

This isn't really a difference in the hubs but rather in their support 
circuitry.  Still, if you look through the various *-platform.c files in 
drivers/usb/host (and also in pci-quirks.c), you'll see plenty of 
examples of platform-specific code for particular devices.

Ideally the new code would go into the hub driver.  But that won't work, 
since the hub driver doesn't get involved until the hub has been 
discovered on the USB bus, and that won't happen until its power has 
been enabled.

> And even if it is not my specific use case it would be nice to support
> hubs that are part of a hierarchy and not wired directly to the host
> controller. We don't necessarily have to implement all support for this
> initially, but should have it in mind at least for the bindings.
> 
> Also we would probably lose the ability to use a sysfs attribute to
> configure whether the hub should be always powered during suspend or
> not. This could be worked around with a DT property, however DT
> maintainers tend to be reluctant about configuration entries that
> don't translate directly to the hardware.

In theory the sysfs attribute could go under the host controller, but I 
agree it would be awkward.

This is just one example of a more general problem, as I mentioned in a 
recent email to Doug Anderson.

Alan Stern


[PATCH v8 09/14] PCI/AER: Apply function level reset to RCiEP on fatal error

2020-10-02 Thread Sean V Kelley
From: Qiuxu Zhuo 

Attempt to do a function level reset for an RCiEP on fatal error.

Signed-off-by: Qiuxu Zhuo 
Reviewed-by: Jonathan Cameron 
---
 drivers/pci/pcie/err.c | 31 ++-
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index c4ceca42a3bf..38abd7984996 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -169,6 +169,17 @@ static void pci_walk_bridge(struct pci_dev *bridge, int 
(*cb)(struct pci_dev *,
cb(bridge, userdata);
 }
 
+static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
+{
+   if (!pcie_has_flr(dev))
+   return PCI_ERS_RESULT_DISCONNECT;
+
+   if (pcie_flr(dev))
+   return PCI_ERS_RESULT_DISCONNECT;
+
+   return PCI_ERS_RESULT_RECOVERED;
+}
+
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_channel_state_t state,
pci_ers_result_t (*reset_subordinate_devices)(struct 
pci_dev *pdev))
@@ -195,15 +206,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
if (state == pci_channel_io_frozen) {
pci_walk_bridge(bridge, report_frozen_detected, &status);
if (type == PCI_EXP_TYPE_RC_END) {
-   pci_warn(dev, "subordinate device reset not possible 
for RCiEP\n");
-   status = PCI_ERS_RESULT_NONE;
-   goto failed;
-   }
-
-   status = reset_subordinate_devices(bridge);
-   if (status != PCI_ERS_RESULT_RECOVERED) {
-   pci_warn(dev, "subordinate device reset failed\n");
-   goto failed;
+   status = flr_on_rciep(dev);
+   if (status != PCI_ERS_RESULT_RECOVERED) {
+   pci_warn(dev, "function level reset failed\n");
+   goto failed;
+   }
+   } else {
+   status = reset_subordinate_devices(bridge);
+   if (status != PCI_ERS_RESULT_RECOVERED) {
+   pci_warn(dev, "subordinate device reset 
failed\n");
+   goto failed;
+   }
}
} else {
pci_walk_bridge(bridge, report_normal_detected, &status);
-- 
2.28.0



[PATCH v8 07/14] PCI/ERR: Limit AER resets in pcie_do_recovery()

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

In some cases a bridge may not exist because control of the
hardware may be handled only by firmware, so the bridge is
not visible to the OS. This scenario is also possible
in future use cases involving non-native use of RCECs
by firmware. So explicitly apply conditional logic
around these resets by limiting them to root ports and
downstream ports.

Signed-off-by: Sean V Kelley 
Acked-by: Jonathan Cameron 
---
 drivers/pci/pcie/err.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 9b2130725ab6..5ff1afa4763d 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -218,9 +218,12 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_dbg(dev, "broadcast resume message\n");
pci_walk_bridge(bridge, report_resume, &status);
 
-   if (pcie_aer_is_native(bridge))
-   pcie_clear_device_status(bridge);
-   pci_aer_clear_nonfatal_status(bridge);
+   if (type == PCI_EXP_TYPE_ROOT_PORT ||
+   type == PCI_EXP_TYPE_DOWNSTREAM) {
+   if (pcie_aer_is_native(bridge))
+   pcie_clear_device_status(bridge);
+   pci_aer_clear_nonfatal_status(bridge);
+   }
pci_info(dev, "device recovery successful\n");
return status;
 
-- 
2.28.0



[PATCH v8 13/14] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

Root Complex Event Collectors (RCEC) appear as peers of Root Ports
and also have the PME capability. As with AER, there is a need to be
able to walk the RCiEPs associated with their RCEC for purposes of
acting upon them with callbacks. So add RCEC support through the use
of pcie_walk_rcec() to the current PME service driver and attach the
PME service driver to the RCEC device.

Co-developed-by: Qiuxu Zhuo 
Signed-off-by: Qiuxu Zhuo 
Signed-off-by: Sean V Kelley 
---
 drivers/pci/pcie/pme.c  | 15 +++
 drivers/pci/pcie/portdrv_core.c |  8 
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 6a32970bb731..87799166c96a 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -310,7 +310,10 @@ static int pcie_pme_can_wakeup(struct pci_dev *dev, void 
*ign)
 static void pcie_pme_mark_devices(struct pci_dev *port)
 {
pcie_pme_can_wakeup(port, NULL);
-   if (port->subordinate)
+
+   if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC)
+   pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL);
+   else if (port->subordinate)
pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL);
 }
 
@@ -320,10 +323,15 @@ static void pcie_pme_mark_devices(struct pci_dev *port)
  */
 static int pcie_pme_probe(struct pcie_device *srv)
 {
-   struct pci_dev *port;
+   struct pci_dev *port = srv->port;
struct pcie_pme_service_data *data;
int ret;
 
+   /* Limit to Root Ports or Root Complex Event Collectors */
+   if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+   (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+   return -ENODEV;
+
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -333,7 +341,6 @@ static int pcie_pme_probe(struct pcie_device *srv)
data->srv = srv;
set_service_data(srv, data);
 
-   port = srv->port;
pcie_pme_interrupt_enable(port, false);
pcie_clear_root_pme_status(port);
 
@@ -445,7 +452,7 @@ static void pcie_pme_remove(struct pcie_device *srv)
 
 static struct pcie_port_service_driver pcie_pme_driver = {
.name   = "pcie_pme",
-   .port_type  = PCI_EXP_TYPE_ROOT_PORT,
+   .port_type  = PCIE_ANY_PORT,
.service= PCIE_PORT_SERVICE_PME,
 
.probe  = pcie_pme_probe,
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 50a9522ab07d..99769c636775 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -234,11 +234,11 @@ static int get_port_device_capability(struct pci_dev *dev)
 #endif
 
/*
-* Root ports are capable of generating PME too.  Root Complex
-* Event Collectors can also generate PMEs, but we don't handle
-* those yet.
+* Root ports and Root Complex Event Collectors are capable
+* of generating PME.
 */
-   if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT &&
+   if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) &&
(pcie_ports_native || host->native_pme)) {
services |= PCIE_PORT_SERVICE_PME;
 
-- 
2.28.0



[PATCH v8 08/14] PCI/AER: Extend AER error handling to RCECs

2020-10-02 Thread Sean V Kelley
From: Jonathan Cameron 

Currently the kernel does not handle AER errors for Root Complex
integrated End Points (RCiEPs)[0]. These devices sit on a root bus within
the Root Complex (RC). AER handling is performed by a Root Complex Event
Collector (RCEC) [1] which is effectively a type of RCiEP on the same
root bus.

For an RCEC (technically not a Bridge), error messages "received" from
associated RCiEPs must be enabled for "transmission" in order to cause a
System Error via the Root Control register or (when the Advanced Error
Reporting Capability is present) reporting via the Root Error Command
register and logging in the Root Error Status register and Error Source
Identification register.

In addition to the defined OS level handling of the reset flow for the
associated RCiEPs of an RCEC, it is possible to also have non-native
handling. In that case there is no need to take any actions on the RCEC
because the firmware is responsible for them. This is true where APEI [2]
is used to report the AER errors via a GHES[v2] HEST entry [3] and
relevant AER CPER record [4] and non-native handling is in use.

We effectively end up with two different types of discovery for
purposes of handling AER errors:

1) Normal bus walk - we pass the downstream port above a bus to which
the device is attached and it walks everything below that point.

2) An RCiEP with no visible association with an RCEC as there is no need
to walk devices. In that case, the flow is to just call the callbacks for
the actual device, which in turn references its associated RCEC.

Modify pci_walk_bridge() to handle devices which lack a subordinate bus.
If the device has no subordinate bus, the callback is invoked on that
device alone.

[0] PCI Express Base Specification 5.0-1 1.3.2.3 Root Complex
Integrated Endpoint Rules.
[1] PCI Express Base Specification 5.0-1 6.2 Error Signalling and
Logging
[2] ACPI Specification 6.3 Chapter 18 ACPI Platform Error Interface (APEI)
[3] ACPI Specification 6.3 18.2.3.7 Generic Hardware Error Source
[4] UEFI Specification 2.8, N.2.7 PCI Express Error Section

Signed-off-by: Jonathan Cameron 
Signed-off-by: Sean V Kelley 
---
 drivers/pci/pcie/err.c | 25 -
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 5ff1afa4763d..c4ceca42a3bf 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -148,19 +148,25 @@ static int report_resume(struct pci_dev *dev, void *data)
 
 /**
  * pci_walk_bridge - walk bridges potentially AER affected
- * @bridge   bridge which may be a Port.
+ * @bridge   bridge which may be an RCEC with associated RCiEPs,
+ *   an RCiEP associated with an RCEC, or a Port.
  * @cb   callback to be called for each device found
  * @userdata arbitrary pointer to be passed to callback.
  *
  * If the device provided is a bridge, walk the subordinate bus,
  * including any bridged devices on buses under this bus.
  * Call the provided callback on each device found.
+ *
+ * If the device provided has no subordinate bus, call the provided
+ * callback on the device itself.
  */
 static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev 
*, void *),
void *userdata)
 {
if (bridge->subordinate)
pci_walk_bus(bridge->subordinate, cb, userdata);
+   else
+   cb(bridge, userdata);
 }
 
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
@@ -174,11 +180,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
/*
 * Error recovery runs on all subordinates of the first downstream
 * bridge. If the downstream bridge detected the error, it is
-* cleared at the end.
+* cleared at the end. For RCiEPs we should reset just the RCiEP itself.
 */
type = pci_pcie_type(dev);
if (type == PCI_EXP_TYPE_ROOT_PORT ||
-   type == PCI_EXP_TYPE_DOWNSTREAM)
+   type == PCI_EXP_TYPE_DOWNSTREAM ||
+   type == PCI_EXP_TYPE_RC_EC ||
+   type == PCI_EXP_TYPE_RC_END)
bridge = dev;
else
bridge = pci_upstream_bridge(dev);
@@ -186,7 +194,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_dbg(dev, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
pci_walk_bridge(bridge, report_frozen_detected, &status);
-   status = reset_subordinate_device(bridge);
+   if (type == PCI_EXP_TYPE_RC_END) {
+   pci_warn(dev, "subordinate device reset not possible 
for RCiEP\n");
+   status = PCI_ERS_RESULT_NONE;
+   goto failed;
+   }
+
+   status = reset_subordinate_devices(bridge);
if (status != PCI_ERS_RESULT_RECOVERED) {
pci_warn(dev, "subordinate device reset failed\n");
goto f

[PATCH v8 02/14] PCI/RCEC: Bind RCEC devices to the Root Port driver

2020-10-02 Thread Sean V Kelley
From: Qiuxu Zhuo 

If a Root Complex Integrated Endpoint (RCiEP) is implemented, errors may
optionally be sent to a corresponding Root Complex Event Collector (RCEC).
Each RCiEP must be associated with no more than one RCEC. Interface errors
are reported to the OS by RCECs.

For an RCEC (technically not a Bridge), error messages "received" from
associated RCiEPs must be enabled for "transmission" in order to cause a
System Error via the Root Control register or (when the Advanced Error
Reporting Capability is present) reporting via the Root Error Command
register and logging in the Root Error Status register and Error Source
Identification register.

Given the commonality with Root Ports and the need to also support AER
and PME services for RCECs, extend the Root Port driver to support RCEC
devices through the addition of the RCEC Class ID to the driver
structure.

Co-developed-by: Sean V Kelley 
Signed-off-by: Sean V Kelley 
Signed-off-by: Qiuxu Zhuo 
Reviewed-by: Jonathan Cameron 
---
 drivers/pci/pcie/portdrv_pci.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 3a3ce40ae1ab..4d880679b9b1 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -106,7 +106,8 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
if (!pci_is_pcie(dev) ||
((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) &&
 (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) &&
-(pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM)))
+(pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) &&
+(pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)))
return -ENODEV;
 
status = pcie_port_device_register(dev);
@@ -195,6 +196,8 @@ static const struct pci_device_id port_pci_ids[] = {
{ PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0) },
/* subtractive decode PCI-to-PCI bridge, class type is 060401h */
{ PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x01), ~0) },
+   /* handle any Root Complex Event Collector */
+   { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) },
{ },
 };
 
-- 
2.28.0



[PATCH v8 04/14] PCI/ERR: Rename reset_link() to reset_subordinate_devices()

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

reset_link() appears to be misnamed. The point is really to
reset any devices below a given bridge. So rename it to
reset_subordinate_devices() to make it clear that we are
passing a bridge with the intent to reset the devices below it.

Suggested-by: Bjorn Helgaas 
Signed-off-by: Sean V Kelley 
Acked-by: Jonathan Cameron 
---
 drivers/pci/pci.h  | 2 +-
 drivers/pci/pcie/err.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 0e332a218d75..98ec87ef780d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -574,7 +574,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct 
pci_dev *dev)
 /* PCI error reporting and recovery */
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_channel_state_t state,
-   pci_ers_result_t (*reset_link)(struct pci_dev *pdev));
+   pci_ers_result_t (*reset_subordinate_devices)(struct 
pci_dev *pdev));
 
 bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
 #ifdef CONFIG_PCIEASPM
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index c543f419d8f9..950612342f1c 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -148,7 +148,7 @@ static int report_resume(struct pci_dev *dev, void *data)
 
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_channel_state_t state,
-   pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
+   pci_ers_result_t (*reset_subordinate_devices)(struct 
pci_dev *pdev))
 {
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
struct pci_bus *bus;
@@ -165,9 +165,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_dbg(dev, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
pci_walk_bus(bus, report_frozen_detected, &status);
-   status = reset_link(dev);
+   status = reset_subordinate_device(dev);
if (status != PCI_ERS_RESULT_RECOVERED) {
-   pci_warn(dev, "link reset failed\n");
+   pci_warn(dev, "subordinate device reset failed\n");
goto failed;
}
} else {
-- 
2.28.0



[PATCH v8 05/14] PCI/ERR: Use "bridge" for clarity in pcie_do_recovery()

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

A generic term such as "bridge" may be used for something with
a subordinate bus. The mix of ports would benefit from a use of
the term. Further clarity can be had in pcie_do_recovery()
with use of pci_upstream_bridge() in place of dev->bus->self.
Reverse the pcie_do_recovery() conditional logic and replace
use of "dev" with "bridge".

Suggested-by: Bjorn Helgaas 
Signed-off-by: Sean V Kelley 
Acked-by: Jonathan Cameron 
---
 drivers/pci/pcie/err.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 950612342f1c..e68ea5243ff2 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -152,20 +152,26 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 {
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
struct pci_bus *bus;
+   struct pci_dev *bridge;
+   int type;
 
/*
-* Error recovery runs on all subordinates of the first downstream port.
-* If the downstream port detected the error, it is cleared at the end.
+* Error recovery runs on all subordinates of the first downstream
+* bridge. If the downstream bridge detected the error, it is
+* cleared at the end.
 */
-   if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
- pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
-   dev = dev->bus->self;
-   bus = dev->subordinate;
-
+   type = pci_pcie_type(dev);
+   if (type == PCI_EXP_TYPE_ROOT_PORT ||
+   type == PCI_EXP_TYPE_DOWNSTREAM)
+   bridge = dev;
+   else
+   bridge = pci_upstream_bridge(dev);
+
+   bus = bridge->subordinate;
pci_dbg(dev, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
pci_walk_bus(bus, report_frozen_detected, &status);
-   status = reset_subordinate_device(dev);
+   status = reset_subordinate_device(bridge);
if (status != PCI_ERS_RESULT_RECOVERED) {
pci_warn(dev, "subordinate device reset failed\n");
goto failed;
@@ -197,9 +203,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_dbg(dev, "broadcast resume message\n");
pci_walk_bus(bus, report_resume, &status);
 
-   if (pcie_aer_is_native(dev))
-   pcie_clear_device_status(dev);
-   pci_aer_clear_nonfatal_status(dev);
+   if (pcie_aer_is_native(bridge))
+   pcie_clear_device_status(bridge);
+   pci_aer_clear_nonfatal_status(bridge);
pci_info(dev, "device recovery successful\n");
return status;
 
-- 
2.28.0



[PATCH v8 12/14] PCI/AER: Add pcie_walk_rcec() to RCEC AER handling

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

Root Complex Event Collectors (RCEC) appear as peers to Root Ports
and also have the AER capability. In addition, actions need to be taken
for associated RCiEPs. In such cases the RCECs will need to be walked in
order to find and act upon their respective RCiEPs.  Extend the existing
ability to link the RCECs with a walking function pcie_walk_rcec(). Add
RCEC support to the current AER service driver and attach the AER service
driver to the RCEC device.

Co-developed-by: Qiuxu Zhuo 
Signed-off-by: Qiuxu Zhuo 
Signed-off-by: Sean V Kelley 
Reviewed-by: Jonathan Cameron 
---
 drivers/pci/pci.h   |  4 
 drivers/pci/pcie/aer.c  | 27 ---
 drivers/pci/pcie/rcec.c | 37 +
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index ea5716d48b68..73fe09355e21 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -474,10 +474,14 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {}
 int pci_rcec_init(struct pci_dev *dev);
 void pci_rcec_exit(struct pci_dev *dev);
 void pcie_link_rcec(struct pci_dev *rcec);
+void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *),
+   void *userdata);
 #else
 static inline int pci_rcec_init(struct pci_dev *dev) { return 0; }
 static inline void pci_rcec_exit(struct pci_dev *dev) {}
 static inline void pcie_link_rcec(struct pci_dev *rcec) {}
+static inline void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct 
pci_dev *, void *),
+ void *userdata) {}
 #endif
 
 #ifdef CONFIG_PCI_ATS
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index dccdba60b5d9..3cde646f71c0 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -300,7 +300,7 @@ int pci_aer_raw_clear_status(struct pci_dev *dev)
return -EIO;
 
port_type = pci_pcie_type(dev);
-   if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
+   if (port_type == PCI_EXP_TYPE_ROOT_PORT || port_type == 
PCI_EXP_TYPE_RC_EC) {
pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
}
@@ -595,7 +595,8 @@ static umode_t aer_stats_attrs_are_visible(struct kobject 
*kobj,
if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
 a == &dev_attr_aer_rootport_total_err_fatal.attr ||
 a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
-   pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
+   ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
+(pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC)))
return 0;
 
return a->mode;
@@ -916,7 +917,10 @@ static bool find_source_device(struct pci_dev *parent,
if (result)
return true;
 
-   pci_walk_bus(parent->subordinate, find_device_iter, e_info);
+   if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC)
+   pcie_walk_rcec(parent, find_device_iter, e_info);
+   else
+   pci_walk_bus(parent->subordinate, find_device_iter, e_info);
 
if (!e_info->error_dev_num) {
pci_info(parent, "can't find device of ID%04x\n", e_info->id);
@@ -1053,6 +1057,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct 
aer_err_info *info)
if (!(info->status & ~info->mask))
return 0;
} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+  pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC ||
   pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
   info->severity == AER_NONFATAL) {
 
@@ -1205,6 +1210,7 @@ static int set_device_error_reporting(struct pci_dev 
*dev, void *data)
int type = pci_pcie_type(dev);
 
if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
+   (type == PCI_EXP_TYPE_RC_EC) ||
(type == PCI_EXP_TYPE_UPSTREAM) ||
(type == PCI_EXP_TYPE_DOWNSTREAM)) {
if (enable)
@@ -1229,9 +1235,11 @@ static void 
set_downstream_devices_error_reporting(struct pci_dev *dev,
 {
set_device_error_reporting(dev, &enable);
 
-   if (!dev->subordinate)
-   return;
-   pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
+   if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
+   pcie_walk_rcec(dev, set_device_error_reporting, &enable);
+   else if (dev->subordinate)
+   pci_walk_bus(dev->subordinate, set_device_error_reporting, 
&enable);
+
 }
 
 /**
@@ -1329,6 +1337,11 @@ static int aer_probe(struct pcie_device *dev)
struct device *device = &dev->device;
struct pci_dev *port = dev->port;
 
+   /* Limit to Root Ports or Root Complex Event Collectors */
+   if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+   (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+  

[PATCH v8 11/14] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR

2020-10-02 Thread Sean V Kelley
From: Qiuxu Zhuo 

When attempting error recovery for an RCiEP associated with an RCEC device,
there needs to be a way to update the Root Error Status, the Uncorrectable
Error Status and the Uncorrectable Error Severity of the parent RCEC.
In some non-native cases in which there is no OS visible device
associated with the RCiEP, there is nothing to act upon as the firmware
is acting before the OS. So add handling for the linked 'rcec' in AER/ERR
while taking into account non-native cases.

Co-developed-by: Sean V Kelley 
Signed-off-by: Sean V Kelley 
Signed-off-by: Qiuxu Zhuo 
Reviewed-by: Jonathan Cameron 
---
 drivers/pci/pcie/aer.c |  9 +
 drivers/pci/pcie/err.c | 39 ---
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 65dff5f3457a..dccdba60b5d9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1358,17 +1358,18 @@ static int aer_probe(struct pcie_device *dev)
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 {
int aer = dev->aer_cap;
+   int rc = 0;
u32 reg32;
-   int rc;
-
 
/* Disable Root's interrupt in response to error messages */
pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
 
-   rc = pci_bus_error_reset(dev);
-   pci_info(dev, "Root Port link has been reset\n");
+   if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
+   rc = pci_bus_error_reset(dev);
+   pci_info(dev, "Root Port link has been reset\n");
+   }
 
/* Clear Root Error Status */
pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 38abd7984996..956ad4c86d53 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -149,7 +149,8 @@ static int report_resume(struct pci_dev *dev, void *data)
 /**
  * pci_walk_bridge - walk bridges potentially AER affected
  * @bridge   bridge which may be an RCEC with associated RCiEPs,
- *   an RCiEP associated with an RCEC, or a Port.
+ *   or a Port.
+ * @dev  an RCiEP lacking an associated RCEC.
  * @cb   callback to be called for each device found
  * @userdata arbitrary pointer to be passed to callback.
  *
@@ -160,13 +161,20 @@ static int report_resume(struct pci_dev *dev, void *data)
  * If the device provided has no subordinate bus, call the provided
  * callback on the device itself.
  */
-static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev 
*, void *),
+static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev *dev,
+   int (*cb)(struct pci_dev *, void *),
void *userdata)
 {
-   if (bridge->subordinate)
+   /*
+* In a non-native case where there is no OS-visible reporting
+* device the bridge will be NULL, i.e., no RCEC, no PORT.
+*/
+   if (bridge && bridge->subordinate)
pci_walk_bus(bridge->subordinate, cb, userdata);
-   else
+   else if (bridge)
cb(bridge, userdata);
+   else
+   cb(dev, userdata);
 }
 
 static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
@@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
type = pci_pcie_type(dev);
if (type == PCI_EXP_TYPE_ROOT_PORT ||
type == PCI_EXP_TYPE_DOWNSTREAM ||
-   type == PCI_EXP_TYPE_RC_EC ||
-   type == PCI_EXP_TYPE_RC_END)
+   type == PCI_EXP_TYPE_RC_EC)
bridge = dev;
+   else if (type == PCI_EXP_TYPE_RC_END)
+   bridge = dev->rcec;
else
bridge = pci_upstream_bridge(dev);
 
pci_dbg(dev, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
-   pci_walk_bridge(bridge, report_frozen_detected, &status);
+   pci_walk_bridge(bridge, dev, report_frozen_detected, &status);
if (type == PCI_EXP_TYPE_RC_END) {
+   /*
+* The callback only clears the Root Error Status
+* of the RCEC (see aer.c). Only perform this for the
+* native case, i.e., an RCEC is present.
+*/
+   if (bridge)
+   reset_subordinate_devices(bridge);
+
status = flr_on_rciep(dev);
if (status != PCI_ERS_RESULT_RECOVERED) {
pci_warn(dev, "function level reset failed\n");
@@ -219,13 +236,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
}
}
} else {
-   pci_walk_bridge(bridge, report_normal_detected, &status);
+

[PATCH v8 00/14] Add RCEC handling to PCI/AER

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

Changes since v7 [1]:

- No functional changes.

- Reword bridge patch.
- Noted testing below for #non-native/no RCEC case
(Jonathan Cameron)

- Separate out pci_walk_bus() into pci_walk_bridge() change.
- Put remaining dev to bridge name changes in the separate patch from v7.
(Bjorn Helgaas)

[1] 
https://lore.kernel.org/lkml/20200930215820.1113353-1-seanvk@oregontracks.org/

Root Complex Event Collectors (RCEC) provide support for terminating error
and PME messages from Root Complex Integrated Endpoints (RCiEPs).  An RCEC
resides on a Bus in the Root Complex. Multiple RCECs can in fact reside on
a single bus. An RCEC will explicitly declare supported RCiEPs through the
Root Complex Endpoint Association Extended Capability.

(See PCIe 5.0-1, sections 1.3.2.3 (RCiEP), and 7.9.10 (RCEC Ext. Cap.))

The kernel lacks handling for these RCECs and the error messages received
from their respective associated RCiEPs. More recently, a new CPU
interconnect, Compute eXpress Link (CXL) depends on RCEC capabilities for
purposes of error messaging from CXL 1.1 supported RCiEP devices.

DocLink: https://www.computeexpresslink.org/

This use case is not limited to CXL. Existing hardware today includes
support for RCECs, such as the Denverton microserver product
family. Future hardware will be forthcoming.

(See Intel Document, Order number: 33061-003US)

So services such as AER or PME could be associated with an RCEC driver.
In the case of CXL, if an RCiEP (i.e., CXL 1.1 device) is associated with a
platform's RCEC it shall signal PME and AER error conditions through that
RCEC.

Towards the above use cases, add the missing RCEC class and extend the
PCIe Root Port and service drivers to allow association of RCiEPs to their
respective parent RCEC and facilitate handling of terminating error and PME
messages.

Tested-by: Jonathan Cameron  #non-native/no RCEC


Jonathan Cameron (1):
  PCI/AER: Extend AER error handling to RCECs

Qiuxu Zhuo (5):
  PCI/RCEC: Add RCEC class code and extended capability
  PCI/RCEC: Bind RCEC devices to the Root Port driver
  PCI/AER: Apply function level reset to RCiEP on fatal error
  PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR
  PCI/AER: Add RCEC AER error injection support

Sean V Kelley (8):
  PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities()
  PCI/ERR: Rename reset_link() to reset_subordinate_device()
  PCI/ERR: Use "bridge" for clarity in pcie_do_recovery()
  PCI/ERR: Add pci_walk_bridge() to pcie_do_recovery()
  PCI/ERR: Limit AER resets in pcie_do_recovery()
  PCI/RCEC: Add pcie_link_rcec() to associate RCiEPs
  PCI/AER: Add pcie_walk_rcec() to RCEC AER handling
  PCI/PME: Add pcie_walk_rcec() to RCEC PME handling

 drivers/pci/pci.h   |  25 -
 drivers/pci/pcie/Makefile   |   2 +-
 drivers/pci/pcie/aer.c  |  36 --
 drivers/pci/pcie/aer_inject.c   |   5 +-
 drivers/pci/pcie/err.c  | 109 +++
 drivers/pci/pcie/pme.c  |  15 ++-
 drivers/pci/pcie/portdrv_core.c |   8 +-
 drivers/pci/pcie/portdrv_pci.c  |   8 +-
 drivers/pci/pcie/rcec.c | 187 
 drivers/pci/probe.c |   2 +
 include/linux/pci.h |   5 +
 include/linux/pci_ids.h |   1 +
 include/uapi/linux/pci_regs.h   |   7 ++
 13 files changed, 367 insertions(+), 43 deletions(-)
 create mode 100644 drivers/pci/pcie/rcec.c

--
2.28.0



[PATCH v8 01/14] PCI/RCEC: Add RCEC class code and extended capability

2020-10-02 Thread Sean V Kelley
From: Qiuxu Zhuo 

A PCIe Root Complex Event Collector (RCEC) has the base class 0x08,
sub-class 0x07, and programming interface 0x00. Add the class code
0x0807 to identify RCEC devices and add the defines for the RCEC
Endpoint Association Extended Capability.

See PCI Express Base Specification, version 5.0-1, section "1.3.4
Root Complex Event Collector" and section "7.9.10 Root Complex
Event Collector Endpoint Association Extended Capability"

Signed-off-by: Qiuxu Zhuo 
Reviewed-by: Jonathan Cameron 
---
 include/linux/pci_ids.h   | 1 +
 include/uapi/linux/pci_regs.h | 7 +++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1ab1e24bcbce..d8156a5dbee8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -81,6 +81,7 @@
 #define PCI_CLASS_SYSTEM_RTC   0x0803
 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG   0x0804
 #define PCI_CLASS_SYSTEM_SDHCI 0x0805
+#define PCI_CLASS_SYSTEM_RCEC  0x0807
 #define PCI_CLASS_SYSTEM_OTHER 0x0880
 
 #define PCI_BASE_CLASS_INPUT   0x09
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f9701410d3b5..f335f65f65d6 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -828,6 +828,13 @@
 #define  PCI_PWR_CAP_BUDGET(x) ((x) & 1)   /* Included in system budget */
 #define PCI_EXT_CAP_PWR_SIZEOF 16
 
+/* Root Complex Event Collector Endpoint Association  */
+#define PCI_RCEC_RCIEP_BITMAP  4   /* Associated Bitmap for RCiEPs */
+#define PCI_RCEC_BUSN  8   /* RCEC Associated Bus Numbers */
+#define  PCI_RCEC_BUSN_REG_VER 0x02/* Least capability version that BUSN 
present */
+#define  PCI_RCEC_BUSN_NEXT(x) (((x) >> 8) & 0xff)
+#define  PCI_RCEC_BUSN_LAST(x) (((x) >> 16) & 0xff)
+
 /* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */
 #define PCI_VNDR_HEADER4   /* Vendor-Specific Header */
 #define  PCI_VNDR_HEADER_ID(x) ((x) & 0xffff)
-- 
2.28.0



[PATCH v8 03/14] PCI/RCEC: Cache RCEC capabilities in pci_init_capabilities()

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

Extend support for Root Complex Event Collectors by decoding and
caching the RCEC Endpoint Association Extended Capabilities when
enumerating. Use that cached information for later error source
reporting. See PCI Express Base Specification, version 5.0-1,
section 7.9.10.

Suggested-by: Bjorn Helgaas 
Co-developed-by: Qiuxu Zhuo 
Signed-off-by: Qiuxu Zhuo 
Signed-off-by: Sean V Kelley 
Reviewed-by: Jonathan Cameron 
---
 drivers/pci/pci.h | 17 +++
 drivers/pci/pcie/Makefile |  2 +-
 drivers/pci/pcie/rcec.c   | 59 +++
 drivers/pci/probe.c   |  2 ++
 include/linux/pci.h   |  4 +++
 5 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 drivers/pci/pcie/rcec.c

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fa12f7cbc1a0..0e332a218d75 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -449,6 +449,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct 
aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 #endif /* CONFIG_PCIEAER */
 
+#ifdef CONFIG_PCIEPORTBUS
+/* Cached RCEC Endpoint Association */
+struct rcec_ea {
+   u8  nextbusn;
+   u8  lastbusn;
+   u32 bitmap;
+};
+#endif
+
 #ifdef CONFIG_PCIE_DPC
 void pci_save_dpc_state(struct pci_dev *dev);
 void pci_restore_dpc_state(struct pci_dev *dev);
@@ -461,6 +470,14 @@ static inline void pci_restore_dpc_state(struct pci_dev 
*dev) {}
 static inline void pci_dpc_init(struct pci_dev *pdev) {}
 #endif
 
+#ifdef CONFIG_PCIEPORTBUS
+int pci_rcec_init(struct pci_dev *dev);
+void pci_rcec_exit(struct pci_dev *dev);
+#else
+static inline int pci_rcec_init(struct pci_dev *dev) { return 0; }
+static inline void pci_rcec_exit(struct pci_dev *dev) {}
+#endif
+
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 68da9280ff11..d9697892fa3e 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for PCI Express features and port driver
 
-pcieportdrv-y  := portdrv_core.o portdrv_pci.o err.o
+pcieportdrv-y  := portdrv_core.o portdrv_pci.o err.o rcec.o
 
 obj-$(CONFIG_PCIEPORTBUS)  += pcieportdrv.o
 
diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
new file mode 100644
index 000000000000..da02b0af442d
--- /dev/null
+++ b/drivers/pci/pcie/rcec.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Root Complex Event Collector Support
+ *
+ * Authors:
+ *  Sean V Kelley 
+ *  Qiuxu Zhuo 
+ *
+ * Copyright (C) 2020 Intel Corp.
+ */
+
+#include 
+#include 
+#include 
+
+#include "../pci.h"
+
+int pci_rcec_init(struct pci_dev *dev)
+{
+   struct rcec_ea *rcec_ea;
+   u32 rcec, hdr, busn;
+   u8 ver;
+
+   /* Only for Root Complex Event Collectors */
+   if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)
+   return 0;
+
+   rcec = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC);
+   if (!rcec)
+   return 0;
+
+   rcec_ea = kzalloc(sizeof(*rcec_ea), GFP_KERNEL);
+   if (!rcec_ea)
+   return -ENOMEM;
+   dev->rcec_ea = rcec_ea;
+
+   pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, 
&rcec_ea->bitmap);
+
+   /* Check whether RCEC BUSN register is present */
+   pci_read_config_dword(dev, rcec, &hdr);
+   ver = PCI_EXT_CAP_VER(hdr);
+   if (ver < PCI_RCEC_BUSN_REG_VER) {
+   /* Avoid later ver check by setting nextbusn */
+   rcec_ea->nextbusn = 0xff;
+   return 0;
+   }
+
+   pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn);
+   rcec_ea->nextbusn = PCI_RCEC_BUSN_NEXT(busn);
+   rcec_ea->lastbusn = PCI_RCEC_BUSN_LAST(busn);
+
+   return 0;
+}
+
+void pci_rcec_exit(struct pci_dev *dev)
+{
+   kfree(dev->rcec_ea);
+   dev->rcec_ea = NULL;
+}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 03d37128a24f..25f01f841f2d 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2201,6 +2201,7 @@ static void pci_configure_device(struct pci_dev *dev)
 static void pci_release_capabilities(struct pci_dev *dev)
 {
pci_aer_exit(dev);
+   pci_rcec_exit(dev);
pci_vpd_release(dev);
pci_iov_release(dev);
pci_free_cap_save_buffers(dev);
@@ -2400,6 +2401,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
pci_ptm_init(dev);  /* Precision Time Measurement */
pci_aer_init(dev);  /* Advanced Error Reporting */
pci_dpc_init(dev);  /* Downstream Port Containment */
+   pci_rcec_init(dev); /* Root Complex Event Collector */
 
pcie_report_downtraining(dev);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 835530605c0d..2290439e8bc0 100644
--- a/include/linux/pc

[PATCH v8 06/14] PCI/ERR: Add pci_walk_bridge() to pcie_do_recovery()

2020-10-02 Thread Sean V Kelley
From: Sean V Kelley 

Consolidate subordinate bus checks with pci_walk_bus()
into pci_walk_bridge() for walking below potentially
AER affected bridges.

Suggested-by: Bjorn Helgaas 
Signed-off-by: Sean V Kelley 
---
 drivers/pci/pcie/err.c | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index e68ea5243ff2..9b2130725ab6 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -146,12 +146,28 @@ static int report_resume(struct pci_dev *dev, void *data)
return 0;
 }
 
+/**
+ * pci_walk_bridge - walk bridges potentially AER affected
+ * @bridge   bridge which may be a Port.
+ * @cb   callback to be called for each device found
+ * @userdata arbitrary pointer to be passed to callback.
+ *
+ * If the device provided is a bridge, walk the subordinate bus,
+ * including any bridged devices on buses under this bus.
+ * Call the provided callback on each device found.
+ */
+static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev 
*, void *),
+   void *userdata)
+{
+   if (bridge->subordinate)
+   pci_walk_bus(bridge->subordinate, cb, userdata);
+}
+
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_channel_state_t state,
pci_ers_result_t (*reset_subordinate_devices)(struct 
pci_dev *pdev))
 {
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
-   struct pci_bus *bus;
struct pci_dev *bridge;
int type;
 
@@ -167,23 +183,22 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
else
bridge = pci_upstream_bridge(dev);
 
-   bus = bridge->subordinate;
pci_dbg(dev, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
-   pci_walk_bus(bus, report_frozen_detected, &status);
+   pci_walk_bridge(bridge, report_frozen_detected, &status);
status = reset_subordinate_device(bridge);
if (status != PCI_ERS_RESULT_RECOVERED) {
pci_warn(dev, "subordinate device reset failed\n");
goto failed;
}
} else {
-   pci_walk_bus(bus, report_normal_detected, &status);
+   pci_walk_bridge(bridge, report_normal_detected, &status);
}
 
if (status == PCI_ERS_RESULT_CAN_RECOVER) {
status = PCI_ERS_RESULT_RECOVERED;
pci_dbg(dev, "broadcast mmio_enabled message\n");
-   pci_walk_bus(bus, report_mmio_enabled, &status);
+   pci_walk_bridge(bridge, report_mmio_enabled, &status);
}
 
if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -194,14 +209,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 */
status = PCI_ERS_RESULT_RECOVERED;
pci_dbg(dev, "broadcast slot_reset message\n");
-   pci_walk_bus(bus, report_slot_reset, &status);
+   pci_walk_bridge(bridge, report_slot_reset, &status);
}
 
if (status != PCI_ERS_RESULT_RECOVERED)
goto failed;
 
pci_dbg(dev, "broadcast resume message\n");
-   pci_walk_bus(bus, report_resume, &status);
+   pci_walk_bridge(bridge, report_resume, &status);
 
if (pcie_aer_is_native(bridge))
pcie_clear_device_status(bridge);
-- 
2.28.0



5.9-rc7 oops in nvkm_udevice_info() w/ GA100

2020-10-02 Thread dann frazier
hey,
  I'm seeing an Oops when nouveau loads (see below). I've verified
that this is because both device->chip and device->name are NULL prior
to the strncpy()s at the end of nvkm_udevice_info(). Bisect shows that
this started happening after:

commit 24d5ff40a732633dceab68c6559ba723784f4a68
Author: Karol Herbst 
Date: Tue Apr 28 18:54:02 2020 +0200

drm/nouveau/device: rework mmio mapping code to get rid of second map

Fixes warnings on GPUs with smaller a smaller mmio region like vGPUs.

Signed-off-by: Karol Herbst 
Signed-off-by: Ben Skeggs 

[ 213.131657] nouveau :07:00.0: unknown chipset (17a1)
[ 213.138547] nouveau :07:00.0: unknown chipset (17a1)
[ 213.144938] BUG: kernel NULL pointer dereference, address: 
[ 213.152704] #PF: supervisor read access in kernel mode
[ 213.158433] #PF: error_code(0x) - not-present page
[ 213.164162] PGD 0 P4D 0
[ 213.166985] Oops:  [#1] SMP NOPTI
[ 213.171068] CPU: 32 PID: 206 Comm: kworker/32:0 Not tainted 5.9.0-rc7+ #1
[ 213.178639] Hardware name: NVIDIA DGXA100
920-23687-2530-000/DGXA100, BIOS 0.25 06/30/2020
[ 213.187866] Workqueue: events work_for_cpu_fn
[ 213.192761] RIP: 0010:nvkm_udevice_mthd+0x1ed/0x7d0 [nouveau]
[ 213.199170] Code: 10 49 89 47 08 4d 85 c9 74 10 48 85 c0 74 0b 41 8b
51 70 48 29 d0 49 89 47 10 49 8b 86 c0 00 00 00 49 8d 7f 18 ba 10 00
00 00 <48> 8b 30 e8 6b 91 89 c0 49 8b 76 28 49 8d 7f 28 ba 40 00 00 00
e8
[ 213.220121] RSP: 0018:ae0619d47b48 EFLAGS: 00010246
[ 213.225948] RAX:  RBX: 9cefab819580 RCX: 00c6
[ 213.233907] RDX: 0010 RSI:  RDI: 9cef988f0578
[ 213.241864] RBP: ae0619d47b80 R08:  R09: 
[ 213.249813] R10: 0088 R11: 01320122 R12: 
[ 213.257762] R13: 0068 R14: 9cef6107c400 R15: 9cef988f0560
[ 213.265721] FS: () GS:9cefce00()
knlGS:
[ 213.274747] CS: 0010 DS:  ES:  CR0: 80050033
[ 213.281153] CR2:  CR3: 007f3019c000 CR4: 00350ee0
[ 213.289104] Call Trace:
[ 213.291854] ? nvkm_object_insert+0x6f/0x80 [nouveau]
[ 213.297509] nvkm_object_mthd+0x1a/0x30 [nouveau]
[ 213.302773] nvkm_ioctl_mthd+0x65/0x70 [nouveau]
[ 213.307940] nvkm_ioctl+0xf0/0x190 [nouveau]
[ 213.312735] nvkm_client_ioctl+0x12/0x20 [nouveau]
[ 213.318097] nvif_object_ioctl+0x4f/0x60 [nouveau]
[ 213.323460] nvif_object_mthd+0x9f/0x150 [nouveau]
[ 213.328822] ? nvif_object_ctor+0x14b/0x1d0 [nouveau]
[ 213.334473] nvif_device_ctor+0x61/0x70 [nouveau]
[ 213.339749] nouveau_cli_init+0x1a3/0x460 [nouveau]
[ 213.345215] ? nouveau_drm_device_init+0x3e/0x780 [nouveau]
[ 213.351454] nouveau_drm_device_init+0x77/0x780 [nouveau]
[ 213.357479] ? pci_read_config_word+0x27/0x40
[ 213.362337] ? pci_enable_device_flags+0x14f/0x170
[ 213.367705] nouveau_drm_probe+0x132/0x1f0 [nouveau]
[ 213.373241] local_pci_probe+0x48/0x80
[ 213.377419] work_for_cpu_fn+0x1a/0x30
[ 213.381598] process_one_work+0x1e8/0x3b0
[ 213.386068] worker_thread+0x53/0x420
[ 213.390149] kthread+0x12f/0x150
[ 213.393745] ? process_one_work+0x3b0/0x3b0
[ 213.398406] ? __kthread_bind_mask+0x70/0x70
[ 213.403169] ret_from_fork+0x22/0x30
[ 213.407153] Modules linked in: nouveau(+) mxm_wmi wmi video
nls_iso8859_1 dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua
amd64_edac_mod edac_mce_amd amd_energy kvm_amd kvm rapl efi_pstore
ipmi_ssif input_leds cdc_ether usbnet mii ccp k10temp acpi_ipmi
ipmi_si ipmi_devintf ipmi_msghandler mac_hid sch_fq_codel ip_tables
x_tables autofs4 btrfs blake2b_generic raid10 raid456
async_raid6_recov async_memcpy async_pq async_xor async_tx xor
raid6_pq libcrc32c raid1 raid0 multipath linear mlx5_ib ses enclosure
hid_generic usbhid uas hid usb_storage ib_uverbs ib_core
crct10dif_pclmul crc32_pclmul ast ghash_clmulni_intel drm_vram_helper
aesni_intel drm_ttm_helper crypto_simd ttm cryptd drm_kms_helper
glue_helper syscopyarea sysfillrect sysimgblt mlx5_core fb_sys_fops
pci_hyperv_intf igb cec mpt3sas dca rc_core raid_class nvme tls
i2c_algo_bit scsi_transport_sas drm mlxfw xhci_pci nvme_core
xhci_pci_renesas i2c_piix4
[ 213.497060] CR2: 
[ 213.500755] ---[ end trace eed3a19f1f25ca74 ]---


Re: [PATCH] arm64: dts: meson: add SM1 soundcard name to VIM3L

2020-10-02 Thread Kevin Hilman
Christian Hewitt  writes:

>> On 2 Oct 2020, at 6:44 pm, Jerome Brunet  wrote:
>> 
>> On Fri 02 Oct 2020 at 16:16, Christian Hewitt  
>> wrote:
>> 
>>> VIM3L now inherits the sound node from the VIM3 common dtsi but is
>>> an SM1 device, so label it as such, and stop users blaming future
>>> support issues on the distro/app "wrongly detecting" their device.
>>> 
>>> Signed-off-by: Christian Hewitt 
>>> ---
>>> arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts | 4 
>>> 1 file changed, 4 insertions(+)
>>> 
>>> diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts 
>>> b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
>>> index 4b517ca72059..f46f0ecc37ec 100644
>>> --- a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
>>> +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
>>> @@ -32,6 +32,10 @@
>>> regulator-boot-on;
>>> regulator-always-on;
>>> };
>>> +
>>> +   sound {
>>> +   model = "SM1-KHADAS-VIM3L";
>>> +   };
>> 
>> The sound card is the same so I don't see why the sm1 board should have
>> a different name. If you are not happy with the name, please update it
>> in the common file.
>
> It’s a nice-to-have not a must-have, but the current LE images that are
> in circulation use 5.7 with the previous board-correct name so I was
> looking for continuity. We do see user forum reports (infrequent but
> recurring) of wrongly detected hardware with other SoC platforms where
> similar name inheritance surfaces the ‘wrong’ device name in GUIs, and
> I like anything that avoids support work.
>
> I’d suggest KHADAS-VIM3-VIM3L as a common name, but then it’s the only
> device in the current device-tree set that is not prefixed with the SoC
> identifier, which (OCD) feels wrong.

True, but turns out there's nothing SoC specific about this sound block
since it's identical across SoCs, so specifying the SoC is being too
specific. 

OTOH, while I agree it looks "wrong", it's pretty common in Linux DT to
have the SoC prefix to mean only that it's "compatible" with that SoC,
not that it *is* that SoC.

However, I agree that that can lead to confusion with end users, so
since this change has no functional change, and is only a UX issue in
userspace, I'm fine to apply it.

Kevin



Re: [PATCH v4 1/2] system_data_types.7: Add 'void *'

2020-10-02 Thread Alejandro Colomar

Hi Paul,

On 2020-10-02 18:53, Paul Eggert wrote:
> On 10/2/20 8:14 AM, Alejandro Colomar wrote:
>
>> +.I void *
>
> GNU style is a space between "void" and "*", so this should be '.I
> "void\ *"', both here and elsewhere. The backslash prevents a line break.

.I void *

renders with a space in between.
I'll show you the rendered version at the end of this email.

>
>> +Conversions from and to any other pointer type are done implicitly,
>> +not requiring casts at all.
>> +Note that this feature prevents any kind of type checking:
>> +the programmer should be careful not to cast a
>
> Change "cast" to "convert", since the point is that no cast is needed.

Ok.

>
>> +.PP
>> +The conversion specifier for
>> +.I void *
>> +for the
>> +.BR printf (3)
>> +and the
>> +.BR scanf (3)
>> +families of functions is
>> +.BR p ;
>> +resulting commonly in
>> +.B %p
>> +for printing
>> +.I void *
>> +values.
>
> %p works with any object pointer type (or in POSIX, any pointer type),
> not just  void *.
In theory, no (if otherwise, I'd like to know why):

[[
p
The argument shall be a pointer to void. The value of the pointer 
is converted to a sequence of printable characters, in an 
implementation-defined manner.

]] POSIX.1-2008

However, it's unlikely to cause any problems, I must admit.

>
> Should also mention "void const *", "void volatile *", etc.

I already answered this:
https://lore.kernel.org/linux-man/cah6ehdqhh46tjvc72mewftwci7iouaod0ic1zlrga+c-36g...@mail.gmail.com/T/#m6f657e988558a556cb70f7c056ef7a24e73dbe4a

> Plus it
> really should talk about plain "void", saying that it's a placeholder as
> a return value for functions, for casting away values, and as a keyword
> in C11 for functions with no parameters (though this is being changed in
> the next C version!). I sent comments about most of this stuff already.

'void' is a completely different type from 'void *'.

This patch is for 'void *'.

If 'void' is documented,
it'll be in a different entry (although in the same page),
and therefore, that'll be for a different patch.

Thanks,

Alex

__

void *
  According  to  the  C language standard, a pointer to any object
  type may be converted to a pointer to void and back.  POSIX fur-
  ther requires that any pointer, including pointers to functions,
  may be converted to a pointer to void and back.

  Conversions from and to any other pointer type are done  implic-
  itly,  not  requiring casts at all.  Note that this feature pre-
  vents any kind of type checking: the programmer should be  care-
  ful not to cast a void * value to a type incompatible to that of
  the underlying data, because that would result in undefined  be-
  havior.

  This  type  is useful in function parameters and return value to
  allow passing values of any type.  The function will usually use
  some  mechanism to know of which type the underlying data passed
  to the function really is.

  A value of this type can't be dereferenced, as it would  give  a
  value  of  type  void  which is not possible.  Likewise, pointer
  arithmetic is not possible with this type.  However, in  GNU  C,
  pointer  arithmetic  is allowed as an extension to the standard;
  this is done by treating the size of a void or of a function  as
  1.  A consequence of this is that sizeof is also allowed on void
  and on function types, and returns 1.

  The conversion specifier for void * for the  printf(3)  and  the
  scanf(3)  families  of  functions is p; resulting commonly in %p
  for printing void * values.

  Versions: The POSIX requirement about compatibility between void
  * and function pointers was added in POSIX.1-2008 Technical Cor-
  rigendum 1 (2013).

  Conforming to: C99 and later; POSIX.1-2001 and later.

  See also: malloc(3), memcmp(3), memcpy(3), memset(3)

  See also the intptr_t and uintptr_t types in this page.


Re: [PATCH v4 1/2] dt-bindings: usb: Add binding for discrete onboard USB hubs

2020-10-02 Thread Alan Stern
On Fri, Oct 02, 2020 at 10:08:17AM -0700, Doug Anderson wrote:
> As a more similar example of single device that is listed in more than
> one location in the device tree, we can also look at embedded SDIO
> BT/WiFi combo cards.  This single device often provides WiFi under an
> SDIO bus and BT under a serial / USB bus.  I'm not 100% sure there are
> actually cases where the same board provides device tree data to both
> at the same time, but "brcm,bcm43540-bt" is an example of providing
> data to the Bluetooth (connected over serial port) and
> "brcm,bcm4329-fmac" to the WiFi (connected over the SDIO bus).  Of
> course WiFi/BT cheat in that the control logic is represented by the
> SDIO power sequencing stuff...
> 
> 
> Back to our case, though.  I guess the issue here is that we're the
> child of more than one bus.  Let's first pretend that the i2c lines of
> this hub are actually hooked up and establish how that would look
> first.  Then we can think about how it looks if this same device isn't
> hooked up via i2c.  In this case, it sounds as if you still don't want
> the device split among two nodes.  So I guess you'd prefer something
> like:
> 
> i2c {
>   usb-hub@xx {
> reg = <xx>;
> compatible = "realtek,rts5411", "onboard-usb-hub";
> vdd-supply = <&pp3300_hub>;
> usb-devices = <&usb_controller 1>;
>   };
> };
> 
> ...and then you wouldn't have anything under the USB controller
> itself.  Is that correct?  So even though there are existing bindings
> saying that a USB device should be listed via VID/PID, the desire to
> represent this as a single node overrides that, right?  (NOTE: this is
> similar to what Matthias proposed in his response except that I've
> added an index so that we don't need _anything_ under the controller).
> 
> Having this primarily listed under the i2c bus makes sense because the
> control logic for the hub is hooked up via i2c.  Having the power
> supply associated with it also makes some amount of sense since it's a
> control signal.  It's also convenient that i2c devices have their
> probe called _before_ we try to detect if they're there because it's
> common that i2c devices need power applied first.
> 
> Now, just because we don't have the i2c bus hooked up doesn't change
> the fact that there is control logic.  We also certainly wouldn't want
> two ways of describing this same hub: one way if the i2c is hooked up
> and one way if it's not hooked up.  To me this means that we
> should be describing this hub as a top-level node if i2c isn't hooked
> up, just like we do with "smsc,usb3503a"
> 
> Said another way, we have these points:
> 
> a) The control logic for this bus could be hooked up to an i2c bus.
> 
> b) If the control logic is hooked up to an i2c bus it feels like
> that's where the device's primary node should be placed, not under the
> USB controller.
> 
> c) To keep the i2c and non-i2c case as similar as possible, if the i2c
> bus isn't hooked up the hub's primary node should be a top-level node,
> not under the USB controller.
> 
> 
> NOTE ALSO: the fact that we might want to list this hub under an i2c
> controller also seems like it's a good argument against putting this
> logic in the xhci-platform driver?

More and more we are going to see devices that are attached to multiple 
buses.  In this case, one for power control and another for 
commands/data.  If DT doesn't already have a canonical way of handling 
such situations, it needs to develop one soon.

One can make a case that there should be multiple device nodes in this 
situation, somehow referring to each other so that the system knows they 
all describe the same device.  Maybe one "primary" node for the device 
and the others acting kind of like symbolic links.

Regardless of how the situation is represented in DT, there remains the 
issue of where (i.e., in which driver module) the appropriate code 
belongs.  This goes far beyond USB.  In general, what happens when one 
sort of device normally isn't hooked up through a power regulator, so 
its driver doesn't have any code to enable a regulator, but then some 
system does exactly that?

Even worse, what if the device is on a discoverable bus, so the driver 
doesn't get invoked at all until the device is discovered, but on the 
new system it can't be discovered until the regulator is enabled?

Alan Stern


Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Grygorii Strashko

hi Saravana,

On 02/10/2020 21:27, Laurent Pinchart wrote:

Hi Saravana,

On Fri, Oct 02, 2020 at 10:58:55AM -0700, Saravana Kannan wrote:

On Fri, Oct 2, 2020 at 10:55 AM Laurent Pinchart wrote:

On Fri, Oct 02, 2020 at 10:51:51AM -0700, Saravana Kannan wrote:

On Fri, Oct 2, 2020 at 7:08 AM Rob Herring  wrote:

On Thu, Oct 1, 2020 at 5:59 PM Saravana Kannan  wrote:


When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
adding all top level devices") optimized the fwnode parsing when all top
level devices are added, it missed out optimizing this for platform
where the top level devices are added through the init_machine() path.

This commit does the optimization for all paths by simply moving the
fw_devlink_pause/resume() inside of_platform_default_populate().

Reported-by: Tomi Valkeinen 
Signed-off-by: Saravana Kannan 
---
  drivers/of/platform.c | 19 +++
  1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 071f04da32c8..79972e49b539 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -501,8 +501,21 @@ int of_platform_default_populate(struct device_node *root,
  const struct of_dev_auxdata *lookup,
  struct device *parent)
  {
-   return of_platform_populate(root, of_default_bus_match_table, lookup,
-   parent);
+   int ret;
+
+   /*
+* fw_devlink_pause/resume() are only safe to be called around top
+* level device addition due to locking constraints.
+*/
+   if (!root)
+   fw_devlink_pause();
+
+   ret = of_platform_populate(root, of_default_bus_match_table, lookup,
+  parent);


of_platform_default_populate() vs. of_platform_populate() is just a
different match table. I don't think the behavior should otherwise be
different.

There's also of_platform_probe() which has slightly different matching
behavior. It should not behave differently either with respect to
devlinks.


So I'm trying to do this only when the top level devices are added for
the first time. of_platform_default_populate() seems to be the most
common path. For other cases, I think we just need to call
fw_devlink_pause/resume() wherever the top level devices are added for
the first time. As I said in the other email, we can't add
fw_devlink_pause/resume() by default to of_platform_populate().

Do you have other ideas for achieving "call fw_devlink_pause/resume()
only when top level devices are added for the first time"?


I'm not an expert in this domain, but before investigating it, would you
be able to share a hack patch that implements this (in the most simple
way) to check if it actually fixes the delays I experience on my system
?


So I take it the patch I sent out didn't work for you? Can you tell me
what machine/DT you are using?


I've replied to the patch:

Based on v5.9-rc5, before the patch:

[0.652887] cpuidle: using governor menu
[   12.349476] No ATAGs?

After the patch:

[0.650460] cpuidle: using governor menu
[   12.262101] No ATAGs?

I'm using an AM57xx EVM, whose DT is not upstream, but it's essentially
a am57xx-beagle-x15-revb1.dts (it includes that DTS) with a few
additional nodes for GPIO keys, LCD panel, backlight and touchscreen.



hope you are receiving my mails as I've provided you with all required 
information already [1]

with below diff:
[4.177231] Freeing unused kernel memory: 1024K
[4.181892] Run /sbin/init as init process

The best time with [2] is
[3.100483] Run /sbin/init as init process

Still a 1 sec loss.

Please understand the issue - the requirements here are like a 500ms boot with CAN, 
Ethernet, camera and display on ;(

[1] https://lore.kernel.org/patchwork/patch/1316134/#1511276
[2] https://lore.kernel.org/patchwork/patch/1316134/#1511435

diff --git a/arch/arm/mach-omap2/pdata-quirks.c 
b/arch/arm/mach-omap2/pdata-quirks.c
index 2a4fe3e68b82..ac1ab8928190 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -591,7 +591,9 @@ void __init pdata_quirks_init(const struct of_device_id 
*omap_dt_match_table)
if (of_machine_is_compatible("ti,omap3"))
omap3_mcbsp_init();
pdata_quirks_check(auxdata_quirks);
+   fw_devlink_pause();
of_platform_populate(NULL, omap_dt_match_table,
 omap_auxdata_lookup, NULL);
+   fw_devlink_resume();
pdata_quirks_check(pdata_quirks);
 }


--
Best regards,
grygorii


Re: [v5] mm: khugepaged: recalculate min_free_kbytes after memory hotplug as expected by khugepaged

2020-10-02 Thread Mike Kravetz
On 10/2/20 4:25 AM, Michal Hocko wrote:
> On Wed 30-09-20 15:03:11, Mike Kravetz wrote:
>> On 9/30/20 1:47 PM, Vijay Balakrishna wrote:
>>> On 9/30/2020 11:20 AM, Mike Kravetz wrote:
 On 9/29/20 9:49 AM, Vijay Balakrishna wrote:

 Sorry for jumping in so late.  Should we use this as an opportunity to
 also fix up the messages logged when (re)calculating mfk?  They are wrong
 and could be quite confusing.
>>>
>>>
>>> Sure.  Please share your thoughts regarding appropriate message.  Here is 
>>> what I'm thinking
>>>
>>> pr_warn("min_free_kbytes is not updated to %d because current value %d is 
>>> preferred\n", new_min_free_kbytes, min_free_kbytes);
>>>
>>> If above message is reasonable I can post a new revision (v6).
>>
>> Just considering the below example,
>>
 For example consider the following sequence
 of operations and corresponding log messages produced.

 Freshly booted VM with 2 nodes and 8GB memory:
 # cat /proc/sys/vm/min_free_kbytes
 90112
 # echo 9 > /proc/sys/vm/min_free_kbytes
 # cat /proc/sys/vm/min_free_kbytes
 9
 # echo 0 > /sys/devices/system/node/node1/memory56/online
 [  135.099947] Offlined Pages 32768
 [  135.102362] min_free_kbytes is not updated to 11241 because user 
 defined value 9 is preferred
>>
>> I am not sure if there is any value in printing the above line.  Especially
>> in this context as it becomes obsolete with the printing of the next line.
> 
> The original intention was to make it explicit that auto-tuning is
> influenced by the user provided configuration.
> 
 [  135.109070] khugepaged: raising min_free_kbytes from 9 to 90112 to 
 help transparent hugepage allocations
>>
>> IMO, the above line is the only one that should be output as a result of the
>> recalculation.
> 
> Well, but khugepaged could be disabled and then the above might not get
> printed. Sure the code could get reorganized and all that but is this
> really worth that?
> 
>> I guess that brings up the question of 'should we continue to track the user
>> defined value if we overwrite it?".  If we quit tracking it may help with the
>> next message.
> 
> Auto tuning and user provided override is quite tricky to get sensible.
> Especially in the case here. Admin has provided an override but has the
> potential memory hotplug been considered? Or to make it even more
> complicated, consider that the hotplug happens without admin involvement
> - e.g. memory gets hotremoved due to HW problems. Is the admin provided
> value still meaningful? To be honest I do not have a good answer and I
> am not sure we should care all that much until we see practical
> problems.

I am not insisting that this be cleaned up.  The change in this patch to
ensure THP related calculations are performed during hotplug is the most
important.

I became aware of the logging issues when looking at a customer issue with
an older kernel.  The min_free_kbytes setting was integral to the issue we
were investigating, and it was unclear whether or not the customer had
changed the value.  I knew the system log should contain evidence of manually
setting min_free_kbytes.  However, there was no evidence in the log.  Turns
out the customer did not change the value, but it did cause me to do a deep
dive into the logging code.
-- 
Mike Kravetz


Re: [PATCH v4] kvm,x86: Exit to user space in case page fault error

2020-10-02 Thread Sean Christopherson
On Fri, Oct 02, 2020 at 11:38:54AM -0400, Vivek Goyal wrote:
> On Thu, Oct 01, 2020 at 03:33:20PM -0700, Sean Christopherson wrote:
> > Alternatively, what about adding a new KVM request type to handle this?
> > E.g. when the APF comes back with -EFAULT, snapshot the GFN and make a
> > request.  The vCPU then gets kicked and exits to userspace.  Before exiting
> > to userspace, the request handler resets vcpu->arch.apf.error_gfn.  Bad GFNs
> > simply get if error_gfn is "valid", i.e. there's a pending request.
> 
> Sorry, I did not understand the above proposal. Can you please elaborate
> a bit more. Part of it is that I don't know much about KVM requests.
> Looking at the code it looks like that main loop is parsing if some
> kvm request is pending and executing that action.
> 
> Don't we want to make sure that we exit to user space when guest retries
> error gfn access again.

> In this case once we get -EFAULT, we will still inject page_ready into
> guest. And then either same process or a different process might run. 
> 
> So when exactly code raises a kvm request. If I raise it right when
> I get -EFAULT, then kvm will exit to user space upon next entry
> time. But there is no guarantee guest vcpu is running the process which
> actually accessed the error gfn. And that probably means that register
> state of cpu does not mean much and one can not easily figure out
> which task tried to access the bad memory and when.
> 
> That's why we prepare a list of error gfn and only exit to user space
> when error_gfn access is retried so that guest vcpu context is correct.
> 
> What am I missing?

I don't think it's necessary to provide userspace with the register state of
the guest task that hit the bad page.  Other than debugging, I don't see how
userspace can do anything useful with such information.

Even if you want to inject an event of some form into the guest, having the
correct context for the event itself is not required.  IMO it's perfectly
reasonable for such an event to be asynchronous.

IIUC, your end goal is to be able to gracefully handle DAX file truncation.
Simply killing the guest task that hit the bad page isn't sufficient, as
nothing prevents a future task from accessing the same bad page.  To fully
handle the situation, the guest needs to remove the bad page from its memory
pool.  Once the page is offlined, the guest kernel's error handling will
kick in when a task accesses the bad page (or nothing ever touches the bad
page again and everyone is happy).

Note, I'm not necessarily suggesting that QEMU piggyback its #MC injection
to handle this, but I suspect the resulting behavior will look quite similar,
e.g. notify the virtiofs driver in the guest, which does some magic to take
the offending region offline, and then guest tasks get SIGBUS or whatever.

I also don't think it's KVM's responsibility to _directly_ handle such a
scenario.  As I said in an earlier version, KVM can't possibly know _why_ a
page fault came back with -EFAULT, only userspace can connect the dots of
GPA -> HVA -> vm_area_struct -> file -> inject event.  KVM definitely should
exit to userspace on the -EFAULT instead of hanging the guest, but that can
be done via a new request, as suggested.


Re: [PATCH RESEND v1] ARM: dts: meson8: remove two invalid interrupt lines from the GPU node

2020-10-02 Thread Kevin Hilman
Martin Blumenstingl  writes:

> Hi Kevin,
>
> On Mon, Aug 24, 2020 at 11:16 PM Kevin Hilman  wrote:
> [...]
>> Applied, thanks!
>>
>> [1/1] ARM: dts: meson8: remove two invalid interrupt lines from the GPU node
>>   commit: b468412409c0e5752ad3396b147cac563ff8dd3b
> this one still seems to be sitting in the v5.9/fixes branch
> I don't see a reason to rush this, so can you please queue it up for
> v5.10/fixes?

Yes, sorry this one slipped through the cracks.

Kevin


Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Laurent Pinchart
Hi Saravana,

On Fri, Oct 02, 2020 at 10:58:55AM -0700, Saravana Kannan wrote:
> On Fri, Oct 2, 2020 at 10:55 AM Laurent Pinchart wrote:
> > On Fri, Oct 02, 2020 at 10:51:51AM -0700, Saravana Kannan wrote:
> > > On Fri, Oct 2, 2020 at 7:08 AM Rob Herring  wrote:
> > > > On Thu, Oct 1, 2020 at 5:59 PM Saravana Kannan  
> > > > wrote:
> > > > >
> > > > > When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
> > > > > adding all top level devices") optimized the fwnode parsing when all 
> > > > > top
> > > > > level devices are added, it missed out optimizing this for platform
> > > > > where the top level devices are added through the init_machine() path.
> > > > >
> > > > > This commit does the optimization for all paths by simply moving the
> > > > > fw_devlink_pause/resume() inside of_platform_default_populate().
> > > > >
> > > > > Reported-by: Tomi Valkeinen 
> > > > > Signed-off-by: Saravana Kannan 
> > > > > ---
> > > > >  drivers/of/platform.c | 19 +++
> > > > >  1 file changed, 15 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > > > > index 071f04da32c8..79972e49b539 100644
> > > > > --- a/drivers/of/platform.c
> > > > > +++ b/drivers/of/platform.c
> > > > > @@ -501,8 +501,21 @@ int of_platform_default_populate(struct 
> > > > > device_node *root,
> > > > >  const struct of_dev_auxdata *lookup,
> > > > >  struct device *parent)
> > > > >  {
> > > > > -   return of_platform_populate(root, of_default_bus_match_table, 
> > > > > lookup,
> > > > > -   parent);
> > > > > +   int ret;
> > > > > +
> > > > > +   /*
> > > > > +* fw_devlink_pause/resume() are only safe to be called 
> > > > > around top
> > > > > +* level device addition due to locking constraints.
> > > > > +*/
> > > > > +   if (!root)
> > > > > +   fw_devlink_pause();
> > > > > +
> > > > > +   ret = of_platform_populate(root, of_default_bus_match_table, 
> > > > > lookup,
> > > > > +  parent);
> > > >
> > > > of_platform_default_populate() vs. of_platform_populate() is just a
> > > > different match table. I don't think the behavior should otherwise be
> > > > different.
> > > >
> > > > There's also of_platform_probe() which has slightly different matching
> > > > behavior. It should not behave differently either with respect to
> > > > devlinks.
> > >
> > > So I'm trying to do this only when the top level devices are added for
> > > the first time. of_platform_default_populate() seems to be the most
> > > common path. For other cases, I think we just need to call
> > > fw_devlink_pause/resume() wherever the top level devices are added for
> > > the first time. As I said in the other email, we can't add
> > > fw_devlink_pause/resume() by default to of_platform_populate().
> > >
> > > Do you have other ideas for achieving "call fw_devlink_pause/resume()
> > > only when top level devices are added for the first time"?
> >
> > I'm not an expert in this domain, but before investigating it, would you
> > be able to share a hack patch that implements this (in the most simple
> > way) to check if it actually fixes the delays I experience on my system
> > ?
> 
> So I take it the patch I sent out didn't work for you? Can you tell me
> what machine/DT you are using?

I've replied to the patch:

Based on v5.9-rc5, before the patch:

[0.652887] cpuidle: using governor menu
[   12.349476] No ATAGs?

After the patch:

[0.650460] cpuidle: using governor menu
[   12.262101] No ATAGs?

I'm using an AM57xx EVM, whose DT is not upstream, but it's essentially
a am57xx-beagle-x15-revb1.dts (it includes that DTS) with a few
additional nodes for GPIO keys, LCD panel, backlight and touchscreen.

-- 
Regards,

Laurent Pinchart


Re: [PATCH v4 01/11] mm: add Kernel Electric-Fence infrastructure

2020-10-02 Thread Jann Horn
On Fri, Oct 2, 2020 at 4:23 PM Dmitry Vyukov  wrote:
> On Fri, Oct 2, 2020 at 9:54 AM Jann Horn  wrote:
> > On Fri, Oct 2, 2020 at 8:33 AM Jann Horn  wrote:
> > > On Tue, Sep 29, 2020 at 3:38 PM Marco Elver  wrote:
> > > > This adds the Kernel Electric-Fence (KFENCE) infrastructure. KFENCE is a
> > > > low-overhead sampling-based memory safety error detector of heap
> > > > use-after-free, invalid-free, and out-of-bounds access errors.
> > > >
> > > > KFENCE is designed to be enabled in production kernels, and has near
> > > > zero performance overhead. Compared to KASAN, KFENCE trades performance
> > > > for precision. The main motivation behind KFENCE's design, is that with
> > > > enough total uptime KFENCE will detect bugs in code paths not typically
> > > > exercised by non-production test workloads. One way to quickly achieve a
> > > > large enough total uptime is when the tool is deployed across a large
> > > > fleet of machines.
> > [...]
> > > > +/*
> > > > + * The pool of pages used for guard pages and objects. If supported, 
> > > > allocated
> > > > + * statically, so that is_kfence_address() avoids a pointer load, and 
> > > > simply
> > > > + * compares against a constant address. Assume that if KFENCE is 
> > > > compiled into
> > > > + * the kernel, it is usually enabled, and the space is to be allocated 
> > > > one way
> > > > + * or another.
> > > > + */
> > >
> > > If this actually brings a performance win, the proper way to do this
> > > would probably be to implement this as generic kernel infrastructure
> > > that makes the compiler emit large-offset relocations (either through
> > > compiler support or using inline asm statements that move an immediate
> > > into a register output and register the location in a special section,
> > > kinda like how e.g. static keys work) and patches them at boot time,
> > > or something like that - there are other places in the kernel where
> > > very hot code uses global pointers that are only ever written once
> > > during boot, e.g. the dentry cache of the VFS and the futex hash
> > > table. Those are probably far hotter than the kfence code.
> > >
> > > While I understand that that goes beyond the scope of this project, it
> > > might be something to work on going forward - this kind of
> > > special-case logic that turns the kernel data section into heap memory
> > > would not be needed if we had that kind of infrastructure.
> >
> > After thinking about it a bit more, I'm not even convinced that this
> > is a net positive in terms of overall performance - while it allows
> > you to avoid one level of indirection in some parts of kfence, that
> > kfence code by design only runs pretty infrequently. And to enable
> > this indirection avoidance, your x86 arch_kfence_initialize_pool() is
> > shattering potentially unrelated hugepages in the kernel data section,
> > which might increase the TLB pressure (and therefore the number of
> > memory loads that have to fall back to slow page walks) in code that
> > is much hotter than yours.
> >
> > And if this indirection is a real performance problem, that problem
> > would be many times worse in the VFS and the futex subsystem, so
> > developing a more generic framework for doing this cleanly would be
> > far more important than designing special-case code to allow kfence to
> > do this.
> >
> > And from what I've seen, a non-trivial chunk of the code in this
> > series, especially the arch/ parts, is only necessary to enable this
> > microoptimization.
> >
> > Do you have performance numbers or a description of why you believe
> > that this part of kfence is exceptionally performance-sensitive? If
> > not, it might be a good idea to remove this optimization, at least for
> > the initial version of this code. (And even if the optimization is
> > worthwhile, it might be a better idea to go for the generic version
> > immediately.)
>
> This check is very hot, it happens on every free. For every freed
> object we need to understand if it belongs to KFENCE or not.

Ah, so the path you care about does not dereference __kfence_pool, it
just compares it to the supplied pointer?


First off: The way you've written is_kfence_address(), GCC 10.2 at -O3
seems to generate *utterly* *terrible* code (and the newest clang
release isn't any better); something like this:

kfree_inefficient:
  mov rax, QWORD PTR __kfence_pool[rip]
  cmp rax, rdi
  jbe .L4
.L2:
  jmp kfree_not_kfence
.L4:
  add rax, 0x20
  cmp rax, rdi
  jbe .L2
  jmp kfree_kfence

So pointers to the left of the region and pointers to the right of the
region will take different branches, and so if you have a mix of
objects on both sides of the kfence region, you'll get tons of branch
mispredictions for no good reason. You'll want to rewrite that check
as "unlikely(ptr - base <= SIZE)" instead of "unlikely(ptr >= base &&
ptr < base + SIZE)" unless you know that all the objects will be on one
side. This would also reduce the performance impact of loading

Re: [PATCH v3 00/21] spi: dw: Add full Baikal-T1 SPI Controllers support

2020-10-02 Thread Andy Shevchenko
On Fri, Oct 2, 2020 at 3:56 PM Mark Brown  wrote:
> On Fri, Oct 02, 2020 at 01:24:44PM +0300, Andy Shevchenko wrote:
> > On Fri, Oct 02, 2020 at 01:28:08AM +0300, Serge Semin wrote:
>
> > > the subject. Though some of them are mere cleanups or weakly related with
> > > the subject fixes, but we just couldn't leave the code as is at some
> > > places since we were working with the DW APB SSI driver anyway. Here is
> > > what we did to fix the original DW APB SSI driver, to make it less messy.
>
> > Maybe it's time to put your name into MAINTAINERS for this driver?
>
> Seems sensible to me - Andy, it probably makes sense to add you as well?

I have more than enough on my plate currently. Maybe in the future.

> Does one of you want to send a patch for this?

--
With Best Regards,
Andy Shevchenko


Re: [PATCH v3 02/21] spi: dw: Add DWC SSI capability

2020-10-02 Thread Andy Shevchenko
On Fri, Oct 2, 2020 at 8:18 PM Serge Semin
 wrote:
>
> On Fri, Oct 02, 2020 at 01:19:29PM +0300, Andy Shevchenko wrote:
> > On Fri, Oct 02, 2020 at 01:28:10AM +0300, Serge Semin wrote:
> > > Currently DWC SSI core is supported by means of setting up the
> > > core-specific update_cr0() callback. It isn't suitable for multiple
> > > reasons. First of all having exported several methods doing the same thing
> > > but for different chips makes the code harder to maintain. Secondly the
> > > spi-dw-core driver exports the methods, then the spi-dw-mmio driver sets
> > > the private data callback with one of them so to be called by the core
> > > driver again. That makes the code logic too complicated. Thirdly using
> > > callbacks for just updating the CR0 register is problematic, since in case
> > > if the register needed to be updated from different parts of the code,
> > > we'd have to create another callback (for instance the SPI device-specific
> > > parameters don't need to be calculated each time the SPI transfer is
> > > submitted, so it's better to pre-calculate the CR0 data at the SPI-device
> > > setup stage).
> > >
> > > So keeping all the above in mind let's discard the update_cr0() callbacks,
> > > define a generic and static dw_spi_update_cr0() method and create the
> > > DW_SPI_CAP_DWC_SSI capability, which when enabled would activate the
> > > alternative CR0 register layout.
> > >
> > > While at it add the comments to the code path of the normal DW APB SSI
> > > controller setup to make the dw_spi_update_cr0() method looking coherent.
> >
>
> > What's the point of increasing the indentation level and producing additional churn?
> > Can't you simply leave functions, unexport them, and call in one 
> > conditional of
> > whatever new function is called?
>
> I forgot to mention that in the commit log, there is another reason why it's
> better to create a generic dw_spi_update_cr0() instead of doing what you 
> suggest.
> As it will be seen from the following up patches, the dw_spi_update_cr0() 
> function
> (to be more precise it's successor, but anyway) will be used from the SPI 
> memory
> ops implementation. So if-else-ing here and there isn't a good idea for
> maintainability. For the same reason of the maintainability it's better to 
> have a
> generic method which reflects all the config peculiarities, so in case of any
> changes they would not be forgotten to be introduced for both DWC SSI and 
> DW
> APB SSI parts of the setup procedures. As I see it that outweighs the 
> additional
> indentation level drawback.

What I meant is to leave functions as is and call them under conditional

if ()
 call one
else
 call another


-- 
With Best Regards,
Andy Shevchenko


Re: [PATCH v3 03/21] spi: dw: Detach SPI device specific CR0 config method

2020-10-02 Thread Andy Shevchenko
On Fri, Oct 2, 2020 at 8:47 PM Serge Semin
 wrote:
> On Fri, Oct 02, 2020 at 01:22:46PM +0300, Andy Shevchenko wrote:
> > On Fri, Oct 02, 2020 at 01:28:11AM +0300, Serge Semin wrote:

> > > +   /*
> > > +* Update CR0 data each time the setup callback is invoked since
> > > +* the device parameters could have been changed, for instance, by
> > > +* the MMC SPI driver or something else.
> > > +*/
> > > +   chip->cr0 = dw_spi_get_cr0(dws, spi);
> >
>
> > I would rather name it prepare or alike. 'get' assumes getting value or
> > something like that.
>
> This seems reasonable. What verb do you think would be better: prepare,
> calc, assemble, construct, make, compute, collect, compose, form, compile, 
> etc ?
> Personally prepare or calc or assemble are the best candidates. What do you
> think?

prepare is good enough if you agree on it.


-- 
With Best Regards,
Andy Shevchenko


Re: [PATCH v2 0/5] usb: dwc-meson-g12a: Add support for USB on S400 board

2020-10-02 Thread Kevin Hilman
Neil Armstrong  writes:

> Hi Kevin,
>
> On 17/09/2020 08:59, Neil Armstrong wrote:
>> The Amlogic AXG is close to the GXL Glue but with a single OTG PHY.
>> 
>> It needs the same init sequence as GXL & GXM, but it seems it doesn't need
>> the host disconnect bit.
>> 
>> The Glue driver reuses the already implemented GXL & GXM work.
>> 
>> The USB2 PHY driver needs a slight tweak to keep the OTG detection working.
>> 
>> Changes since v1 at [1]:
>> - s/close from/close to/g
>> - collected review tags
>> - added small comment about phy management in patch 3
>> - removed status = "okay" in patch 4
>> - removed invalid phy-supply of phy1 in patch 5
>> 
>> [1] http://lore.kernel.org/r/20200909160409.8678-1-narmstr...@baylibre.com
>> 
>> Neil Armstrong (5):
>>   phy: amlogic: phy-meson-gxl-usb2: keep ID pull-up even in Host mode
>>   dt-bindings: usb: amlogic,meson-g12a-usb-ctrl: add the Amlogic AXG
>> Families USB Glue Bindings
>>   usb: dwc-meson-g12a: Add support for USB on AXG SoCs
>>   arm64: dts: meson-axg: add USB nodes
>>   arm64: dts: meson-axg-s400: enable USB OTG
>
> Can you pick the DT patches ?

Yes, queued up.

Since we're already at -rc7, this may not make it until v5.11.

Kevin


Re: [PATCH 1/2] mm/frame-vec: Drop gup_flags from get_vaddr_frames()

2020-10-02 Thread Tomasz Figa
On Fri, Oct 2, 2020 at 7:53 PM Daniel Vetter  wrote:
>
> FOLL_WRITE | FOLL_FORCE is really the only reasonable thing to do for
> simple dma device that can't guarantee write protection. Which is also
> what all the callers are using.
>
> So just simplify this.
>
> Signed-off-by: Daniel Vetter 
> Cc: Inki Dae 
> Cc: Joonyoung Shim 
> Cc: Seung-Woo Kim 
> Cc: Kyungmin Park 
> Cc: Kukjin Kim 
> Cc: Krzysztof Kozlowski 
> Cc: Pawel Osciak 
> Cc: Marek Szyprowski 
> Cc: Tomasz Figa 
> Cc: Andrew Morton 
> Cc: Oded Gabbay 
> Cc: Omer Shpigelman 
> Cc: Tomer Tayar 
> Cc: Greg Kroah-Hartman 
> Cc: Pawel Piskorski 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-samsung-...@vger.kernel.org
> Cc: linux-me...@vger.kernel.org
> Cc: linux...@kvack.org
> ---
>  drivers/gpu/drm/exynos/exynos_drm_g2d.c   | 3 +--
>  drivers/media/common/videobuf2/videobuf2-memops.c | 3 +--
>  drivers/misc/habanalabs/common/memory.c   | 3 +--
>  include/linux/mm.h| 2 +-
>  mm/frame_vector.c | 4 ++--
>  5 files changed, 6 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c 
> b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> index 967a5cdc120e..ac452842bab3 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> @@ -480,8 +480,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct 
> g2d_data *g2d,
> goto err_free;
> }
>
> -   ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
> -   g2d_userptr->vec);
> +   ret = get_vaddr_frames(start, npages, g2d_userptr->vec);
> if (ret != npages) {
> DRM_DEV_ERROR(g2d->dev,
>   "failed to get user pages from userptr.\n");
> diff --git a/drivers/media/common/videobuf2/videobuf2-memops.c 
> b/drivers/media/common/videobuf2/videobuf2-memops.c
> index 6e9e05153f4e..9dd6c27162f4 100644
> --- a/drivers/media/common/videobuf2/videobuf2-memops.c
> +++ b/drivers/media/common/videobuf2/videobuf2-memops.c
> @@ -40,7 +40,6 @@ struct frame_vector *vb2_create_framevec(unsigned long 
> start,
> unsigned long first, last;
> unsigned long nr;
> struct frame_vector *vec;
> -   unsigned int flags = FOLL_FORCE | FOLL_WRITE;
>
> first = start >> PAGE_SHIFT;
> last = (start + length - 1) >> PAGE_SHIFT;
> @@ -48,7 +47,7 @@ struct frame_vector *vb2_create_framevec(unsigned long 
> start,
> vec = frame_vector_create(nr);
> if (!vec)
> return ERR_PTR(-ENOMEM);
> -   ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec);
> +   ret = get_vaddr_frames(start & PAGE_MASK, nr, vec);
> if (ret < 0)
> goto out_destroy;
> /* We accept only complete set of PFNs */

For drivers/media/common/videobuf2/:

Acked-by: Tomasz Figa 

Best regards,
Tomasz


Re: [PATCH v4 2/3] iommu/tegra-smmu: Rework tegra_smmu_probe_device()

2020-10-02 Thread Dmitry Osipenko
02.10.2020 21:01, Nicolin Chen пишет:
> On Fri, Oct 02, 2020 at 05:23:14PM +0300, Dmitry Osipenko wrote:
>> 02.10.2020 09:08, Nicolin Chen пишет:
>>>  static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
>>>  {
>>> -   struct device_node *np = dev->of_node;
>>> -   struct tegra_smmu *smmu = NULL;
>>> -   struct of_phandle_args args;
>>> -   unsigned int index = 0;
>>> -   int err;
>>> -
>>> -   while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
>>> - &args) == 0) {
>>> -   smmu = tegra_smmu_find(args.np);
>>> -   if (smmu) {
>>> -   err = tegra_smmu_configure(smmu, dev, &args);
>>> -   of_node_put(args.np);
>>> -
>>> -   if (err < 0)
>>> -   return ERR_PTR(err);
>>> -
>>> -   /*
>>> -* Only a single IOMMU master interface is currently
>>> -* supported by the Linux kernel, so abort after the
>>> -* first match.
>>> -*/
>>> -   dev_iommu_priv_set(dev, smmu);
>>> -
>>> -   break;
>>> -   }
>>> -
>>> -   of_node_put(args.np);
>>> -   index++;
>>> -   }
>>> +   struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
>>>  
>>> if (!smmu)
>>> return ERR_PTR(-ENODEV);
>>
>> The !smmu can't ever be true now, isn't it? Then please remove it.
> 
> How can you be so sure? Have you read my commit message? The whole
> point of removing the hack in tegra_smmu_probe() is to return the
> ERR_PTR(-ENODEV) here. The bus_set_iommu() will call this function
> when mc->smmu is not assigned it, as it's assigned after we return
> tegra_smmu_probe() while bus_set_iommu() is still in the middle of
> the tegra_smmu_probe().
> 

My bad, I probably missed that was looking at the probe_device(), looks
good then.


Re: [PATCH 2/2] mm/frame-vec: use FOLL_LONGTERM

2020-10-02 Thread Daniel Vetter
On Fri, Oct 2, 2020 at 8:06 PM Jason Gunthorpe  wrote:
> On Fri, Oct 02, 2020 at 07:53:03PM +0200, Daniel Vetter wrote:
> > For $reasons I've stumbled over this code and I'm not sure the change
> > to the new gup functions in 55a650c35fea ("mm/gup: frame_vector:
> > convert get_user_pages() --> pin_user_pages()") was entirely correct.
> >
> > This here is used for long term buffers (not just quick I/O) like
> > RDMA, and John notes this in his patch. But I thought the rule for
> > these is that they need to add FOLL_LONGTERM, which John's patch
> > didn't do.
> >
> > There is already a dax specific check (added in b7f0554a56f2 ("mm:
> > fail get_vaddr_frames() for filesystem-dax mappings")), so this seems
> > like the prudent thing to do.
> >
> > Signed-off-by: Daniel Vetter 
> > Cc: Andrew Morton 
> > Cc: John Hubbard 
> > Cc: Jérôme Glisse 
> > Cc: Jan Kara 
> > Cc: Dan Williams 
> > Cc: linux...@kvack.org
> > Cc: linux-arm-ker...@lists.infradead.org
> > Cc: linux-samsung-...@vger.kernel.org
> > Cc: linux-me...@vger.kernel.org
> > Hi all,
> >
> > I stumbled over this and figured typing this patch can't hurt. Really
> > just to maybe learn a few things about how gup/pup is supposed to be
> > used (we have a bit of that in drivers/gpu), this here isn't really
> > related to anything I'm doing.
>
> FOLL_FORCE is a pretty big clue it should be FOLL_LONGTERM, IMHO

Since you're here ... I've noticed that ib sets FOLL_FORCE when the ib
verb access mode indicates possible writes. I'm not really clear on
why FOLL_WRITE isn't enough and why you need to be able to write
through a vma that's write protected currently.

> > I'm also wondering whether the explicit dax check should be removed,
> > since FOLL_LONGTERM should take care of that already.
>
> Yep! Confirms the above!
>
> This get_vaddr_frames() thing looks impossible to use properly. How on
> earth does a driver guarantee
>
>  "If @start belongs to VM_IO | VM_PFNMAP vma, we don't touch page
>  structures and the caller must make sure pfns aren't reused for
>  anything else while he is using them."
>
> The only possible way to do that is if the driver restricts the VMAs
> to ones it owns and interacts with the vm_private data to refcount
> something.
>
> Since every driver does this wrong anything that uses this is creating
> terrifying security issues.
>
> IMHO this whole API should be deleted :(

Yeah that part I just tried to conveniently ignore. I guess this dates
back to a time when ioremaps were at best fixed, and there wasn't
anything like a gpu driver dynamically managing vram around, resulting
in random entirely unrelated things possibly being mapped to that set
of pfns.

The underlying follow_pfn is also used in other places within
drivers/media, so this doesn't seem to be an accident, but actually
intentional.

I guess minimally we'd need a VM_PFNMAP flag for dynamically managed
drivers like modern drm gpu drivers, to make sure follow_pfn doesn't
follow these?
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [DISCUSSION PATCH 00/41] random: possible ways towards NIST SP800-90B compliance

2020-10-02 Thread Theodore Y. Ts'o
On Fri, Oct 02, 2020 at 03:39:35PM +, Van Leeuwen, Pascal wrote:
> > Then your company can not contribute in Linux kernel development, as
> > this is obviously not allowed by such a footer.
> >
> Interesting, this has never been raised as a problem until today ...
> Going back through my mail archive, it looks like they started automatically 
> adding that some
> 3 months ago. Not that they informed anyone about that, it just silently 
> happened.

So use a private e-mail address (e.g., at fastmail.fm if you don't
want to run your mail server) and then tunnel out SMTP requests using
ssh.  It's not hard.  :-)

I've worked at multiple $BIG_COMPANY's, and I've been doing this for
decades.  It's also helpful when I need to send e-mails from
conference networks from my laptop.

- Ted


Re: [PATCH v4 2/2] Add hardware monitoring driver for Moortec MR75203 PVT controller

2020-10-02 Thread Andy Shevchenko
On Fri, Oct 02, 2020 at 09:11:35PM +0300, Andy Shevchenko wrote:
> On Fri, Oct 02, 2020 at 03:04:27PM +0800, Rahul Tanwar wrote:

...

> > +   case hwmon_in:
> > +   if (attr == hwmon_in_input)
> > +   return 0444;
> 
> > +   return 0;
> 
> > +   default:
> 
> > +   return 0;
> 
> break here and
> 
> > +   }
> 
> return 0; here only once.

This probably makes little sense.

-- 
With Best Regards,
Andy Shevchenko




Re: [WARNING] kernel/rcu/tree.c:1058 rcu_irq_enter+0x15/0x20

2020-10-02 Thread Peter Zijlstra
On Fri, Oct 02, 2020 at 01:56:44PM -0400, Steven Rostedt wrote:
> On Wed, 30 Sep 2020 20:13:23 +0200
> Peter Zijlstra  wrote:
> 
> > Blergh, IIRC there's header hell that way. The sane fix is killing off
> > that trace_*_rcuidle() disease.
> > 
> > But I think this will also cure it.
> 
> I guess you still don't build modules ;-). I had to add a
> EXPORT_SYMBOL(lockdep_recursion) to get it to build, and then move the

Correct, my regular configs are all without modules.

> checks within the irq disabling to get rid of the using cpu pointers within
> preemptable code warnings

Ah, I think I lost a s/__this_cpu_read/raw_cpu_read/ somewhere. The
thing is, if we're preemptible/migratable it will be 0 on both CPUs and
it doesn't matter which 0 we read. If it is !0, IRQs will be disabled
and we can't get migrated.

Anyway, let me go write a Changelog to go with it.


Re: [PATCH v4 2/2] Add hardware monitoring driver for Moortec MR75203 PVT controller

2020-10-02 Thread Andy Shevchenko
On Fri, Oct 02, 2020 at 03:04:27PM +0800, Rahul Tanwar wrote:
> PVT controller (MR75203) is used to configure & control
> Moortec embedded analog IP which contains temprature
> sensor(TS), voltage monitor(VM) & process detector(PD)
> modules. Add hardware monitoring driver to support
> MR75203 PVT controller.

Some nit-picks below.
Reviewed-by: Andy Shevchenko 

> Signed-off-by: Rahul Tanwar 
> ---
>  drivers/hwmon/Kconfig   |  10 +
>  drivers/hwmon/Makefile  |   1 +
>  drivers/hwmon/mr75203.c | 651 
> 
>  3 files changed, 662 insertions(+)
>  create mode 100644 drivers/hwmon/mr75203.c
> 
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 8dc28b26916e..2defb46677b4 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -1112,6 +1112,16 @@ config SENSORS_MENF21BMC_HWMON
> This driver can also be built as a module. If so the module
> will be called menf21bmc_hwmon.
>  
> +config SENSORS_MR75203
> + tristate "Moortec Semiconductor MR75203 PVT Controller"
> + select REGMAP_MMIO
> + help
> +   If you say yes here you get support for Moortec MR75203
> +   PVT controller.
> +
> +   This driver can also be built as a module. If so, the module
> +   will be called mr75203.
> +
>  config SENSORS_ADCXX
>   tristate "National Semiconductor ADCxxxSxxx"
>   depends on SPI_MASTER
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index a8f4b35b136b..bb4bd92a5149 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -142,6 +142,7 @@ obj-$(CONFIG_SENSORS_MCP3021) += mcp3021.o
>  obj-$(CONFIG_SENSORS_TC654)  += tc654.o
>  obj-$(CONFIG_SENSORS_MLXREG_FAN) += mlxreg-fan.o
>  obj-$(CONFIG_SENSORS_MENF21BMC_HWMON) += menf21bmc_hwmon.o
> +obj-$(CONFIG_SENSORS_MR75203)+= mr75203.o
>  obj-$(CONFIG_SENSORS_NCT6683)+= nct6683.o
>  obj-$(CONFIG_SENSORS_NCT6775)+= nct6775.o
>  obj-$(CONFIG_SENSORS_NCT7802)+= nct7802.o
> diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c
> new file mode 100644
> index ..dc6f411ae873
> --- /dev/null
> +++ b/drivers/hwmon/mr75203.c
> @@ -0,0 +1,651 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2020 MaxLinear, Inc.
> + *
> + * This driver is a hardware monitoring driver for PVT controller
> + * (MR75203) which is used to configure & control Moortec embedded
> + * analog IP to enable multiple embedded temperature sensor(TS),
> + * voltage monitor(VM) & process detector(PD) modules.
> + */

bits.h?

> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* PVT Common register */
> +#define PVT_IP_CONFIG0x04
> +#define TS_NUM_MSK   GENMASK(4, 0)
> +#define TS_NUM_SFT   0
> +#define PD_NUM_MSK   GENMASK(12, 8)
> +#define PD_NUM_SFT   8
> +#define VM_NUM_MSK   GENMASK(20, 16)
> +#define VM_NUM_SFT   16
> +#define CH_NUM_MSK   GENMASK(31, 24)
> +#define CH_NUM_SFT   24
> +
> +/* Macro Common Register */
> +#define CLK_SYNTH0x00
> +#define CLK_SYNTH_LO_SFT 0
> +#define CLK_SYNTH_HI_SFT 8
> +#define CLK_SYNTH_HOLD_SFT   16
> +#define CLK_SYNTH_EN BIT(24)
> +#define CLK_SYS_CYCLES_MAX   514
> +#define CLK_SYS_CYCLES_MIN   2
> +#define HZ_PER_MHZ   100L
> +
> +#define SDIF_DISABLE 0x04
> +
> +#define SDIF_STAT0x08
> +#define SDIF_BUSYBIT(0)
> +#define SDIF_LOCKBIT(1)
> +
> +#define SDIF_W   0x0c
> +#define SDIF_PROGBIT(31)
> +#define SDIF_WRN_W   BIT(27)
> +#define SDIF_WRN_R   0x00
> +#define SDIF_ADDR_SFT24
> +
> +#define SDIF_HALT0x10
> +#define SDIF_CTRL0x14
> +#define SDIF_SMPL_CTRL   0x20
> +
> +/* TS & PD Individual Macro Register */
> +#define COM_REG_SIZE 0x40
> +
> +#define SDIF_DONE(n) (COM_REG_SIZE + 0x14 + 0x40 * (n))
> +#define SDIF_SMPL_DONE   BIT(0)
> +
> +#define SDIF_DATA(n) (COM_REG_SIZE + 0x18 + 0x40 * (n))
> +#define SAMPLE_DATA_MSK  GENMASK(15, 0)
> +
> +#define HILO_RESET(n)(COM_REG_SIZE + 0x2c + 0x40 * (n))
> +
> +/* VM Individual Macro Register */
> +#define VM_COM_REG_SIZE  0x200
> +#define VM_SDIF_DONE(n)  (VM_COM_REG_SIZE + 0x34 + 0x200 * (n))
> +#define VM_SDIF_DATA(n)  (VM_COM_REG_SIZE + 0x40 + 0x200 * (n))
> +
> +/* SDA Slave Register */
> +#define IP_CTRL  0x00
> +#define IP_RST_REL   BIT(1)
> +#define IP_RUN_CONT  BIT(3)
> +#define IP_AUTO  BIT(8)
> +#define IP_VM_MODE   BIT(10)
> +
> +#define IP_CFG   0x01
> +#define CFG0_MODE_2  BIT(0)
> +#define CFG0_PARALLEL_OUT0
> +#define CFG0_12_BIT  0
> +#define CFG1_VOL_MEAS_MODE   0
> +#define CFG1_PARALLEL_OUT0
> +#define CFG1_14_BIT  0
> +
> +#define IP_DATA  0x03
> +
> +#define IP_POLL  0x04
> +#define VM_CH_INIT   BIT(20)
> +#define VM_CH_REQBIT(21)
> +
> +#d

Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Grygorii Strashko




On 02/10/2020 20:48, Saravana Kannan wrote:

On Fri, Oct 2, 2020 at 8:03 AM 'Grygorii Strashko' via kernel-team
 wrote:




On 02/10/2020 14:40, Grygorii Strashko wrote:



On 02/10/2020 02:19, Laurent Pinchart wrote:

Hi Saravana,

Thank you for the patch.

On Thu, Oct 01, 2020 at 03:59:51PM -0700, Saravana Kannan wrote:

When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
adding all top level devices") optimized the fwnode parsing when all top
level devices are added, it missed out optimizing this for platform
where the top level devices are added through the init_machine() path.

This commit does the optimization for all paths by simply moving the
fw_devlink_pause/resume() inside of_platform_default_populate().


Based on v5.9-rc5, before the patch:

[0.652887] cpuidle: using governor menu
[   12.349476] No ATAGs?

After the patch:

[0.650460] cpuidle: using governor menu
[   12.262101] No ATAGs?

:-(


This is kinda expected :( because omap2 arch doesn't call 
of_platform_default_populate()

Call path:
board-generic.c
   DT_MACHINE_START()
 .init_machine= omap_generic_init,

   omap_generic_init()
 pdata_quirks_init(omap_dt_match_table);
  of_platform_populate(NULL, omap_dt_match_table,
   omap_auxdata_lookup, NULL);

Other affected platforms
arm: mach-ux500
some mips
some powerpc

There are also cases where a lot of devices are placed under a bus node; in such cases
of_platform_populate() calls from bus drivers will also suffer from this
issue.

I think one option could be to add some parameter to _populate() or introduce 
new api.

By the way, is there an option to disable this feature at all?
Is there a Kconfig option?
Is there any reason why such complex and time consuming code was added to the 
kernel and not implemented on DTC level?


Also, I've come up with another diff, pls check.

[0.00] Booting Linux on physical CPU 0x0
[0.00] Linux version 5.9.0-rc6-01791-g9acba6b38757-dirty 
(grygorii@grygorii-XPS-13-9370) (arm-linux-gnueabihf-gcc (GNU Toolcha0
[0.00] CPU: ARMv7 Processor [412fc0f2] revision 2 (ARMv7), cr=10c5387d
[0.00] CPU: div instructions available: patching division code
[0.00] CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
[0.00] OF: fdt: Machine model: TI AM5718 IDK
...
[0.053443] cpuidle: using governor ladder
[0.053470] cpuidle: using governor menu
[0.089304] No ATAGs?
...
[3.092291] devtmpfs: mounted
[3.095804] Freeing unused kernel memory: 1024K
[3.100483] Run /sbin/init as init process



-- >< ---
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 071f04da32c8..4521b26e7745 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -514,6 +514,12 @@ static const struct of_device_id reserved_mem_matches[] = {
  {}
   };

+static int __init of_platform_fw_devlink_pause(void)
+{
+   fw_devlink_pause();
+}
+core_initcall(of_platform_fw_devlink_pause);
+
   static int __init of_platform_default_populate_init(void)
   {
  struct device_node *node;
@@ -538,9 +544,7 @@ static int __init of_platform_default_populate_init(void)
  }

  /* Populate everything else. */
-   fw_devlink_pause();
  of_platform_default_populate(NULL, NULL, NULL);
-   fw_devlink_resume();

  return 0;
   }
@@ -548,6 +552,7 @@ arch_initcall_sync(of_platform_default_populate_init);

   static int __init of_platform_sync_state_init(void)
   {
+   fw_devlink_resume();


^ it seems this has to be done earlier, like
+static int __init of_platform_fw_devlink_resume(void)
+{
+   fw_devlink_resume();
+   return 0;
+}
+device_initcall_sync(of_platform_fw_devlink_resume);


This will mean no device will probe until device_initcall_sync().
Unfortunately, I don't think we can make such a sweeping assumption.


Could you answer below questions, pls?

By the way, is there an option to disable this feature at all?
Is there a Kconfig option?


--
Best regards,
grygorii


[PATCH v7 0/3] Move recovery/coredump configuration to sysfs

2020-10-02 Thread Rishabh Bhatnagar
From Android R onwards Google has restricted access to debugfs in user
and user-debug builds. This restricts access to most of the features
exposed through debugfs. 'Coredump' and 'Recovery' are critical
interfaces that are required for remoteproc to work on Qualcomm Chipsets.
This patch series adds recovery/coredump configuration to sysfs interface
and disables coredump collection by default. Having coredump disabled by
default on production devices makes sense.

Changelog:

v7 -> v6:
- Keep the debugfs entries intact for now.
- Reorder the patches to have a consistent sysfs interface.

v6 -> v5:
- Disable coredump collection by default
- Rename the "default" configuration to "enabled" to avoid confusion

v5 -> v4:
- Fix the cover-letter of the patch series.

v4 -> v3:
- Remove the feature flag to expose recovery/coredump

v3 -> v2:
- Remove the coredump/recovery entries from debugfs
- Expose recovery/coredump from sysfs under a feature flag

v1 -> v2:
- Correct the contact name in the sysfs documentation.
- Remove the redundant write documentation for coredump/recovery sysfs
- Add a feature flag to make this interface switch configurable.

Rishabh Bhatnagar (3):
  remoteproc: Change default dump configuration to "disabled"
  remoteproc: Add coredump as part of sysfs interface
  remoteproc: Add recovery configuration to the sysfs interface

 Documentation/ABI/testing/sysfs-class-remoteproc |  44 +
 drivers/remoteproc/remoteproc_coredump.c |   6 +-
 drivers/remoteproc/remoteproc_debugfs.c  |  23 +++--
 drivers/remoteproc/remoteproc_sysfs.c| 119 +++
 include/linux/remoteproc.h   |   8 +-
 5 files changed, 181 insertions(+), 19 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v7 1/3] remoteproc: Change default dump configuration to "disabled"

2020-10-02 Thread Rishabh Bhatnagar
Currently "default" configuration option means coredumps are
enabled. To avoid confusion rename the "default" configuration
option to "enabled" and disable collection of dumps by default
as doing so makes sense for production devices.

Signed-off-by: Rishabh Bhatnagar 
---
 drivers/remoteproc/remoteproc_coredump.c |  6 +++---
 drivers/remoteproc/remoteproc_debugfs.c  | 23 +++
 include/linux/remoteproc.h   |  8 
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_coredump.c 
b/drivers/remoteproc/remoteproc_coredump.c
index bb15a29..34530dc 100644
--- a/drivers/remoteproc/remoteproc_coredump.c
+++ b/drivers/remoteproc/remoteproc_coredump.c
@@ -257,7 +257,7 @@ void rproc_coredump(struct rproc *rproc)
 * directly read from device memory.
 */
data_size += elf_size_of_phdr(class);
-   if (dump_conf == RPROC_COREDUMP_DEFAULT)
+   if (dump_conf == RPROC_COREDUMP_ENABLED)
data_size += segment->size;
 
phnum++;
@@ -297,14 +297,14 @@ void rproc_coredump(struct rproc *rproc)
elf_phdr_set_p_flags(class, phdr, PF_R | PF_W | PF_X);
elf_phdr_set_p_align(class, phdr, 0);
 
-   if (dump_conf == RPROC_COREDUMP_DEFAULT)
+   if (dump_conf == RPROC_COREDUMP_ENABLED)
rproc_copy_segment(rproc, data + offset, segment, 0,
   segment->size);
 
offset += elf_phdr_get_p_filesz(class, phdr);
phdr += elf_size_of_phdr(class);
}
-   if (dump_conf == RPROC_COREDUMP_DEFAULT) {
+   if (dump_conf == RPROC_COREDUMP_ENABLED) {
dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
return;
}
diff --git a/drivers/remoteproc/remoteproc_debugfs.c 
b/drivers/remoteproc/remoteproc_debugfs.c
index 2e3b3e2..7e58453 100644
--- a/drivers/remoteproc/remoteproc_debugfs.c
+++ b/drivers/remoteproc/remoteproc_debugfs.c
@@ -33,9 +33,9 @@ static struct dentry *rproc_dbg;
  * enum rproc_coredump_mechanism
  */
 static const char * const rproc_coredump_str[] = {
-   [RPROC_COREDUMP_DEFAULT]= "default",
-   [RPROC_COREDUMP_INLINE] = "inline",
[RPROC_COREDUMP_DISABLED]   = "disabled",
+   [RPROC_COREDUMP_ENABLED]= "enabled",
+   [RPROC_COREDUMP_INLINE] = "inline",
 };
 
 /* Expose the current coredump configuration via debugfs */
@@ -54,20 +54,19 @@ static ssize_t rproc_coredump_read(struct file *filp, char 
__user *userbuf,
 
 /*
  * By writing to the 'coredump' debugfs entry, we control the behavior of the
- * coredump mechanism dynamically. The default value of this entry is 
"default".
+ * coredump mechanism dynamically. The default value of this entry is 
"disabled".
  *
  * The 'coredump' debugfs entry supports these commands:
  *
- * default:This is the default coredump mechanism. When the remoteproc
- * crashes the entire coredump will be copied to a separate buffer
- * and exposed to userspace.
+ * disabled:   By default coredump collection is disabled. Recovery will
+ * proceed without collecting any dump.
+ *
+ * enabled:When the remoteproc crashes the entire coredump will be copied
+ * to a separate buffer and exposed to userspace.
  *
  * inline: The coredump will not be copied to a separate buffer and the
  * recovery process will have to wait until data is read by
  * userspace. But this avoid usage of extra memory.
- *
- * disabled:   This will disable coredump. Recovery will proceed without
- * collecting any dump.
  */
 static ssize_t rproc_coredump_write(struct file *filp,
const char __user *user_buf, size_t count,
@@ -94,12 +93,12 @@ static ssize_t rproc_coredump_write(struct file *filp,
goto out;
}
 
-   if (!strncmp(buf, "disable", count)) {
+   if (!strncmp(buf, "disabled", count)) {
rproc->dump_conf = RPROC_COREDUMP_DISABLED;
+   } else if (!strncmp(buf, "enabled", count)) {
+   rproc->dump_conf = RPROC_COREDUMP_ENABLED;
} else if (!strncmp(buf, "inline", count)) {
rproc->dump_conf = RPROC_COREDUMP_INLINE;
-   } else if (!strncmp(buf, "default", count)) {
-   rproc->dump_conf = RPROC_COREDUMP_DEFAULT;
} else {
dev_err(&rproc->dev, "Invalid coredump configuration\n");
err = -EINVAL;
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 2fa68bf..3fa3ba6 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -442,16 +442,16 @@ enum rproc_crash_type {
 
 /**
  * enum rproc_dump_mechanism - Coredump options for core
- * @RPROC_COREDUMP_DEFAULT:Copy dump to separate buffer a

[PATCH v7 3/3] remoteproc: Add recovery configuration to the sysfs interface

2020-10-02 Thread Rishabh Bhatnagar
Add recovery configuration to the sysfs interface. This will
allow usage of this configuration feature in production
devices where access to debugfs might be limited.

Signed-off-by: Rishabh Bhatnagar 
---
 Documentation/ABI/testing/sysfs-class-remoteproc | 20 +
 drivers/remoteproc/remoteproc_sysfs.c| 56 
 2 files changed, 76 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-remoteproc 
b/Documentation/ABI/testing/sysfs-class-remoteproc
index 050bd25..066b9b6 100644
--- a/Documentation/ABI/testing/sysfs-class-remoteproc
+++ b/Documentation/ABI/testing/sysfs-class-remoteproc
@@ -82,3 +82,23 @@ Description: Remote processor coredump configuration
processor's device memory. Extra buffer will not be used to
copy the dump. Also recovery process will not proceed until
all data is read by userspace.
+
+What:  /sys/class/remoteproc/.../recovery
+Date:  July 2020
+Contact:   Bjorn Andersson , Ohad Ben-Cohen 

+Description:   Remote processor recovery mechanism
+
+   Reports the recovery mechanism of the remote processor,
+   which will be one of:
+
+   "enabled"
+   "disabled"
+
+   "enabled" means, the remote processor will be automatically
+   recovered whenever it crashes. Moreover, if the remote
+   processor crashes while recovery is disabled, it will
+   be automatically recovered too as soon as recovery is enabled.
+
+   "disabled" means, a remote processor will remain in a crashed
+   state if it crashes. This is useful for debugging purposes;
+   without it, debugging a crash is substantially harder.
diff --git a/drivers/remoteproc/remoteproc_sysfs.c 
b/drivers/remoteproc/remoteproc_sysfs.c
index 8500271..d1cf7bf 100644
--- a/drivers/remoteproc/remoteproc_sysfs.c
+++ b/drivers/remoteproc/remoteproc_sysfs.c
@@ -10,6 +10,61 @@
 
 #define to_rproc(d) container_of(d, struct rproc, dev)
 
+static ssize_t recovery_show(struct device *dev,
+struct device_attribute *attr, char *buf)
+{
+   struct rproc *rproc = to_rproc(dev);
+
+   return sprintf(buf, "%s", rproc->recovery_disabled ? "disabled\n" : 
"enabled\n");
+}
+
+/*
+ * By writing to the 'recovery' sysfs entry, we control the behavior of the
+ * recovery mechanism dynamically. The default value of this entry is 
"enabled".
+ *
+ * The 'recovery' sysfs entry supports these commands:
+ *
+ * enabled:When enabled, the remote processor will be automatically
+ * recovered whenever it crashes. Moreover, if the remote
+ * processor crashes while recovery is disabled, it will
+ * be automatically recovered too as soon as recovery is enabled.
+ *
+ * disabled:   When disabled, a remote processor will remain in a crashed
+ * state if it crashes. This is useful for debugging purposes;
+ * without it, debugging a crash is substantially harder.
+ *
+ * recover:This function will trigger an immediate recovery if the
+ * remote processor is in a crashed state, without changing
+ * or checking the recovery state (enabled/disabled).
+ * This is useful during debugging sessions, when one expects
+ * additional crashes to happen after enabling recovery. In this
+ * case, enabling recovery will make it hard to debug subsequent
+ * crashes, so it's recommended to keep recovery disabled, and
+ * instead use the "recover" command as needed.
+ */
+static ssize_t recovery_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct rproc *rproc = to_rproc(dev);
+
+   if (sysfs_streq(buf, "enabled")) {
+   /* change the flag and begin the recovery process if needed */
+   rproc->recovery_disabled = false;
+   rproc_trigger_recovery(rproc);
+   } else if (sysfs_streq(buf, "disabled")) {
+   rproc->recovery_disabled = true;
+   } else if (sysfs_streq(buf, "recover")) {
+   /* begin the recovery process without changing the flag */
+   rproc_trigger_recovery(rproc);
+   } else {
+   return -EINVAL;
+   }
+
+   return count;
+}
+static DEVICE_ATTR_RW(recovery);
+
 /*
  * A coredump-configuration-to-string lookup table, for exposing a
  * human readable configuration via sysfs. Always keep in sync with
@@ -201,6 +256,7 @@ static DEVICE_ATTR_RO(name);
 
 static struct attribute *rproc_attrs[] = {
&dev_attr_coredump.attr,
+   &dev_attr_recovery.attr,
&dev_attr_firmware.attr,
&dev_attr_state.attr,
&dev_attr_name.attr,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foun

[PATCH v7 2/3] remoteproc: Add coredump as part of sysfs interface

2020-10-02 Thread Rishabh Bhatnagar
Add coredump as part of the sysfs interface. This will
allow usage of this configuration feature in production
devices where access to debugfs might be limited.

Signed-off-by: Rishabh Bhatnagar 
---
 Documentation/ABI/testing/sysfs-class-remoteproc | 24 +
 drivers/remoteproc/remoteproc_sysfs.c| 63 
 2 files changed, 87 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-remoteproc 
b/Documentation/ABI/testing/sysfs-class-remoteproc
index 36094fb..050bd25 100644
--- a/Documentation/ABI/testing/sysfs-class-remoteproc
+++ b/Documentation/ABI/testing/sysfs-class-remoteproc
@@ -58,3 +58,27 @@ Description: Remote processor name
Reports the name of the remote processor. This can be used by
userspace in exactly identifying a remote processor and ease
up the usage in modifying the 'firmware' or 'state' files.
+
+What:  /sys/class/remoteproc/.../coredump
+Date:  July 2020
+Contact:   Bjorn Andersson , Ohad Ben-Cohen 

+Description:   Remote processor coredump configuration
+
+   Reports the coredump configuration of the remote processor,
+   which will be one of:
+
+   "disabled"
+   "enabled"
+   "inline"
+
+   "disabled" means no dump will be collected.
+
+   "enabled" means when the remote processor's coredump is
+   collected it will be copied to a separate buffer and that
+   buffer is exposed to userspace.
+
+   "inline" means when the remote processor's coredump is
+   collected userspace will directly read from the remote
+   processor's device memory. Extra buffer will not be used to
+   copy the dump. Also recovery process will not proceed until
+   all data is read by userspace.
diff --git a/drivers/remoteproc/remoteproc_sysfs.c 
b/drivers/remoteproc/remoteproc_sysfs.c
index eea514c..8500271 100644
--- a/drivers/remoteproc/remoteproc_sysfs.c
+++ b/drivers/remoteproc/remoteproc_sysfs.c
@@ -10,6 +10,68 @@
 
 #define to_rproc(d) container_of(d, struct rproc, dev)
 
+/*
+ * A coredump-configuration-to-string lookup table, for exposing a
+ * human readable configuration via sysfs. Always keep in sync with
+ * enum rproc_coredump_mechanism
+ */
+static const char * const rproc_coredump_str[] = {
+   [RPROC_COREDUMP_DISABLED]   = "disabled",
+   [RPROC_COREDUMP_ENABLED]= "enabled",
+   [RPROC_COREDUMP_INLINE] = "inline",
+};
+
+/* Expose the current coredump configuration via sysfs */
+static ssize_t coredump_show(struct device *dev,
+struct device_attribute *attr, char *buf)
+{
+   struct rproc *rproc = to_rproc(dev);
+
+   return sprintf(buf, "%s\n", rproc_coredump_str[rproc->dump_conf]);
+}
+
+/*
+ * By writing to the 'coredump' sysfs entry, we control the behavior of the
+ * coredump mechanism dynamically. The default value of this entry is 
"disabled".
+ *
+ * The 'coredump' sysfs entry supports these commands:
+ *
+ * disabled:   This is the default coredump mechanism. Recovery will proceed
+ * without collecting any dump.
+ *
+ * enabled:When the remoteproc crashes the entire coredump will be
+ * copied to a separate buffer and exposed to userspace.
+ *
+ * inline: The coredump will not be copied to a separate buffer and the
+ * recovery process will have to wait until data is read by
+ * userspace. But this avoid usage of extra memory.
+ */
+static ssize_t coredump_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct rproc *rproc = to_rproc(dev);
+
+   if (rproc->state == RPROC_CRASHED) {
+   dev_err(&rproc->dev, "can't change coredump configuration\n");
+   return -EBUSY;
+   }
+
+   if (sysfs_streq(buf, "disabled")) {
+   rproc->dump_conf = RPROC_COREDUMP_DISABLED;
+   } else if (sysfs_streq(buf, "enabled")) {
+   rproc->dump_conf = RPROC_COREDUMP_ENABLED;
+   } else if (sysfs_streq(buf, "inline")) {
+   rproc->dump_conf = RPROC_COREDUMP_INLINE;
+   } else {
+   dev_err(&rproc->dev, "Invalid coredump configuration\n");
+   return -EINVAL;
+   }
+
+   return count;
+}
+static DEVICE_ATTR_RW(coredump);
+
 /* Expose the loaded / running firmware name via sysfs */
 static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
  char *buf)
@@ -138,6 +200,7 @@ static ssize_t name_show(struct device *dev, struct 
device_attribute *attr,
 static DEVICE_ATTR_RO(name);
 
 static struct attribute *rproc_attrs[] = {
+   &dev_attr_coredump.attr,
&dev_attr_firmware.attr,
&dev_attr_state.attr,
&dev_attr_name.att

Re: [PATCH net-next v2 5/6] bonding: update Documentation for port/bond terminology

2020-10-02 Thread Andrew Lunn
On Fri, Oct 02, 2020 at 01:40:00PM -0400, Jarod Wilson wrote:
> Point users to the new interface names instead of the old ones, where
> appropriate. Userspace bits referenced still include use of master/slave,
> but those can't be altered until userspace changes too, ideally after
> these changes propagate to the community at large.
> 
> Cc: Jay Vosburgh 
> Cc: Veaceslav Falico 
> Cc: Andy Gospodarek 
> Cc: "David S. Miller" 
> Cc: Jakub Kicinski 
> Cc: Thomas Davis 
> Cc: net...@vger.kernel.org
> Signed-off-by: Jarod Wilson 
> ---
>  Documentation/networking/bonding.rst | 440 +--
>  1 file changed, 220 insertions(+), 220 deletions(-)
> 
> diff --git a/Documentation/networking/bonding.rst 
> b/Documentation/networking/bonding.rst
> index adc314639085..f4c4f0fae83b 100644
> --- a/Documentation/networking/bonding.rst
> +++ b/Documentation/networking/bonding.rst
> @@ -167,22 +167,22 @@ or, for backwards compatibility, the option value.  
> E.g.,
>  
>  The parameters are as follows:
>  
> -active_slave
> +active_port

Hi Jarod

It is going to take quite a while before all distributions' user space
gets updated. So today's API is going to live on for a few
years. People are going to be searching the documentation using the terms
their user space uses, which are going to be today's terms, not the new
ones you are introducing here. For that to work, I think you are going
to have to introduce a table listing today's names and the new names
you are adding, so search engines have some chance of finding this
document, and readers have some clue as to how to translate from what
their user space is using to the terms used in the document.

  Andrew


Re: [PATCH ] scsi: page warning: 'page' may be used uninitialized

2020-10-02 Thread John Donnelly



> On Oct 2, 2020, at 1:01 PM, Mike Christie  wrote:
> 
> On 9/23/20 7:19 PM, john.p.donne...@oracle.com wrote:
>> From: John Donnelly 
>> 
>> corrects: drivers/target/target_core_user.c:688:6: warning: 'page' may be 
>> used
>> uninitialized
>> 
>> Fixes: 3c58f737231e ("scsi: target: tcmu: Optimize use of
>> flush_dcache_page")
>> 
>> To: linux-s...@vger.kernel.org
>> Cc: Mike Christie 
>> Signed-off-by: John Donnelly 
>> ---
>> drivers/target/target_core_user.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> diff --git a/drivers/target/target_core_user.c 
>> b/drivers/target/target_core_user.c
>> index 9b7592350502..86b28117787e 100644
>> --- a/drivers/target/target_core_user.c
>> +++ b/drivers/target/target_core_user.c
>> @@ -681,7 +681,7 @@ static void scatter_data_area(struct tcmu_dev *udev,
>>  void *from, *to = NULL;
>>  size_t copy_bytes, to_offset, offset;
>>  struct scatterlist *sg;
>> -struct page *page;
>> +struct page *page = NULL;
>> 
>>  for_each_sg(data_sg, sg, data_nents, i) {
>>  int sg_remaining = sg->length;
>> 
> 
> Looks ok for now. In the next kernel we can do the more invasive approach and
> add a new struct/helpers to make the code cleaner and fix it properly.
> 
> Acked-by: Mike Christie 


Hi 

Thank you.

I am not always on the email dlists .. Please do the right thing . 





Re: [PATCH v4 2/3] iommu/tegra-smmu: Rework tegra_smmu_probe_device()

2020-10-02 Thread Nicolin Chen
On Fri, Oct 02, 2020 at 05:23:14PM +0300, Dmitry Osipenko wrote:
> 02.10.2020 09:08, Nicolin Chen пишет:
> >  static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
> >  {
> > -   struct device_node *np = dev->of_node;
> > -   struct tegra_smmu *smmu = NULL;
> > -   struct of_phandle_args args;
> > -   unsigned int index = 0;
> > -   int err;
> > -
> > -   while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
> > - &args) == 0) {
> > -   smmu = tegra_smmu_find(args.np);
> > -   if (smmu) {
> > -   err = tegra_smmu_configure(smmu, dev, &args);
> > -   of_node_put(args.np);
> > -
> > -   if (err < 0)
> > -   return ERR_PTR(err);
> > -
> > -   /*
> > -* Only a single IOMMU master interface is currently
> > -* supported by the Linux kernel, so abort after the
> > -* first match.
> > -*/
> > -   dev_iommu_priv_set(dev, smmu);
> > -
> > -   break;
> > -   }
> > -
> > -   of_node_put(args.np);
> > -   index++;
> > -   }
> > +   struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
> >  
> > if (!smmu)
> > return ERR_PTR(-ENODEV);
> 
> The !smmu can't ever be true now, isn't it? Then please remove it.

How can you be so sure? Have you read my commit message? The whole
point of removing the hack in tegra_smmu_probe() is to return the
ERR_PTR(-ENODEV) here. The bus_set_iommu() will call this function
when mc->smmu is not assigned yet, as it's assigned after we return from
tegra_smmu_probe() while bus_set_iommu() is still in the middle of
the tegra_smmu_probe().


Re: [PATCH v2 0/3] drm: commit_work scheduling

2020-10-02 Thread Rob Clark
On Fri, Oct 2, 2020 at 4:01 AM Qais Yousef  wrote:
>
> On 09/30/20 14:17, Rob Clark wrote:
> > From: Rob Clark 
> >
> > The android userspace treats the display pipeline as a realtime problem.
> > And arguably, if your goal is to not miss frame deadlines (ie. vblank),
> > it is.  (See https://lwn.net/Articles/809545/ for the best explanation
> > that I found.)
> >
> > But this presents a problem with using workqueues for non-blocking
> > atomic commit_work(), because the SCHED_FIFO userspace thread(s) can
> > preempt the worker.  Which is not really the outcome you want.. once
> > the required fences are scheduled, you want to push the atomic commit
> > down to hw ASAP.
>
> For me these 2 properties
>
> 1. Run ASAP
> 2. Finish the work un-interrupted
>
> Scream the workers need to be SCHED_FIFO by default. CFS can't give you these
> guarantees.

fwiw, commit_work does sleep/block for some time until fences are
signalled, but then once that happens we want it to run ASAP,
preempting lower priority SCHED_FIFO.

>
> IMO using sched_set_fifo() for these workers is the right thing.
>

Possibly, but we still have limited prioritization options (ie. not
enough) to set these from the kernel.  Giving userspace the control,
so it can pick sensible priorities for commit_work and vblank_work,
which fits in with the priorities of the other userspace threads seems
like the sensible thing.

> >
> > But the decision of whether commit_work should be RT or not really
> > depends on what userspace is doing.  For a pure CFS userspace display
> > pipeline, commit_work() should remain SCHED_NORMAL.
>
> I'm not sure I agree with this. I think it's better to characterize tasks 
> based
> on their properties/requirements rather than what the rest of the userspace is
> using.

I mean, the issue is that userspace is already using a few different
rt priority levels for different SF threads.  We want commit_work to
run ASAP once fences are signalled, and vblank_work to run at a
slightly higher priority still.  But the correct choice for priorities
here depends on what userspace is using, it all needs to fit together
properly.

>
> I do appreciate that maybe some of these tasks have varying requirements 
> during
> their life time. e.g: they have RT property during specific critical section
> but otherwise are CFS tasks. I think the UI thread in Android behaves like
> that.
>
> It's worth IMO trying that approach I pointed out earlier to see if making RT
> try to pick an idle CPU rather than preempt CFS helps. Not sure if it'd be
> accepted but IMHO it's a better direction to consider and discuss.

The problem I was seeing was actually the opposite..  commit_work
becomes runnable (fences signalled) but doesn't get a chance to run
because a SCHED_FIFO SF thread is running.  (Maybe I misunderstood and
your approach would help this case too?)

> Or maybe you can wrap userspace pipeline critical section lock such that any
> task holding it will automatically be promoted to SCHED_FIFO and then demoted
> to CFS once it releases it.

The SCHED_DEADLINE + token passing approach that the lwn article
mentioned sounds interesting, if that eventually becomes possible.
But doesn't really help today..

BR,
-R

> Haven't worked with display pipelines before, so hopefully this makes sense 
> :-)
>
> Thanks
>
> --
> Qais Yousef
>
> >
> > To handle this, convert non-blocking commit_work() to use per-CRTC
> > kthread workers, instead of system_unbound_wq.  Per-CRTC workers are
> > used to avoid serializing commits when userspace is using a per-CRTC
> > update loop.  And the last patch exposes the task id to userspace as
> > a CRTC property, so that userspace can adjust the priority and sched
> > policy to fit its needs.
> >
> >
> > v2: Drop client cap and in-kernel setting of priority/policy in
> > favor of exposing the kworker tid to userspace so that user-
> > space can set priority/policy.
> >
> > Rob Clark (3):
> >   drm/crtc: Introduce per-crtc kworker
> >   drm/atomic: Use kthread worker for nonblocking commits
> >   drm: Expose CRTC's kworker task id
> >
> >  drivers/gpu/drm/drm_atomic_helper.c | 13 
> >  drivers/gpu/drm/drm_crtc.c  | 14 +
> >  drivers/gpu/drm/drm_mode_config.c   | 14 +
> >  drivers/gpu/drm/drm_mode_object.c   |  4 
> >  include/drm/drm_atomic.h| 31 +
> >  include/drm/drm_crtc.h  |  8 
> >  include/drm/drm_mode_config.h   |  9 +
> >  include/drm/drm_property.h  |  9 +
> >  8 files changed, 98 insertions(+), 4 deletions(-)
> >
> > --
> > 2.26.2
> >


Re: [RESEND PATCH] spmi: prefix spmi bus device names with "spmi"

2020-10-02 Thread Mark Brown
On Fri, Oct 02, 2020 at 10:48:32AM -0700, Stephen Boyd wrote:
> Quoting Mark Brown (2020-10-02 09:03:24)

> > ...and doing this in the dev_name() should help other diagnostic users
> > (like dev_printk() for example).

> Don't things like dev_printk() prefix the bus name? See
> dev_driver_string()? So I agree that having the bus name is useful, but
> confused why there are testing scripts and things on top of regmap
> debugfs

Not that I've ever noticed, eg on the console.

> Put another way, why not introduce something similar to i2c-dev where
> userspace can read/write registers for devices on the SPMI bus?
> Otherwise I presume the test scripts inside Qualcomm are just reading
> registers out of regmap?

I know some other vendors use the regmap debugfs for their diagnostic
tools (obviously not with SPMI).  It's generally so they can get the
benefit of the cache, it's a combination of allowing the state to be
inspected while the driver has the device powered down and for devices
on slower buses being much more performant.


signature.asc
Description: PGP signature


Re: [PATCH 2/2] mm/frame-vec: use FOLL_LONGTERM

2020-10-02 Thread Jason Gunthorpe
On Fri, Oct 02, 2020 at 07:53:03PM +0200, Daniel Vetter wrote:
> For $reasons I've stumbled over this code and I'm not sure the change
> to the new gup functions in 55a650c35fea ("mm/gup: frame_vector:
> convert get_user_pages() --> pin_user_pages()") was entirely correct.
> 
> This here is used for long term buffers (not just quick I/O) like
> RDMA, and John notes this in his patch. But I thought the rule for
> these is that they need to add FOLL_LONGTERM, which John's patch
> didn't do.
> 
> There is already a dax specific check (added in b7f0554a56f2 ("mm:
> fail get_vaddr_frames() for filesystem-dax mappings")), so this seems
> like the prudent thing to do.
> 
> Signed-off-by: Daniel Vetter 
> Cc: Andrew Morton 
> Cc: John Hubbard 
> Cc: Jérôme Glisse 
> Cc: Jan Kara 
> Cc: Dan Williams 
> Cc: linux...@kvack.org
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-samsung-...@vger.kernel.org
> Cc: linux-me...@vger.kernel.org
> Hi all,
> 
> I stumbled over this and figured typing this patch can't hurt. Really
> just to maybe learn a few things about how gup/pup is supposed to be
> used (we have a bit of that in drivers/gpu), this here isn't really
> related to anything I'm doing.

FOLL_FORCE is a pretty big clue it should be FOLL_LONGTERM, IMHO

> I'm also wondering whether the explicit dax check should be removed,
> since FOLL_LONGTERM should take care of that already.

Yep! Confirms the above!

This get_vaddr_frames() thing looks impossible to use properly. How on
earth does a driver guarantee

 "If @start belongs to VM_IO | VM_PFNMAP vma, we don't touch page
 structures and the caller must make sure pfns aren't reused for
 anything else while he is using them."

The only possible way to do that is if the driver restricts the VMAs
to ones it owns and interacts with the vm_private data to refcount
something.

Since every driver does this wrong, anything that uses this is creating
terrifying security issues.

IMHO this whole API should be deleted :(

Jason


Re: [PATCH] ftrace: Fix some typos in comment

2020-10-02 Thread Steven Rostedt
On Fri,  2 Oct 2020 22:31:26 +0800
Qiujun Huang  wrote:

> s/coorditate/coordinate/
> s/emty/empty/
> s/preeptive/preemptive/
> s/succes/success/
> s/carefule/careful/
> 
> Signed-off-by: Qiujun Huang 
> ---
>  kernel/trace/ftrace.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 

Applied. Thanks,

-- Steve


Re: [PATCH v2] net: qrtr: ns: Fix the incorrect usage of rcu_read_lock()

2020-10-02 Thread Doug Anderson
Hi,

On Fri, Oct 2, 2020 at 10:06 AM Manivannan Sadhasivam
 wrote:
>
> The rcu_read_lock() is not supposed to lock the kernel_sendmsg() API
> since it has the lock_sock() in qrtr_sendmsg() which will sleep. Hence,
> fix it by excluding the locking for kernel_sendmsg().
>
> While at it, let's also use radix_tree_deref_retry() to confirm the
> validity of the pointer returned by radix_tree_deref_slot() and use
> radix_tree_iter_resume() to resume iterating the tree properly before
> releasing the lock as suggested by Doug.
>
> Fixes: a7809ff90ce6 ("net: qrtr: ns: Protect radix_tree_deref_slot() using 
> rcu read locks")
> Reported-by: Doug Anderson 
> Tested-by: Alex Elder 
> Signed-off-by: Manivannan Sadhasivam 
> ---
>
> Changes in v2:
>
> * Used radix_tree_deref_retry() and radix_tree_iter_resume() as
> suggested by Doug.
>
>  net/qrtr/ns.c | 63 ++-
>  1 file changed, 57 insertions(+), 6 deletions(-)
>
> diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
> index 934999b56d60..dadbe2885be2 100644
> --- a/net/qrtr/ns.c
> +++ b/net/qrtr/ns.c
> @@ -203,15 +203,24 @@ static int announce_servers(struct sockaddr_qrtr *sq)
> /* Announce the list of servers registered in this node */
> radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
> srv = radix_tree_deref_slot(slot);
> +   if (!srv)
> +   continue;
> +   if (radix_tree_deref_retry(srv)) {
> +   slot = radix_tree_iter_retry(&iter);
> +   continue;
> +   }
> +   slot = radix_tree_iter_resume(slot, &iter);
> +   rcu_read_unlock();
>
> ret = service_announce_new(sq, srv);
> if (ret < 0) {
> pr_err("failed to announce new service\n");
> -   goto err_out;
> +   return ret;
> }
> +
> +   rcu_read_lock();
> }
>
> -err_out:
> rcu_read_unlock();
>
> return ret;

nit: you can go back to "return 0" and get rid of the init of "ret =
0" at the beginning of the function.  The need to "return ret" and
init to 0 was introduced by your previous change because of the "goto
err_out" which you no longer have.

...this is true for all your functions, I believe.


> @@ -571,16 +605,33 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr 
> *from,
> rcu_read_lock();
> radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) {
> node = radix_tree_deref_slot(node_slot);
> +   if (!node)
> +   continue;
> +   if (radix_tree_deref_retry(node)) {
> +   node_slot = radix_tree_iter_retry(&node_iter);
> +   continue;
> +   }
> +   node_slot = radix_tree_iter_resume(node_slot, &node_iter);
>
> radix_tree_for_each_slot(srv_slot, &node->servers,
>  &srv_iter, 0) {
> struct qrtr_server *srv;
>
> srv = radix_tree_deref_slot(srv_slot);
> +   if (!srv)
> +   continue;
> +   if (radix_tree_deref_retry(srv)) {
> +   srv_slot = radix_tree_iter_retry(&srv_iter);
> +   continue;
> +   }
> +   srv_slot = radix_tree_iter_resume(srv_slot, 
> &srv_iter);
> +
> if (!server_match(srv, &filter))
> continue;
>

nit: move the "srv_slot = radix_tree_iter_resume(srv_slot,
&srv_iter);" line here (after the !server_match() test) so you only
call it if you're doing the unlock?


I'm not too worried about the nits, though it'd be nice to fix them.
Thus, I'll add:

Reviewed-by: Douglas Anderson 

(...though I'll remind you that I'm a self-professed clueless person
about RCU and radix trees).

I haven't stress tested anything, but at least I no longer get any
warnings at bootup and my WiFi and modem still probe, so I guess:

Tested-by: Douglas Anderson 


Re: [PATCH v4 3/3] iommu/tegra-smmu: Add PCI support

2020-10-02 Thread Dmitry Osipenko
02.10.2020 09:08, Nicolin Chen пишет:
> This patch simply adds support for PCI devices.
> 
> Signed-off-by: Nicolin Chen 
> ---

Reviewed-by: Dmitry Osipenko 


Re: [PATCH] mm: Remove src/dst mm parameter in copy_page_range()

2020-10-02 Thread Peter Xu
On Fri, Oct 02, 2020 at 02:28:58PM -0300, Jason Gunthorpe wrote:
> On Fri, Oct 02, 2020 at 01:14:29PM -0400, Peter Xu wrote:
> > On Fri, Oct 02, 2020 at 08:43:12AM -0300, Jason Gunthorpe wrote:
> > > > -static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct 
> > > > *src_mm,
> > > > -  pmd_t *dst_pmd, pmd_t *src_pmd, struct 
> > > > vm_area_struct *vma,
> > > > -  struct vm_area_struct *new,
> > > > +static int copy_pte_range(pmd_t *dst_pmd, pmd_t *src_pmd,
> > > > +  struct vm_area_struct *vma, struct vm_area_struct 
> > > > *new,
> > > >unsigned long addr, unsigned long end)
> > > 
> > > I like this, my only minor quibble is the mixing of dst/src and new
> > > language, and then reversing the order in each place. Would read
> > > better to be consistent:
> > > 
> > >   copy_pte_range(dst_vma, dst_pmd, src_vma, src_pmd, addr, end)
> > 
> > I have no strong opinion on the ordering, but I agree the names are clearer.
> > Considering normally we put the same type of parameters to be together, how
> > about:
> > 
> >   copy_pte_range(dst_vma, src_vma, dst_pmd, src_pmd, addr, end)
> 
> I was looking at the order of (dst_pmd, src_pmd, src_vma, dest_vma)
> 
> Whichever, just have some logic to it :)

Oh, sure. :)

-- 
Peter Xu



Re: [PATCH v4 3/3] iommu/tegra-smmu: Add PCI support

2020-10-02 Thread Dmitry Osipenko
02.10.2020 20:45, Nicolin Chen пишет:
> On Fri, Oct 02, 2020 at 05:35:24PM +0300, Dmitry Osipenko wrote:
>> 02.10.2020 09:08, Nicolin Chen пишет:
>>> @@ -865,7 +866,11 @@ static struct iommu_group 
>>> *tegra_smmu_device_group(struct device *dev)
>>> group->smmu = smmu;
>>> group->soc = soc;
>>>  
>>> -   group->group = iommu_group_alloc();
>>> +   if (dev_is_pci(dev))
>>> +   group->group = pci_device_group(dev);
>>> +   else
>>> +   group->group = generic_device_group(dev);
>>> +
>>> if (IS_ERR(group->group)) {
>>> devm_kfree(smmu->dev, group);
>>> mutex_unlock(&smmu->lock);
>>> @@ -1069,22 +1074,32 @@ struct tegra_smmu *tegra_smmu_probe(struct device 
>>> *dev,
>>> iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
>>>  
>>> err = iommu_device_register(&smmu->iommu);
>>> -   if (err) {
>>> -   iommu_device_sysfs_remove(&smmu->iommu);
>>> -   return ERR_PTR(err);
>>> -   }
>>> +   if (err)
>>> +   goto err_sysfs;
>>>  
>>> err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops);
>>> -   if (err < 0) {
>>> -   iommu_device_unregister(&smmu->iommu);
>>> -   iommu_device_sysfs_remove(&smmu->iommu);
>>> -   return ERR_PTR(err);
>>> -   }
>>> +   if (err < 0)
>>> +   goto err_unregister;
>>> +
>>> +#ifdef CONFIG_PCI
>>> +   err = bus_set_iommu(&pci_bus_type, &tegra_smmu_ops);
>>> +   if (err < 0)
>>> +   goto err_bus_set;
>>> +#endif
>>>  
>>> if (IS_ENABLED(CONFIG_DEBUG_FS))
>>> tegra_smmu_debugfs_init(smmu);
>>>  
>>> return smmu;
>>> +
>>> +err_bus_set: __maybe_unused;
>>
>> __maybe_unused?
> 
> In order to mute a build warning when CONFIG_PCI=n...
> 

okay


Re: [RFC PATCH 0/7] RAS/CEC: Extend CEC for errors count check on short time period

2020-10-02 Thread Borislav Petkov
On Fri, Oct 02, 2020 at 06:33:17PM +0100, James Morse wrote:
> > I think adding the CPU error collection to the kernel
> > has the following advantages,
> > 1. The CPU error collection and isolation would not be active if the
> >  rasdaemon stopped running or not running on a machine.

Wasn't there this thing called systemd which promised that it would
restart daemons when they fail? And even if it is not there, you can
always do your own cronjob which checks rasdaemon presence and restarts
it if it has died and sends a mail to the admin to check why it had
died.

Everything else I've trimmed but James has put it a lot more eloquently
than me and I cannot agree more with what he says. Doing this in
userspace is better in every aspect you can think of.

The current CEC thing runs in the kernel because it has a completely
different purpose - to limit corrected error reports which turn into
very expensive support calls for errors which were corrected but people
simply don't get that they were corrected. Instead, they throw hands in
the air and go "OMG, my hardware is failing".

Where those are, as James says:

> These are corrected errors. Nothing has gone wrong.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH v4] block/scsi-ioctl: Fix kernel-infoleak in scsi_put_cdrom_generic_arg()

2020-10-02 Thread Jens Axboe
On 10/2/20 8:22 AM, Peilin Ye wrote:
> scsi_put_cdrom_generic_arg() is copying uninitialized stack memory to
> userspace, since the compiler may leave a 3-byte hole in the middle of
> `cgc32`. Fix it by adding a padding field to `struct
> compat_cdrom_generic_command`.

Applied, thanks.

-- 
Jens Axboe



Re: [PATCH ] scsi: page warning: 'page' may be used uninitialized

2020-10-02 Thread Mike Christie
On 9/23/20 7:19 PM, john.p.donne...@oracle.com wrote:
> From: John Donnelly 
> 
> corrects: drivers/target/target_core_user.c:688:6: warning: 'page' may be used
> uninitialized
> 
> Fixes: 3c58f737231e ("scsi: target: tcmu: Optimize use of
> flush_dcache_page")
> 
> To: linux-s...@vger.kernel.org
> Cc: Mike Christie 
> Signed-off-by: John Donnelly 
> ---
>  drivers/target/target_core_user.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/target/target_core_user.c 
> b/drivers/target/target_core_user.c
> index 9b7592350502..86b28117787e 100644
> --- a/drivers/target/target_core_user.c
> +++ b/drivers/target/target_core_user.c
> @@ -681,7 +681,7 @@ static void scatter_data_area(struct tcmu_dev *udev,
>   void *from, *to = NULL;
>   size_t copy_bytes, to_offset, offset;
>   struct scatterlist *sg;
> - struct page *page;
> + struct page *page = NULL;
>  
>   for_each_sg(data_sg, sg, data_nents, i) {
>   int sg_remaining = sg->length;
> 

Looks ok for now. In the next kernel we can do the more invasive approach and
add a new struct/helpers to make the code cleaner and fix it properly.

Acked-by: Mike Christie 


Re: [PATCH v2 0/6] kselftest: arm64/mte: Tests for user-space MTE

2020-10-02 Thread Catalin Marinas
On Fri, Oct 02, 2020 at 05:26:24PM +0530, Amit Daniel Kachhap wrote:
> Amit Daniel Kachhap (6):
>   kselftest/arm64: Add utilities and a test to validate mte memory
>   kselftest/arm64: Verify mte tag inclusion via prctl
>   kselftest/arm64: Check forked child mte memory accessibility
>   kselftest/arm64: Verify all different mmap MTE options
>   kselftest/arm64: Verify KSM page merge for MTE pages
>   kselftest/arm64: Check mte tagged user address in kernel

Thanks Amit for respinning the series. Tested on FVP (some Qemu bugs get
in the way and they fail).

Acked-by: Catalin Marinas 
Tested-by: Catalin Marinas 


Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Saravana Kannan
On Fri, Oct 2, 2020 at 10:55 AM Laurent Pinchart
 wrote:
>
> Hi Saravana,
>
> On Fri, Oct 02, 2020 at 10:51:51AM -0700, Saravana Kannan wrote:
> > On Fri, Oct 2, 2020 at 7:08 AM Rob Herring  wrote:
> > > On Thu, Oct 1, 2020 at 5:59 PM Saravana Kannan  
> > > wrote:
> > > >
> > > > When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
> > > > adding all top level devices") optimized the fwnode parsing when all top
> > > > level devices are added, it missed out optimizing this for platform
> > > > where the top level devices are added through the init_machine() path.
> > > >
> > > > This commit does the optimization for all paths by simply moving the
> > > > fw_devlink_pause/resume() inside of_platform_default_populate().
> > > >
> > > > Reported-by: Tomi Valkeinen 
> > > > Signed-off-by: Saravana Kannan 
> > > > ---
> > > >  drivers/of/platform.c | 19 +++
> > > >  1 file changed, 15 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > > > index 071f04da32c8..79972e49b539 100644
> > > > --- a/drivers/of/platform.c
> > > > +++ b/drivers/of/platform.c
> > > > @@ -501,8 +501,21 @@ int of_platform_default_populate(struct 
> > > > device_node *root,
> > > >  const struct of_dev_auxdata *lookup,
> > > >  struct device *parent)
> > > >  {
> > > > -   return of_platform_populate(root, of_default_bus_match_table, 
> > > > lookup,
> > > > -   parent);
> > > > +   int ret;
> > > > +
> > > > +   /*
> > > > +* fw_devlink_pause/resume() are only safe to be called around 
> > > > top
> > > > +* level device addition due to locking constraints.
> > > > +*/
> > > > +   if (!root)
> > > > +   fw_devlink_pause();
> > > > +
> > > > +   ret = of_platform_populate(root, of_default_bus_match_table, 
> > > > lookup,
> > > > +  parent);
> > >
> > > of_platform_default_populate() vs. of_platform_populate() is just a
> > > different match table. I don't think the behavior should otherwise be
> > > different.
> > >
> > > There's also of_platform_probe() which has slightly different matching
> > > behavior. It should not behave differently either with respect to
> > > devlinks.
> >
> > So I'm trying to do this only when the top level devices are added for
> > the first time. of_platform_default_populate() seems to be the most
> > common path. For other cases, I think we just need to call
> > fw_devlink_pause/resume() wherever the top level devices are added for
> > the first time. As I said in the other email, we can't add
> > fw_devlink_pause/resume() by default to of_platform_populate().
> >
> > Do you have other ideas for achieving "call fw_devlink_pause/resume()
> > only when top level devices are added for the first time"?
>
> I'm not an expert in this domain, but before investigating it, would you
> be able to share a hack patch that implements this (in the most simple
> way) to check if it actually fixes the delays I experience on my system
> ?

So I take it the patch I sent out didn't work for you? Can you tell me
what machine/DT you are using?

-Saravana


Re: [WARNING] kernel/rcu/tree.c:1058 rcu_irq_enter+0x15/0x20

2020-10-02 Thread Steven Rostedt
On Wed, 30 Sep 2020 20:13:23 +0200
Peter Zijlstra  wrote:

> Blergh, IIRC there's header hell that way. The sane fix is killing off
> that trace_*_rcuidle() disease.
> 
> But I think this will also cure it.

I guess you still don't build modules ;-). I had to add an
EXPORT_SYMBOL(lockdep_recursion) to get it to build, and then move the
checks within the irq disabling to get rid of the "using cpu pointers within
preemptible code" warnings.

But it appears to solve the problem.

-- Steve

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0e100c9784a5..70610f217b4e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -77,6 +77,7 @@ module_param(lock_stat, int, 0644);
 #endif
 
 DEFINE_PER_CPU(unsigned int, lockdep_recursion);
+EXPORT_SYMBOL(lockdep_recursion);
 
 static inline bool lockdep_enabled(void)
 {
@@ -4241,13 +4242,13 @@ void lockdep_init_map_waits(struct lockdep_map *lock, 
const char *name,
if (subclass) {
unsigned long flags;
 
-   if (DEBUG_LOCKS_WARN_ON(!lockdep_enabled()))
-   return;
-
raw_local_irq_save(flags);
+   if (DEBUG_LOCKS_WARN_ON(!lockdep_enabled()))
+   goto out;
lockdep_recursion_inc();
register_lock_class(lock, subclass, 1);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
}
 }
@@ -4928,15 +4929,15 @@ void lock_set_class(struct lockdep_map *lock, const 
char *name,
 {
unsigned long flags;
 
-   if (unlikely(!lockdep_enabled()))
-   return;
-
raw_local_irq_save(flags);
+   if (unlikely(!lockdep_enabled()))
+   goto out;
lockdep_recursion_inc();
check_flags(flags);
if (__lock_set_class(lock, name, key, subclass, ip))
check_chain_key(current);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(lock_set_class);
@@ -4945,15 +4946,15 @@ void lock_downgrade(struct lockdep_map *lock, unsigned 
long ip)
 {
unsigned long flags;
 
-   if (unlikely(!lockdep_enabled()))
-   return;
-
raw_local_irq_save(flags);
+   if (unlikely(!lockdep_enabled()))
+   goto out;
lockdep_recursion_inc();
check_flags(flags);
if (__lock_downgrade(lock, ip))
check_chain_key(current);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(lock_downgrade);
@@ -5041,16 +5042,18 @@ void lock_release(struct lockdep_map *lock, unsigned 
long ip)
 
trace_lock_release(lock, ip);
 
+   raw_local_irq_save(flags);
+
if (unlikely(!lockdep_enabled()))
-   return;
+   goto out;
 
-   raw_local_irq_save(flags);
check_flags(flags);
 
lockdep_recursion_inc();
if (__lock_release(lock, ip))
check_chain_key(current);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(lock_release);
@@ -5060,15 +5063,17 @@ noinstr int lock_is_held_type(const struct lockdep_map 
*lock, int read)
unsigned long flags;
int ret = 0;
 
-   if (unlikely(!lockdep_enabled()))
-   return 1; /* avoid false negative lockdep_assert_held() */
-
raw_local_irq_save(flags);
+   if (unlikely(!lockdep_enabled())) {
+   ret = 1; /* avoid false negative lockdep_assert_held() */
+   goto out;
+   }
check_flags(flags);
 
lockdep_recursion_inc();
ret = __lock_is_held(lock, read);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
 
return ret;
@@ -5081,15 +5086,16 @@ struct pin_cookie lock_pin_lock(struct lockdep_map 
*lock)
struct pin_cookie cookie = NIL_COOKIE;
unsigned long flags;
 
+   raw_local_irq_save(flags);
if (unlikely(!lockdep_enabled()))
-   return cookie;
+   goto out;
 
-   raw_local_irq_save(flags);
check_flags(flags);
 
lockdep_recursion_inc();
cookie = __lock_pin_lock(lock);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
 
return cookie;
@@ -5100,15 +5106,16 @@ void lock_repin_lock(struct lockdep_map *lock, struct 
pin_cookie cookie)
 {
unsigned long flags;
 
+   raw_local_irq_save(flags);
if (unlikely(!lockdep_enabled()))
-   return;
+   goto out;
 
-   raw_local_irq_save(flags);
check_flags(flags);
 
lockdep_recursion_inc();
__lock_repin_lock(lock, cookie);
lockdep_recursion_finish();
+out:
raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(lock_repin_lock);
@@ -5117,15 +5124,16 @@ void lock_unpin_lock(struct lockdep_map *lock, struct 
pin_cookie cookie)
 {
unsigned long flags;
 
+

Re: [kbuild-all] Re: [PATCH] mm: memcg/slab: fix slab statistics in !SMP configuration

2020-10-02 Thread Roman Gushchin
On Fri, Oct 02, 2020 at 01:34:06PM +0800, Philip Li wrote:
> On Thu, Oct 01, 2020 at 05:55:59PM -0700, Roman Gushchin wrote:
> > On Fri, Oct 02, 2020 at 08:08:40AM +0800, kbuild test robot wrote:
> > > Hi Roman,
> > > 
> > > Thank you for the patch! Yet something to improve:
> > > 
> > > [auto build test ERROR on mmotm/master]
> > 
> > It's a bogus error, the patch was applied onto mmotm/master, which doesn't
> > contain necessary slab controller patches.
> sorry for the wrong report, and thanks for the input, we will check
> this to see whether we can find an appropriate base for this case.

I wonder if you can look at the "Fixes" tag if the patch contains one?

> Meanwhile, you can give a try to use --base option when git format patch, 
> which
> helps us to have more clue when looking for base info.

Will do.

Thanks!


Re: [PATCH 1/1] docs: admin-guide: fdt and initrd load in EFI stub

2020-10-02 Thread Heinrich Schuchardt
On 02.10.20 19:21, Ard Biesheuvel wrote:
> Hi Heinrich,
>
> Thanks for documenting this.
>
>
> On Fri, 2 Oct 2020 at 19:11, Heinrich Schuchardt  wrote:
>>
>> Describe how a device tree and an initial RAM disk can be passed to the EFI
>> Boot Stub.
>>
>> Signed-off-by: Heinrich Schuchardt 
>> ---
>>  Documentation/admin-guide/efi-stub.rst | 35 ++
>>  1 file changed, 35 insertions(+)
>>
>> diff --git a/Documentation/admin-guide/efi-stub.rst 
>> b/Documentation/admin-guide/efi-stub.rst
>> index 833edb0d0bc4..86f50a33884c 100644
>> --- a/Documentation/admin-guide/efi-stub.rst
>> +++ b/Documentation/admin-guide/efi-stub.rst
>> @@ -38,6 +38,34 @@ arch/arm/boot/zImage should be copied to the system 
>> partition, and it
>>  may not need to be renamed. Similarly for arm64, arch/arm64/boot/Image
>>  should be copied but not necessarily renamed.
>>
>> +Passing an initial RAM disk to the EFI Boot Stub
>> +
>> +
>> +The following means sorted by decreasing priority can be used to provide an
>> +initial RAM disk to the EFI Boot Stub:
>> +
>> +* The firmware may provide a UEFI Load File 2 Protocol. The stub will try to
>> +  load the RAM disk by calling the LoadFile() service of the protocol using
>> +  a vendor device path with the vendor GUID
>> +  5568e427-68fc-4f3d-ac74-ca555231cc68.
>> +* Next the EFI stub will try to load the file indicated by the "initrd=" 
>> command
>> +  line parameter.
>> +* The prior boot stage may pass the location of the initial RAM disk via the
>> +  "linux,initrd-start" and "linux,initrd-end" properties of the "/chosen" 
>> node
>> +  of the device-tree.
>> +
>
> On x86, the boot_params struct is used to pass the address and size of
> the initrd in memory. Maybe include that for completeness?

Sure we should add it. But I will just wait for more review comments.

>
>> +The first two items are inhibited by the "noinitrd" command line parameter.
>> +
>
> Interesting. Are you saying noinitrd is ignored by the kernel itself?
>
> Looking at the code, it might only work for preventing the load of old
> style initrd ramdisks, whereas initramfs images are handled
> separately.
>
> This is something that we should probably fix one way or the other.
>

initrd_load() seems to depend on the value and will not create /dev/ram
if "noinitrd" is set.
init/do_mounts_initrd.o is compiled for ARMv8.

But my ARMv8 Odroid C2 boots fine via U-Boot->GRUB->EFI stub->Linux with:

[  +0.00] Kernel command line: BOOT_IMAGE=/vmlinuz-5.9.0-rc6-arm64+
root=UUID=.. ro earlycon=meson,0xc81004c0,115200n8 noinitrd

So I assume initrd_load() is either not called or at least not needed
for the FDT case.

Best regards

Heinrich

>
>> +Passing a device-tree to the EFI Boot Stub
>> +--
>> +
>> +A device-tree can be passed to the EFI Boot Stub in decreasing priority 
>> using
>> +
>> +* command line option dtb=
>> +* a UEFI configuration table with GUID b1b621d5-f19c-41a5-830b-d9152c69aae0.
>> +
>> +The command line option is only available if CONFIG_EFI_ARMSTUB_DTB_LOADER=y
>> +and secure boot is disabled.
>>
>>  Passing kernel parameters from the EFI shell
>>  
>> @@ -46,6 +74,10 @@ Arguments to the kernel can be passed after bzImage.efi, 
>> e.g.::
>>
>> fs0:> bzImage.efi console=ttyS0 root=/dev/sda4
>>
>> +The "noinitrd" option
>> +-
>> +
>> +The "noinitrd" option stops the EFI stub from loading an initial RAM disk.
>>
>>  The "initrd=" option
>>  
>> @@ -98,3 +130,6 @@ CONFIGURATION TABLE.
>>
>>  "dtb=" is processed in the same manner as the "initrd=" option that is
>>  described above.
>> +
>> +This option is only available if CONFIG_EFI_ARMSTUB_DTB_LOADER=y and secure
>> +boot is disabled.
>> --
>> 2.28.0
>>



Re: [PATCH v2 0/3] drm: commit_work scheduling

2020-10-02 Thread Rob Clark
On Fri, Oct 2, 2020 at 4:05 AM Ville Syrjälä
 wrote:
>
> On Fri, Oct 02, 2020 at 01:52:56PM +0300, Ville Syrjälä wrote:
> > On Thu, Oct 01, 2020 at 05:25:55PM +0200, Daniel Vetter wrote:
> > > On Thu, Oct 1, 2020 at 5:15 PM Rob Clark  wrote:
> > > >
> > > > On Thu, Oct 1, 2020 at 12:25 AM Daniel Vetter  wrote:
> > > > >
> > > > > On Wed, Sep 30, 2020 at 11:16 PM Rob Clark  
> > > > > wrote:
> > > > > >
> > > > > > From: Rob Clark 
> > > > > >
> > > > > > The android userspace treats the display pipeline as a realtime 
> > > > > > problem.
> > > > > > And arguably, if your goal is to not miss frame deadlines (ie. 
> > > > > > vblank),
> > > > > > it is.  (See https://lwn.net/Articles/809545/ for the best 
> > > > > > > explanation
> > > > > > that I found.)
> > > > > >
> > > > > > But this presents a problem with using workqueues for non-blocking
> > > > > > atomic commit_work(), because the SCHED_FIFO userspace thread(s) can
> > > > > > preempt the worker.  Which is not really the outcome you want.. once
> > > > > > the required fences are scheduled, you want to push the atomic 
> > > > > > commit
> > > > > > down to hw ASAP.
> > > > > >
> > > > > > But the decision of whether commit_work should be RT or not really
> > > > > > depends on what userspace is doing.  For a pure CFS userspace 
> > > > > > display
> > > > > > pipeline, commit_work() should remain SCHED_NORMAL.
> > > > > >
> > > > > > To handle this, convert non-blocking commit_work() to use per-CRTC
> > > > > > kthread workers, instead of system_unbound_wq.  Per-CRTC workers are
> > > > > > used to avoid serializing commits when userspace is using a per-CRTC
> > > > > > update loop.  And the last patch exposes the task id to userspace as
> > > > > > a CRTC property, so that userspace can adjust the priority and sched
> > > > > > > policy to fit its needs.
> > > > > >
> > > > > >
> > > > > > v2: Drop client cap and in-kernel setting of priority/policy in
> > > > > > favor of exposing the kworker tid to userspace so that user-
> > > > > > space can set priority/policy.
> > > > >
> > > > > Yeah I think this looks more reasonable. Still a bit irky interface,
> > > > > so I'd like to get some kworker/rt ack on this. Other opens:
> > > > > - needs userspace, the usual drill
> > > >
> > > > fwiw, right now the userspace is "modetest + chrt".. *probably* the
> > > > userspace will become a standalone helper or daemon, mostly because
> > > > the chrome gpu-process sandbox does not allow setting SCHED_FIFO.  I'm
> > > > still entertaining the possibility of switching between rt and cfs
> > > > depending on what is in the foreground (ie. only do rt for android
> > > > apps).
> > > >
> > > > > > - we need this also for vblank workers, otherwise this won't work for
> > > > > drivers needing those because of another priority inversion.
> > > >
> > > > I have a thought on that, see below..
> > >
> > > Hm, not seeing anything about vblank worker below?
> > >
> > > > > - we probably want some indication of whether this actually does
> > > > > something useful, not all drivers use atomic commit helpers. Not sure
> > > > > how to do that.
> > > >
> > > > I'm leaning towards converting the other drivers over to use the
> > > > per-crtc kwork, and then dropping the 'commit_work` from atomic state.
> > > > I can add a patch to that, but figured I could postpone that churn
> > > > until there is some buy-in on this whole idea.
> > >
> > > i915 has its own commit code, it's not even using the current commit
> > > helpers (nor the commit_work). Not sure how much other fun there is.
> >
> > I don't think we want per-crtc threads for this in i915. Seems
> > to me easier to guarantee atomicity across multiple crtcs if
> > we just commit them from the same thread.
>
> Oh, and we may have to commit things in a very specific order
> to guarantee the hw doesn't fall over, so yeah definitely per-crtc
> thread is a no go.

If I'm understanding the i915 code, this is only the case for modeset
commits?  I suppose we could achieve the same result by just deciding
to pick the kthread of the first CRTC for modeset commits.  I'm not
really so much concerned about parallelism for modeset.

> I don't even understand the serialization argument. If the commits
> are truly independent then why isn't the unbound wq enough to avoid
> the serialization? It should just spin up a new thread for each commit
> no?

The problem with wq is prioritization and SCHED_FIFO userspace
components stomping on the feet of commit_work.  That is the entire
motivation of this series in the first place, so no we cannot use
unbound wq.

BR,
-R


Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Laurent Pinchart
Hi Saravana,

On Fri, Oct 02, 2020 at 10:51:51AM -0700, Saravana Kannan wrote:
> On Fri, Oct 2, 2020 at 7:08 AM Rob Herring  wrote:
> > On Thu, Oct 1, 2020 at 5:59 PM Saravana Kannan  wrote:
> > >
> > > When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
> > > adding all top level devices") optimized the fwnode parsing when all top
> > > level devices are added, it missed out optimizing this for platform
> > > where the top level devices are added through the init_machine() path.
> > >
> > > This commit does the optimization for all paths by simply moving the
> > > fw_devlink_pause/resume() inside of_platform_default_populate().
> > >
> > > Reported-by: Tomi Valkeinen 
> > > Signed-off-by: Saravana Kannan 
> > > ---
> > >  drivers/of/platform.c | 19 +++
> > >  1 file changed, 15 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > > index 071f04da32c8..79972e49b539 100644
> > > --- a/drivers/of/platform.c
> > > +++ b/drivers/of/platform.c
> > > @@ -501,8 +501,21 @@ int of_platform_default_populate(struct device_node 
> > > *root,
> > >  const struct of_dev_auxdata *lookup,
> > >  struct device *parent)
> > >  {
> > > -   return of_platform_populate(root, of_default_bus_match_table, 
> > > lookup,
> > > -   parent);
> > > +   int ret;
> > > +
> > > +   /*
> > > +* fw_devlink_pause/resume() are only safe to be called around top
> > > +* level device addition due to locking constraints.
> > > +*/
> > > +   if (!root)
> > > +   fw_devlink_pause();
> > > +
> > > +   ret = of_platform_populate(root, of_default_bus_match_table, 
> > > lookup,
> > > +  parent);
> >
> > of_platform_default_populate() vs. of_platform_populate() is just a
> > different match table. I don't think the behavior should otherwise be
> > different.
> >
> > There's also of_platform_probe() which has slightly different matching
> > behavior. It should not behave differently either with respect to
> > devlinks.
> 
> So I'm trying to do this only when the top level devices are added for
> the first time. of_platform_default_populate() seems to be the most
> common path. For other cases, I think we just need to call
> fw_devlink_pause/resume() wherever the top level devices are added for
> the first time. As I said in the other email, we can't add
> fw_devlink_pause/resume() by default to of_platform_populate().
> 
> Do you have other ideas for achieving "call fw_devlink_pause/resume()
> only when top level devices are added for the first time"?

I'm not an expert in this domain, but before investigating it, would you
be able to share a hack patch that implements this (in the most simple
way) to check if it actually fixes the delays I experience on my system
?

-- 
Regards,

Laurent Pinchart


[PATCH 1/2] mm/frame-vec: Drop gup_flags from get_vaddr_frames()

2020-10-02 Thread Daniel Vetter
FOLL_WRITE | FOLL_FORCE is really the only reasonable thing to do for
simple dma device that can't guarantee write protection. Which is also
what all the callers are using.

So just simplify this.

Signed-off-by: Daniel Vetter 
Cc: Inki Dae 
Cc: Joonyoung Shim 
Cc: Seung-Woo Kim 
Cc: Kyungmin Park 
Cc: Kukjin Kim 
Cc: Krzysztof Kozlowski 
Cc: Pawel Osciak 
Cc: Marek Szyprowski 
Cc: Tomasz Figa 
Cc: Andrew Morton 
Cc: Oded Gabbay 
Cc: Omer Shpigelman 
Cc: Tomer Tayar 
Cc: Greg Kroah-Hartman 
Cc: Pawel Piskorski 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-samsung-...@vger.kernel.org
Cc: linux-me...@vger.kernel.org
Cc: linux...@kvack.org
---
 drivers/gpu/drm/exynos/exynos_drm_g2d.c   | 3 +--
 drivers/media/common/videobuf2/videobuf2-memops.c | 3 +--
 drivers/misc/habanalabs/common/memory.c   | 3 +--
 include/linux/mm.h| 2 +-
 mm/frame_vector.c | 4 ++--
 5 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c 
b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 967a5cdc120e..ac452842bab3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -480,8 +480,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data 
*g2d,
goto err_free;
}
 
-   ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
-   g2d_userptr->vec);
+   ret = get_vaddr_frames(start, npages, g2d_userptr->vec);
if (ret != npages) {
DRM_DEV_ERROR(g2d->dev,
  "failed to get user pages from userptr.\n");
diff --git a/drivers/media/common/videobuf2/videobuf2-memops.c 
b/drivers/media/common/videobuf2/videobuf2-memops.c
index 6e9e05153f4e..9dd6c27162f4 100644
--- a/drivers/media/common/videobuf2/videobuf2-memops.c
+++ b/drivers/media/common/videobuf2/videobuf2-memops.c
@@ -40,7 +40,6 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
unsigned long first, last;
unsigned long nr;
struct frame_vector *vec;
-   unsigned int flags = FOLL_FORCE | FOLL_WRITE;
 
first = start >> PAGE_SHIFT;
last = (start + length - 1) >> PAGE_SHIFT;
@@ -48,7 +47,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
vec = frame_vector_create(nr);
if (!vec)
return ERR_PTR(-ENOMEM);
-   ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec);
+   ret = get_vaddr_frames(start & PAGE_MASK, nr, vec);
if (ret < 0)
goto out_destroy;
/* We accept only complete set of PFNs */
diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index 5ff4688683fd..43b10aee8150 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1287,8 +1287,7 @@ static int get_user_memory(struct hl_device *hdev, u64 
addr, u64 size,
return -ENOMEM;
}
 
-   rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
-   userptr->vec);
+   rc = get_vaddr_frames(start, npages, userptr->vec);
 
if (rc != npages) {
dev_err(hdev->dev,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 16b799a0522c..7d14aa2780d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1757,7 +1757,7 @@ struct frame_vector {
 struct frame_vector *frame_vector_create(unsigned int nr_frames);
 void frame_vector_destroy(struct frame_vector *vec);
 int get_vaddr_frames(unsigned long start, unsigned int nr_pfns,
-unsigned int gup_flags, struct frame_vector *vec);
+struct frame_vector *vec);
 void put_vaddr_frames(struct frame_vector *vec);
 int frame_vector_to_pages(struct frame_vector *vec);
 void frame_vector_to_pfns(struct frame_vector *vec);
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 10f82d5643b6..5d34c9047e9c 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -12,7 +12,6 @@
  * get_vaddr_frames() - map virtual addresses to pfns
  * @start: starting user address
  * @nr_frames: number of pages / pfns from start to map
- * @gup_flags: flags modifying lookup behaviour
  * @vec:   structure which receives pages / pfns of the addresses mapped.
  * It should have space for at least nr_frames entries.
  *
@@ -32,10 +31,11 @@
  * This function takes care of grabbing mmap_lock as necessary.
  */
 int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
-unsigned int gup_flags, struct frame_vector *vec)
+struct frame_vector *vec)
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+   unsigned int gup_flags = FOLL_WRITE | FOLL_FORCE;
int ret = 0;
int err;
int locked;
-- 
2.28.0



[PATCH 2/2] mm/frame-vec: use FOLL_LONGTERM

2020-10-02 Thread Daniel Vetter
For $reasons I've stumbled over this code and I'm not sure the change
to the new gup functions in 55a650c35fea ("mm/gup: frame_vector:
convert get_user_pages() --> pin_user_pages()") was entirely correct.

This here is used for long term buffers (not just quick I/O) like
RDMA, and John notes this in his patch. But I thought the rule for
these is that they need to add FOLL_LONGTERM, which John's patch
didn't do.

There is already a dax specific check (added in b7f0554a56f2 ("mm:
fail get_vaddr_frames() for filesystem-dax mappings")), so this seems
like the prudent thing to do.

Signed-off-by: Daniel Vetter 
Cc: Andrew Morton 
Cc: John Hubbard 
Cc: Jérôme Glisse 
Cc: Jan Kara 
Cc: Dan Williams 
Cc: linux...@kvack.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-samsung-...@vger.kernel.org
Cc: linux-me...@vger.kernel.org
---
Hi all,

I stumbled over this and figured typing this patch can't hurt. Really
just to maybe learn a few things about how gup/pup is supposed to be
used (we have a bit of that in drivers/gpu), this here isn't really
related to anything I'm doing.

I'm also wondering whether the explicit dax check should be removed,
since FOLL_LONGTERM should take care of that already.
-Daniel
---
 mm/frame_vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 5d34c9047e9c..3507e09cb3ff 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -35,7 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int 
nr_frames,
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
-   unsigned int gup_flags = FOLL_WRITE | FOLL_FORCE;
+   unsigned int gup_flags = FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM;
int ret = 0;
int err;
int locked;
-- 
2.28.0



Re: Linux 5.9-rc7 / VmallocTotal wrongly reported | Patch OK

2020-10-02 Thread Roman Gushchin
On Fri, Oct 02, 2020 at 09:01:14AM +0200, Bastian Bittorf wrote:
> On Thu, Oct 01, 2020 at 01:39:31PM -0700, Roman Gushchin wrote:
> > > # Processor type and features
> > > #
> > > # CONFIG_ZONE_DMA is not set
> > > # CONFIG_SMP is not set
> > 
> > Yes, here is the deal.
> > 
> > The SMP-version of __mod_node_page_state() converts a passed value from 
> > bytes
> > to pages, but the non-SMP doesn't.
> > 
> > Thanks!
> > 
> > --
> > 
> > From 3d0233b37340c78012b991d3570b92f91cf5ebd2 Mon Sep 17 00:00:00 2001
> > From: Roman Gushchin 
> > Date: Thu, 1 Oct 2020 13:07:49 -0700
> > Subject: [PATCH] mm: memcg/slab: fix slab statistics in !SMP configuration
> 
> 
> I manually applied your patch and
> this fixes the issue on my side too:

Thank you for testing!

And sorry for the hassle.

Roman


Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Saravana Kannan
On Fri, Oct 2, 2020 at 7:08 AM Rob Herring  wrote:
>
> On Thu, Oct 1, 2020 at 5:59 PM Saravana Kannan  wrote:
> >
> > When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
> > adding all top level devices") optimized the fwnode parsing when all top
> > level devices are added, it missed out optimizing this for platform
> > where the top level devices are added through the init_machine() path.
> >
> > This commit does the optimization for all paths by simply moving the
> > fw_devlink_pause/resume() inside of_platform_default_populate().
> >
> > Reported-by: Tomi Valkeinen 
> > Signed-off-by: Saravana Kannan 
> > ---
> >  drivers/of/platform.c | 19 +++
> >  1 file changed, 15 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > index 071f04da32c8..79972e49b539 100644
> > --- a/drivers/of/platform.c
> > +++ b/drivers/of/platform.c
> > @@ -501,8 +501,21 @@ int of_platform_default_populate(struct device_node 
> > *root,
> >  const struct of_dev_auxdata *lookup,
> >  struct device *parent)
> >  {
> > -   return of_platform_populate(root, of_default_bus_match_table, 
> > lookup,
> > -   parent);
> > +   int ret;
> > +
> > +   /*
> > +* fw_devlink_pause/resume() are only safe to be called around top
> > +* level device addition due to locking constraints.
> > +*/
> > +   if (!root)
> > +   fw_devlink_pause();
> > +
> > +   ret = of_platform_populate(root, of_default_bus_match_table, lookup,
> > +  parent);
>
> of_platform_default_populate() vs. of_platform_populate() is just a
> different match table. I don't think the behavior should otherwise be
> different.
>
> There's also of_platform_probe() which has slightly different matching
> behavior. It should not behave differently either with respect to
> devlinks.

So I'm trying to do this only when the top level devices are added for
the first time. of_platform_default_populate() seems to be the most
common path. For other cases, I think we just need to call
fw_devlink_pause/resume() wherever the top level devices are added for
the first time. As I said in the other email, we can't add
fw_devlink_pause/resume() by default to of_platform_populate().

Do you have other ideas for achieving "call fw_devlink_pause/resume()
only when top level devices are added for the first time"?

-Saravana


Re: [PATCH] mm: optionally disable brk()

2020-10-02 Thread David Hildenbrand
On 02.10.20 19:19, Topi Miettinen wrote:
> The brk() system call allows to change data segment size (heap). This
> is mainly used by glibc for memory allocation, but it can use mmap()
> and that results in more randomized memory mappings since the heap is
> always located at fixed offset to program while mmap()ed memory is
> randomized.

Want to take more Unix out of Linux?

Honestly, why care about disabling? User space can happily use mmap() if
it prefers.


-- 
Thanks,

David / dhildenb



Re: [PATCH v4 3/3] iommu/tegra-smmu: Add PCI support

2020-10-02 Thread Nicolin Chen
On Fri, Oct 02, 2020 at 05:35:24PM +0300, Dmitry Osipenko wrote:
> 02.10.2020 09:08, Nicolin Chen пишет:
> > @@ -865,7 +866,11 @@ static struct iommu_group 
> > *tegra_smmu_device_group(struct device *dev)
> > group->smmu = smmu;
> > group->soc = soc;
> >  
> > -   group->group = iommu_group_alloc();
> > +   if (dev_is_pci(dev))
> > +   group->group = pci_device_group(dev);
> > +   else
> > +   group->group = generic_device_group(dev);
> > +
> > if (IS_ERR(group->group)) {
> > devm_kfree(smmu->dev, group);
> > mutex_unlock(&smmu->lock);
> > @@ -1069,22 +1074,32 @@ struct tegra_smmu *tegra_smmu_probe(struct device 
> > *dev,
> > iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
> >  
> > err = iommu_device_register(&smmu->iommu);
> > -   if (err) {
> > -   iommu_device_sysfs_remove(&smmu->iommu);
> > -   return ERR_PTR(err);
> > -   }
> > +   if (err)
> > +   goto err_sysfs;
> >  
> > err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops);
> > -   if (err < 0) {
> > -   iommu_device_unregister(&smmu->iommu);
> > -   iommu_device_sysfs_remove(&smmu->iommu);
> > -   return ERR_PTR(err);
> > -   }
> > +   if (err < 0)
> > +   goto err_unregister;
> > +
> > +#ifdef CONFIG_PCI
> > +   err = bus_set_iommu(&pci_bus_type, &tegra_smmu_ops);
> > +   if (err < 0)
> > +   goto err_bus_set;
> > +#endif
> >  
> > if (IS_ENABLED(CONFIG_DEBUG_FS))
> > tegra_smmu_debugfs_init(smmu);
> >  
> > return smmu;
> > +
> > +err_bus_set: __maybe_unused;
> 
> __maybe_unused?

In order to mute a build warning when CONFIG_PCI=n...


Re: [RFC PATCH v1 15/26] docs: reporting-bugs: make readers test mainline, but leave a loophole

2020-10-02 Thread Randy Dunlap
On 10/1/20 1:39 AM, Thorsten Leemhuis wrote:
> Now that the document described all preparatory steps tell users to
> install the latest kernel. Try pretty hard to motivate them installing a
> mainline kernel, as that is best for reporting issues. Mention the
> latest stable kernel as an acceptable alternative, but discourage this
> option. Point out that longterm kernels are unsuitable.
> 
> While at it, provide a few hints how to obtain a fresh kernel. Also
> explain how to find out what the latest version actually is. And mention
> why it might be a good idea to wait till the end of the merge window
> when reporting issues.
> 
> Signed-off-by: Thorsten Leemhuis 
> ---
> 
> = RFC =
> 
> Am I asking for too much from users by telling them to test mainline? But most
> will likely have an outdated and heavily patched vendor kernel anyway, so they
> have to install a vanilla kernel if they want to report something upstream;
> that's why I thought "well, then let's go all in and make them test mainline.

That is appropriate IMO.

> ---
>  Documentation/admin-guide/reporting-bugs.rst | 88 
>  1 file changed, 88 insertions(+)
> 
> diff --git a/Documentation/admin-guide/reporting-bugs.rst 
> b/Documentation/admin-guide/reporting-bugs.rst
> index f99d92a05bca..dee6d65aa95c 100644
> --- a/Documentation/admin-guide/reporting-bugs.rst
> +++ b/Documentation/admin-guide/reporting-bugs.rst
> @@ -643,6 +643,94 @@ hardware apart from a kernel issue that rarely happens 
> and thus is hard to
>  reproduce.
>  
>  
> +Install the latest mainline kernel
> +--
> +
> +*Install the latest Linux mainline kernel: that's where all issue get 
> fixed

   issues

> +first, because it's the version line the kernel developers mainly care
> +about. Testing and reporting with the latest Linux stable kernel can be
> +acceptable alternative in some situations, but is best avoided.*

   an acceptable

> +
> +Reporting an issue to the Linux kernel developers they fixed a while ago is
> +annoying for them and wasting their and your time. That's why it's in
> +everybody's interest to check if the issue occurs with the latest version 
> before
> +reporting it.
> +
> +In the scope of the Linux kernel the term 'latest' means: a kernel version
> +recently created from the main line of development, as this 'mainline' tree 
> is
> +where every fix gets applied to first; only later they are allowed to get
> +backported to older, still support version lines called 'stable' and 
> 'longterm'

  supported

> +kernels. That's why it's a prerequisite to check mainline even if just want 
> to

 even if you just 
want to

> +see the issue fixed in one of those. Another reasons: sometimes fixes for an

   in one of those other version lines. Another reason:


> +issue are only applied to mainline, as they are too risky to get backported
> +into older version lines where they thus remain unfixed.
> +
> +It's thus in your and everybody's else interest to reproduce the issue with a

 everybody else's

> +fresh mainline kernel before reporting it. Reproducing it with the latest 
> Linux
> +'stable' kernel can be acceptable alternative, if you can't test mainline for
> +some reason; this is not ideal, but better than not reporting the issue at 
> all.
> +
> +Avoid testing with one of the longterm kernels (sometimes called "LTS 
> kernels"),
> +they are too distant from current development; the same is also true for

   as they are too distant

> +mainline or stable kernels that are not very recent, as there is a new 
> release
> +of those nearly every week.
> +
> +Ways to obtains a fresh vanilla kernel
> +~~
> +
> +One way to get the latest mainline or stable kernel in a vanilla fashion is 
> to
> +download the Linux sources from `kernel.org `_ and 
> build a
> +kernel image and modules from them yourself. How to do that is not described
> +here, as many texts on the internet explain the necessary steps already. If 
> you
> +are new to it, consider following one of those how-to's that suggest to use
> +``make localmodconfig``, as that tries to pick up the configuration of your
> +current kernel and then tries to adjust it somewhat for your system. That 
> does
> +not make the resulting kernel any better, but makes it compile a lot faster.
> +
> +There might be a way around building your own kernel, if you are in a luck: 
> for

in luck: for

> +popular Linux distribution you'll find repositories on the net that offer
> +packages with of the latest mainline or stable Linux vanilla kernels for easy
> +installation. It's totally okay to use packages with these pre-compiled 
> kern

Re: [PATCH 8/8] WIP: add a dma_alloc_contiguous API

2020-10-02 Thread Tomasz Figa
Hi Christoph,

On Wed, Sep 30, 2020 at 06:09:17PM +0200, Christoph Hellwig wrote:
> Add a new API that returns a virtually non-contiguous array of pages
> and dma address.  This API is only implemented for dma-iommu and will
> not be implemented for non-iommu DMA API instances that have to allocate
> contiguous memory.  It is up to the caller to check if the API is
> available.

Would you mind shedding some more light on what made the previous attempt
not work well? I liked the previous API because it was more consistent with
the regular dma_alloc_coherent().

> 
> The intent is that media drivers can use this API if either:

FWIW, the USB subsystem also has similar needs, and so do some DRM drivers
using DMA API rather than IOMMU API directly. Basically I believe that all
the users removed in your previous series relied on custom downstream
patches to make DMA_ATTR_NON_CONSISTENT work and could finally be made to
work upstream using this API.

> 
>  - no kernel mapping or only temporary kernel mappings are required.
>That is as a better replacement for DMA_ATTR_NO_KERNEL_MAPPING
>  - a kernel mapping is required for cached and DMA mapped pages, but
>the driver also needs the pages to e.g. map them to userspace.
>In that sense it is a replacement for some aspects of the recently
>removed and never fully implemented DMA_ATTR_NON_CONSISTENT

What's the expected allocation and mapping flow with the latter? Would that be

pages = dma_alloc_noncoherent(...)
vaddr = vmap(pages, ...);

?

Would one just use the usual dma_sync_for_{cpu,device}() for cache
invalidate/clean, while keeping the mapping in place?

Best regards,
Tomasz


[GIT PULL] io_uring fixes for 5.9-rc

2020-10-02 Thread Jens Axboe
Hi Linus,

- Fix for async buffered reads if read-ahead is fully disabled (Hao)

- double poll match fix

- ->show_fdinfo() potential ABBA deadlock complaint fix

Please pull!


The following changes since commit f38c7e3abfba9a9e180b34f642254c43782e7ffe:

  io_uring: ensure async buffered read-retry is setup properly (2020-09-25 
15:39:13 -0600)

are available in the Git repository at:

  git://git.kernel.dk/linux-block.git tags/io_uring-5.9-2020-10-02

for you to fetch changes up to c8d317aa1887b40b188ec3aaa6e9e524333caed1:

  io_uring: fix async buffered reads when readahead is disabled (2020-09-29 
07:54:00 -0600)


io_uring-5.9-2020-10-02


Hao Xu (1):
  io_uring: fix async buffered reads when readahead is disabled

Jens Axboe (2):
  io_uring: always delete double poll wait entry on match
  io_uring: fix potential ABBA deadlock in ->show_fdinfo()

 fs/io_uring.c | 23 ++-
 mm/filemap.c  |  6 +-
 2 files changed, 23 insertions(+), 6 deletions(-)

-- 
Jens Axboe



Re: [PATCH v1] of: platform: Batch fwnode parsing in the init_machine() path

2020-10-02 Thread Saravana Kannan
On Fri, Oct 2, 2020 at 8:03 AM 'Grygorii Strashko' via kernel-team
 wrote:
>
>
>
> On 02/10/2020 14:40, Grygorii Strashko wrote:
> >
> >
> > On 02/10/2020 02:19, Laurent Pinchart wrote:
> >> Hi Saravana,
> >>
> >> Thank you for the patch.
> >>
> >> On Thu, Oct 01, 2020 at 03:59:51PM -0700, Saravana Kannan wrote:
> >>> When commit 93d2e4322aa7 ("of: platform: Batch fwnode parsing when
> >>> adding all top level devices") optimized the fwnode parsing when all top
> >>> level devices are added, it missed out optimizing this for platform
> >>> where the top level devices are added through the init_machine() path.
> >>>
> >>> This commit does the optimization for all paths by simply moving the
> >>> fw_devlink_pause/resume() inside of_platform_default_populate().
> >>
> >> Based on v5.9-rc5, before the patch:
> >>
> >> [0.652887] cpuidle: using governor menu
> >> [   12.349476] No ATAGs?
> >>
> >> After the patch:
> >>
> >> [0.650460] cpuidle: using governor menu
> >> [   12.262101] No ATAGs?
> >>
> >> :-(
> >
> > This is kinda expected :( because omap2 arch doesn't call 
> > of_platform_default_populate()
> >
> > Call path:
> > board-generic.c
> >   DT_MACHINE_START()
> > .init_machine= omap_generic_init,
> >
> >   omap_generic_init()
> > pdata_quirks_init(omap_dt_match_table);
> >  of_platform_populate(NULL, omap_dt_match_table,
> >   omap_auxdata_lookup, NULL);
> >
> > Other affected platforms
> > arm: mach-ux500
> > some mips
> > some powerpc
> >
> > there are also case when a lot of devices placed under bus node, in such 
> > case
> >   of_platform_populate() calls from bus drivers will also suffer from this 
> > issue.
> >
> > I think one option could be to add some parameter to _populate() or 
> > introduce new api.
> >
> > By the way, is there option to disable this feature at all?
> > Is there Kconfig option?
> > Is there any reasons why such complex and time consuming code added to the 
> > kernel and not implemented on DTC level?
> >
> >
> > Also, I've came with another diff, pls check.
> >
> > [0.00] Booting Linux on physical CPU 0x0
> > [0.00] Linux version 5.9.0-rc6-01791-g9acba6b38757-dirty 
> > (grygorii@grygorii-XPS-13-9370) (arm-linux-gnueabihf-gcc (GNU Toolcha0
> > [0.00] CPU: ARMv7 Processor [412fc0f2] revision 2 (ARMv7), 
> > cr=10c5387d
> > [0.00] CPU: div instructions available: patching division code
> > [0.00] CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction 
> > cache
> > [0.00] OF: fdt: Machine model: TI AM5718 IDK
> > ...
> > [0.053443] cpuidle: using governor ladder
> > [0.053470] cpuidle: using governor menu
> > [0.089304] No ATAGs?
> > ...
> > [3.092291] devtmpfs: mounted
> > [3.095804] Freeing unused kernel memory: 1024K
> > [3.100483] Run /sbin/init as init process
> >
> >
> >
> > -- >< ---
> > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > index 071f04da32c8..4521b26e7745 100644
> > --- a/drivers/of/platform.c
> > +++ b/drivers/of/platform.c
> > @@ -514,6 +514,12 @@ static const struct of_device_id 
> > reserved_mem_matches[] = {
> >  {}
> >   };
> >
> > +static int __init of_platform_fw_devlink_pause(void)
> > +{
> > +   fw_devlink_pause();
> > +}
> > +core_initcall(of_platform_fw_devlink_pause);
> > +
> >   static int __init of_platform_default_populate_init(void)
> >   {
> >  struct device_node *node;
> > @@ -538,9 +544,7 @@ static int __init 
> > of_platform_default_populate_init(void)
> >  }
> >
> >  /* Populate everything else. */
> > -   fw_devlink_pause();
> >  of_platform_default_populate(NULL, NULL, NULL);
> > -   fw_devlink_resume();
> >
> >  return 0;
> >   }
> > @@ -548,6 +552,7 @@ arch_initcall_sync(of_platform_default_populate_init);
> >
> >   static int __init of_platform_sync_state_init(void)
> >   {
> > +   fw_devlink_resume();
>
> ^ it seems has to be done earlier, like
> +static int __init of_platform_fw_devlink_resume(void)
> +{
> +   fw_devlink_resume();
> +   return 0;
> +}
> +device_initcall_sync(of_platform_fw_devlink_resume);

This will mean no device will probe until device_initcall_sync().
Unfortunately, I don't think we can make such a sweeping assumption.

-Saravana


Re: [RESEND PATCH] spmi: prefix spmi bus device names with "spmi"

2020-10-02 Thread Stephen Boyd
Quoting Mark Brown (2020-10-02 09:03:24)
> On Thu, Oct 01, 2020 at 05:45:00PM -0700, David Collins wrote:
> 
> > The SPMI regmap debugfs files are used extensively for testing and debug
> > purposes internally at Qualcomm and by our customers.  It would be helpful
> > if the more verbose naming scheme were accepted upstream to avoid
> > confusion and broken test scripts.
> 
> ...and doing this in the dev_name() should help other diagnostic users
> (like dev_printk() for example).

Don't things like dev_printk() prefix the bus name? See
dev_driver_string(). So I agree that having the bus name is useful, but
I'm confused why there are testing scripts and things on top of regmap
debugfs.

Put another way, why not introduce something similar to i2c-dev where
userspace can read/write registers for devices on the SPMI bus?
Otherwise I presume the test scripts inside Qualcomm are just reading
registers out of regmap?


Re: [PATCH v3 03/21] spi: dw: Detach SPI device specific CR0 config method

2020-10-02 Thread Serge Semin
On Fri, Oct 02, 2020 at 01:22:46PM +0300, Andy Shevchenko wrote:
> On Fri, Oct 02, 2020 at 01:28:11AM +0300, Serge Semin wrote:
> > Indeed there is no point in detecting the SPI peripheral device parameters
> > and initializing the CR0 register fields each time an SPI transfer is
> > executed. Instead let's define a dedicated CR0 chip-data member, which
> > will be initialized in accordance with the SPI device settings at the
> > moment of setting it up.
> > 
> > By doing so we'll finally make the SPI device chip_data serving as it's
> > supposed to - to preserve the SPI device specific DW SPI configuration.
> > See spi-fsl-dspi.c, spi-pl022.c, spi-pxa2xx.c drivers for example of the
> > way the chip data is utilized.
> 
> > +static void dw_spi_update_cr0(struct dw_spi *dws, struct spi_device *spi,
> > + struct spi_transfer *transfer)
> 

> Yep, why not to place this in previous patch exactly here?

The previous patch is about introducing the DWC SSI capability. This one is
about splitting the functionality up.

> 
> > +   /*
> > +* Update CR0 data each time the setup callback is invoked since
> > +* the device parameters could have been changed, for instance, by
> > +* the MMC SPI driver or something else.
> > +*/
> > +   chip->cr0 = dw_spi_get_cr0(dws, spi);
> 

> I would rather name it prepare or alike. 'get' assumes getting value or
> something like that.

This seems reasonable. What verb do you think would be better: prepare,
calc, assemble, construct, make, compute, collect, compose, form, compile, etc ?
Personally prepare or calc or assemble are the best candidates. What do you
think?

-Sergey

> 
> -- 
> With Best Regards,
> Andy Shevchenko
> 
> 


[PATCH v5 16/16] test_firmware: Test partial read support

2020-10-02 Thread Kees Cook
From: Scott Branden 

Add additional hooks to test_firmware to pass in support
for partial file read using request_firmware_into_buf():

buf_size: size of buffer to request firmware into
partial: indicates that a partial file request is being made
file_offset: to indicate offset into file to request

Also update firmware selftests to use the new partial read test API.

Signed-off-by: Scott Branden 
Co-developed-by: Kees Cook 
Signed-off-by: Kees Cook 
---
 lib/test_firmware.c   | 154 --
 .../selftests/firmware/fw_filesystem.sh   |  91 +++
 2 files changed, 233 insertions(+), 12 deletions(-)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 06c955057756..2baa275a6ddf 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -52,6 +52,9 @@ struct test_batched_req {
  * @name: the name of the firmware file to look for
  * @into_buf: when the into_buf is used if this is true
  * request_firmware_into_buf() will be used instead.
+ * @buf_size: size of buf to allocate when into_buf is true
+ * @file_offset: file offset to request when calling request_firmware_into_buf
+ * @partial: partial read opt when calling request_firmware_into_buf
  * @sync_direct: when the sync trigger is used if this is true
  * request_firmware_direct() will be used instead.
  * @send_uevent: whether or not to send a uevent for async requests
@@ -91,6 +94,9 @@ struct test_batched_req {
 struct test_config {
char *name;
bool into_buf;
+   size_t buf_size;
+   size_t file_offset;
+   bool partial;
bool sync_direct;
bool send_uevent;
u8 num_requests;
@@ -185,6 +191,9 @@ static int __test_firmware_config_init(void)
test_fw_config->num_requests = TEST_FIRMWARE_NUM_REQS;
test_fw_config->send_uevent = true;
test_fw_config->into_buf = false;
+   test_fw_config->buf_size = TEST_FIRMWARE_BUF_SIZE;
+   test_fw_config->file_offset = 0;
+   test_fw_config->partial = false;
test_fw_config->sync_direct = false;
test_fw_config->req_firmware = request_firmware;
test_fw_config->test_result = 0;
@@ -238,28 +247,35 @@ static ssize_t config_show(struct device *dev,
dev_name(dev));
 
if (test_fw_config->name)
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"name:\t%s\n",
test_fw_config->name);
else
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"name:\tEMTPY\n");
 
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"num_requests:\t%u\n", test_fw_config->num_requests);
 
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"send_uevent:\t\t%s\n",
test_fw_config->send_uevent ?
"FW_ACTION_HOTPLUG" :
"FW_ACTION_NOHOTPLUG");
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"into_buf:\t\t%s\n",
test_fw_config->into_buf ? "true" : "false");
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "buf_size:\t%zu\n", test_fw_config->buf_size);
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "file_offset:\t%zu\n", test_fw_config->file_offset);
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "partial:\t\t%s\n",
+   test_fw_config->partial ? "true" : "false");
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"sync_direct:\t\t%s\n",
test_fw_config->sync_direct ? "true" : "false");
-   len += scnprintf(buf+len, PAGE_SIZE - len,
+   len += scnprintf(buf + len, PAGE_SIZE - len,
"read_fw_idx:\t%u\n", test_fw_config->read_fw_idx);
 
mutex_unlock(&test_fw_mutex);
@@ -317,6 +333,30 @@ static ssize_t test_dev_config_show_bool(char *buf, bool 
val)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static int test_dev_config_update_size_t(const char *buf,
+size_t size,
+size_t *cfg)
+{
+   int ret;
+   long new;
+
+   ret = kstrtol(buf, 10, &new);
+   if (ret)
+   return ret;
+
+   mutex_lock(&test_fw_mutex);
+   *(size_t *)cfg = new;
+   mutex_unlock(&test_fw_mutex);
+
+   /* Always return full write size even if we didn't consume all */
+   return size;
+}
+
+static ssize_t test_dev_config_show_size_t(

Re: [git pull] epoll fixes

2020-10-02 Thread Linus Torvalds
On Fri, Oct 2, 2020 at 10:20 AM Al Viro  wrote:
>
> Several race fixes in epoll.

Fudge. I screwed up the commit message due to a cut-and-paste error
(don't ask - sometimes google chrome and gnome-terminal seem to stop
agreeing about the normal X paste buffer)

And I extra stupidly pushed the thing out after the build succeeded,
not having noticed how I screwed up the trivial commit message.

I've force-updated the public sites, and I really hope nobody pulled
in that (very short) time when my tree had a bogus commit message.

(In case anybody cares, the commit message said "SEQCNT_MUTEX_ZERO"
instead of "Several race fixes in epoll" because that's what I had
looked at in another terminal. So it was a very WTF message)

I think this was only the second time I had a forced push to fix some
stupidity of mine. So it's not exactly _common_, but it's
embarrassing.

Linus


Re: [PATCH v2 4/4] arm: dts: add 8devices Habanero DVK

2020-10-02 Thread Robert Marko
On Wed, Sep 9, 2020 at 9:56 PM Robert Marko  wrote:
>
> 8devices Habanero DVK is a dual-band SoM development kit based on Qualcomm
> IPQ4019 + QCA8075 platform.
>
> Specs are:
> CPU: QCA IPQ4019
> RAM: DDR3L 512MB
> Storage: 32MB SPI-NOR and optional Parallel SLC NAND(Some boards ship with it 
> and some without)
> WLAN1: 2.4 GHz built into IPQ4019 (802.11n) 2x2
> WLAN2: 5 GHz built into IPQ4019 (802.11ac Wave-2) 2x2
> Ethernet: 5x Gbit LAN (QCA 8075)
> USB: 1x USB 2.0 and 1x USB 3.0 (Both built into IPQ4019)
> MicroSD slot (Uses SD controller built into IPQ4019)
> SDIO3.0/EMMC slot (Uses the same SD controller)
> Mini PCI-E Gen 2.0 slot (Built into IPQ4019)
> 5x LEDs (4 GPIO controllable)
> 2x Pushbutton (1 is connected to GPIO, other to SoC reset)
> LCD ZIF socket (Uses the LCD controller built into IPQ4019 which has no 
> driver support)
> 1x UART 115200 rate on J18
>
> 2x breakout development headers
> 12V DC Jack for power
> DIP switch for bootstrap configuration
>
> Signed-off-by: Robert Marko 
> Cc: Luka Perkov 
> ---
> Changes since v1:
> * Drop include that does not exist
>
>  arch/arm/boot/dts/Makefile|   1 +
>  .../boot/dts/qcom-ipq4019-habanero-dvk.dts| 304 ++
>  2 files changed, 305 insertions(+)
>  create mode 100644 arch/arm/boot/dts/qcom-ipq4019-habanero-dvk.dts
>
> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
> index 246d82fc5fcd..004262e0d699 100644
> --- a/arch/arm/boot/dts/Makefile
> +++ b/arch/arm/boot/dts/Makefile
> @@ -898,6 +898,7 @@ dtb-$(CONFIG_ARCH_QCOM) += \
> qcom-ipq4019-ap.dk04.1-c3.dtb \
> qcom-ipq4019-ap.dk07.1-c1.dtb \
> qcom-ipq4019-ap.dk07.1-c2.dtb \
> +   qcom-ipq4019-habanero-dvk.dtb \
> qcom-ipq8064-ap148.dtb \
> qcom-ipq8064-rb3011.dtb \
> qcom-msm8660-surf.dtb \
> diff --git a/arch/arm/boot/dts/qcom-ipq4019-habanero-dvk.dts 
> b/arch/arm/boot/dts/qcom-ipq4019-habanero-dvk.dts
> new file mode 100644
> index ..fe054adda0a7
> --- /dev/null
> +++ b/arch/arm/boot/dts/qcom-ipq4019-habanero-dvk.dts
> @@ -0,0 +1,304 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +/* Copyright (c) 2019, Robert Marko  */
> +
> +#include "qcom-ipq4019.dtsi"
> +#include 
> +#include 
> +
> +/ {
> +   model = "8devices Habanero DVK";
> +   compatible = "8dev,habanero-dvk";
> +
> +   keys {
> +   compatible = "gpio-keys";
> +
> +   reset {
> +   label = "reset";
> +   gpios = <&tlmm 8 GPIO_ACTIVE_LOW>;
> +   linux,code = ;
> +   };
> +   };
> +
> +   leds {
> +   compatible = "gpio-leds";
> +
> +   led_status: status {
> +   label = "habanero-dvk:green:status";
> +   gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>;
> +   panic-indicator;
> +   };
> +
> +   led_upgrade: upgrade {
> +   label = "habanero-dvk:green:upgrade";
> +   gpios = <&tlmm 40 GPIO_ACTIVE_HIGH>;
> +   };
> +
> +   wlan2g {
> +   label = "habanero-dvk:green:wlan2g";
> +   gpios = <&tlmm 46 GPIO_ACTIVE_HIGH>;
> +   linux,default-trigger = "phy0tpt";
> +   };
> +
> +   wlan5g {
> +   label = "habanero-dvk:green:wlan5g";
> +   gpios = <&tlmm 48 GPIO_ACTIVE_HIGH>;
> +   linux,default-trigger = "phy1tpt";
> +   };
> +   };
> +};
> +
> +&vqmmc {
> +   status = "okay";
> +};
> +
> +&sdhci {
> +   status = "okay";
> +
> +   pinctrl-0 = <&sd_pins>;
> +   pinctrl-names = "default";
> +   cd-gpios = <&tlmm 22 GPIO_ACTIVE_LOW>;
> +   vqmmc-supply = <&vqmmc>;
> +};
> +
> +&qpic_bam {
> +   status = "okay";
> +};
> +
> +&tlmm {
> +   mdio_pins: mdio_pinmux {
> +   mux_1 {
> +   pins = "gpio6";
> +   function = "mdio";
> +   bias-pull-up;
> +   };
> +
> +   mux_2 {
> +   pins = "gpio7";
> +   function = "mdc";
> +   bias-pull-up;
> +   };
> +   };
> +
> +   serial_pins: serial_pinmux {
> +   mux {
> +   pins = "gpio16", "gpio17";
> +   function = "blsp_uart0";
> +   bias-disable;
> +   };
> +   };
> +
> +   spi_0_pins: spi_0_pinmux {
> +   pinmux {
> +   function = "blsp_spi0";
> +   pins = "gpio13", "gpio14", "gpio15";
> +   drive-strength = <12>;
> +   bias-disable;
> +   };
> +
> +   pinmux_cs {
> +   function = "gpio";
> +   p

[PATCH net-next v2 5/6] bonding: update Documentation for port/bond terminology

2020-10-02 Thread Jarod Wilson
Point users to the new interface names instead of the old ones, where
appropriate. Userspace bits referenced still include use of master/slave,
but those can't be altered until userspace changes too, ideally after
these changes propagate to the community at large.

Cc: Jay Vosburgh 
Cc: Veaceslav Falico 
Cc: Andy Gospodarek 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Thomas Davis 
Cc: net...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
 Documentation/networking/bonding.rst | 440 +--
 1 file changed, 220 insertions(+), 220 deletions(-)

diff --git a/Documentation/networking/bonding.rst 
b/Documentation/networking/bonding.rst
index adc314639085..f4c4f0fae83b 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -167,22 +167,22 @@ or, for backwards compatibility, the option value.  E.g.,
 
 The parameters are as follows:
 
-active_slave
+active_port
 
-   Specifies the new active slave for modes that support it
+   Specifies the new active port for modes that support it
(active-backup, balance-alb and balance-tlb).  Possible values
-   are the name of any currently enslaved interface, or an empty
-   string.  If a name is given, the slave and its link must be up in order
-   to be selected as the new active slave.  If an empty string is
-   specified, the current active slave is cleared, and a new active
-   slave is selected automatically.
+   are the name of any currently aggregated interface, or an empty
+   string.  If a name is given, the port and its link must be up in order
+   to be selected as the new active port.  If an empty string is
+   specified, the current active port is cleared, and a new active
+   port is selected automatically.
 
Note that this is only available through the sysfs interface. No module
parameter by this name exists.
 
The normal value of this option is the name of the currently
-   active slave, or the empty string if there is no active slave or
-   the current mode does not use an active slave.
+   active port, or the empty string if there is no active port or
+   the current mode does not use an active port.
 
 ad_actor_sys_prio
 
@@ -199,8 +199,8 @@ ad_actor_system
protocol packet exchanges (LACPDUs). The value cannot be NULL or
multicast. It is preferred to have the local-admin bit set for this
mac but driver does not enforce it. If the value is not given then
-   system defaults to using the masters' mac address as actors' system
-   address.
+   system defaults to using the bonds' mac address as actors'
+   system address.
 
This parameter has effect only in 802.3ad mode and is available through
SysFs interface.
@@ -216,8 +216,8 @@ ad_select
bandwidth.
 
Reselection of the active aggregator occurs only when all
-   slaves of the active aggregator are down or the active
-   aggregator has no slaves.
+   ports of the active aggregator are down or the active
+   aggregator has no ports.
 
This is the default value.
 
@@ -226,18 +226,18 @@ ad_select
The active aggregator is chosen by largest aggregate
bandwidth.  Reselection occurs if:
 
-   - A slave is added to or removed from the bond
+   - A port is added to or removed from the bond
 
-   - Any slave's link state changes
+   - Any port's link state changes
 
-   - Any slave's 802.3ad association state changes
+   - Any port's 802.3ad association state changes
 
- The bond's administrative state changes to up
 
count or 2
 
The active aggregator is chosen by the largest number of
-   ports (slaves).  Reselection occurs as described under the
+   ports.  Reselection occurs as described under the
"bandwidth" setting, above.
 
The bandwidth and count selection policies permit failover of
@@ -265,7 +265,7 @@ ad_user_port_key
This parameter has effect only in 802.3ad mode and is available through
SysFs interface.
 
-all_slaves_active
+all_ports_active
 
Specifies that duplicate frames (received on inactive ports) should be
dropped (0) or delivered (1).
@@ -281,10 +281,10 @@ arp_interval
 
Specifies the ARP link monitoring frequency in milliseconds.
 
-   The ARP monitor works by periodically checking the slave
+   The ARP monitor works by periodically checking the port
devices to determine whether they have sent or received
traffic recently (the precise criteria depends upon the
-   bonding mode, and the state of the slave).  Regular traffic is
+   bonding mode, and the state of the port).  Regular traffic is
generated via ARP probes 

[PATCH net-next v2 4/6] bonding: rename bonding_sysfs_slave.c to _port.c

2020-10-02 Thread Jarod Wilson
Now that use of "slave" has been replaced by "port", rename this file too.

Cc: Jay Vosburgh 
Cc: Veaceslav Falico 
Cc: Andy Gospodarek 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Thomas Davis 
Cc: net...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
 drivers/net/bonding/Makefile  | 2 +-
 drivers/net/bonding/{bond_sysfs_slave.c => bond_sysfs_port.c} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename drivers/net/bonding/{bond_sysfs_slave.c => bond_sysfs_port.c} (100%)

diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 30e8ae3da2da..2ed0083514a6 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_BONDING) += bonding.o
 
-bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o 
bond_sysfs_slave.o bond_debugfs.o bond_netlink.o bond_options.o
+bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o 
bond_sysfs_port.o bond_debugfs.o bond_netlink.o bond_options.o
 
 proc-$(CONFIG_PROC_FS) += bond_procfs.o
 bonding-objs += $(proc-y)
diff --git a/drivers/net/bonding/bond_sysfs_slave.c 
b/drivers/net/bonding/bond_sysfs_port.c
similarity index 100%
rename from drivers/net/bonding/bond_sysfs_slave.c
rename to drivers/net/bonding/bond_sysfs_port.c
-- 
2.27.0



[PATCH net-next v2 6/6] bonding: make Kconfig toggle to disable legacy interfaces

2020-10-02 Thread Jarod Wilson
By default, enable retaining all user-facing API that includes the use of
master and slave, but add a Kconfig knob that allows those that wish to
remove it entirely to do so in one shot.

Cc: Jay Vosburgh 
Cc: Veaceslav Falico 
Cc: Andy Gospodarek 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Thomas Davis 
Cc: net...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
 drivers/net/Kconfig   | 12 
 drivers/net/bonding/bond_main.c   |  4 ++--
 drivers/net/bonding/bond_options.c|  4 ++--
 drivers/net/bonding/bond_procfs.c |  8 
 drivers/net/bonding/bond_sysfs.c  | 14 ++
 drivers/net/bonding/bond_sysfs_port.c |  6 --
 6 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c3dbe64e628e..1a13894820cb 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -56,6 +56,18 @@ config BONDING
  To compile this driver as a module, choose M here: the module
  will be called bonding.
 
+config BONDING_LEGACY_INTERFACES
+   default y
+   bool "Maintain legacy bonding interface names"
+   help
+ The bonding driver historically made use of the terms "master" and
+ "slave" to describe its component members. This has since been
+ changed to "bond" and "port" as part of a broader effort to remove
+ the use of socially problematic language from the kernel. However,
+ removing all such cases requires breaking long-standing user-facing
+ interfaces in /proc and /sys, which will not be done, unless you
+ opt out of them here, by selecting 'N'.
+
 config DUMMY
tristate "Dummy net driver support"
help
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b8a351d85da4..226d5fb76221 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -194,7 +194,7 @@ module_param(lp_interval, uint, 0);
 MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where "
  "the bonding driver sends learning packets to "
  "each port's peer switch. The default is 1.");
-/* legacy compatability module parameters */
+#ifdef CONFIG_BONDING_LEGACY_INTERFACES
 module_param_named(all_slaves_active, apa, int, 0644);
 MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface "
 "by setting active flag for all slaves; "
@@ -205,7 +205,7 @@ MODULE_PARM_DESC(packets_per_slave, "Packets to send per 
slave in balance-rr "
"mode; 0 for a random slave, 1 packet per "
"slave (default), >1 packets per slave. "
"(Legacy compat synonym for 
packets_per_port).");
-/* end legacy compatability module parameters */
+#endif
 
 /*- Global variables */
 
diff --git a/drivers/net/bonding/bond_options.c 
b/drivers/net/bonding/bond_options.c
index 8e4050c2b08e..630079ba5452 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -434,7 +434,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.values = bond_intmax_tbl,
.set = bond_option_peer_notif_delay_set
},
-/* legacy sysfs interfaces */
+#ifdef CONFIG_BONDING_LEGACY_INTERFACES
[BOND_OPT_PACKETS_PER_SLAVE] = {
.id = BOND_OPT_PACKETS_PER_SLAVE,
.name = "packets_per_slave",
@@ -467,7 +467,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.flags = BOND_OPTFLAG_RAWVAL,
.set = bond_option_ports_set
},
-/* end legacy sysfs interfaces */
+#endif
 };
 
 /* Searches for an option by name */
diff --git a/drivers/net/bonding/bond_procfs.c 
b/drivers/net/bonding/bond_procfs.c
index 2e65472e3c58..8e4a03d86329 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -86,8 +86,10 @@ static void bond_info_show_bond_dev(struct seq_file *seq)
primary = rcu_dereference(bond->primary_port);
seq_printf(seq, "Primary Port: %s",
   primary ? primary->dev->name : "None");
+#ifdef CONFIG_BONDING_LEGACY_INTERFACES
seq_printf(seq, "Primary Slave: %s",
   primary ? primary->dev->name : "None");
+#endif
if (primary) {
optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT,
  
bond->params.primary_reselect);
@@ -97,8 +99,10 @@ static void bond_info_show_bond_dev(struct seq_file *seq)
 
seq_printf(seq, "\nCurrently Active Port: %s\n",
   (curr) ? curr->dev->name : "None");
+#ifdef CONFIG_BONDING_LEGACY_INTERFACES
seq_printf(seq, "Currently Act

[PATCH net-next v2 2/6] bonding: replace use of the term master where possible

2020-10-02 Thread Jarod Wilson
Simply refer to what was the bonding "master" as the "bond" or bonding
device, depending on context. However, do retain compat code for the
bonding_masters sysfs interface to avoid breaking userspace.

Cc: Jay Vosburgh 
Cc: Veaceslav Falico 
Cc: Andy Gospodarek 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Thomas Davis 
Cc: net...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
 drivers/infiniband/core/cma.c |   2 +-
 drivers/infiniband/core/lag.c |   2 +-
 drivers/infiniband/core/roce_gid_mgmt.c   |   6 +-
 drivers/net/bonding/bond_3ad.c|   2 +-
 drivers/net/bonding/bond_main.c   |  58 
 drivers/net/bonding/bond_procfs.c |   4 +-
 drivers/net/bonding/bond_sysfs.c  | 135 ++
 .../net/ethernet/mellanox/mlx4/en_netdev.c|  10 +-
 .../ethernet/netronome/nfp/flower/lag_conf.c  |   2 +-
 .../ethernet/qlogic/netxen/netxen_nic_main.c  |   8 +-
 include/linux/netdevice.h |   8 +-
 include/net/bonding.h |   4 +-
 12 files changed, 158 insertions(+), 83 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 7f0e91e92968..fd5ad5139106 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4687,7 +4687,7 @@ static int cma_netdev_callback(struct notifier_block 
*self, unsigned long event,
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
 
-   if (!netif_is_bond_master(ndev))
+   if (!netif_is_bond_dev(ndev))
return NOTIFY_DONE;
 
mutex_lock(&lock);
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
index 7063e41eaf26..2afaca2f9d0b 100644
--- a/drivers/infiniband/core/lag.c
+++ b/drivers/infiniband/core/lag.c
@@ -128,7 +128,7 @@ struct net_device *rdma_lag_get_ah_roce_slave(struct 
ib_device *device,
dev_hold(master);
rcu_read_unlock();
 
-   if (!netif_is_bond_master(master))
+   if (!netif_is_bond_dev(master))
goto put;
 
slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c 
b/drivers/infiniband/core/roce_gid_mgmt.c
index 2860def84f4d..85c48977be6c 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -129,7 +129,7 @@ enum bonding_slave_state {
 static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct 
net_device *dev,
   struct 
net_device *upper)
 {
-   if (upper && netif_is_bond_master(upper)) {
+   if (upper && netif_is_bond_dev(upper)) {
struct net_device *pdev =
bond_option_active_slave_get_rcu(netdev_priv(upper));
 
@@ -216,7 +216,7 @@ is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 
port,
 * make sure that it the upper netdevice of rdma netdevice.
 */
res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
-  (netif_is_bond_master(cookie_ndev) &&
+  (netif_is_bond_dev(cookie_ndev) &&
rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
 
rcu_read_unlock();
@@ -271,7 +271,7 @@ is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, 
u8 port,
return false;
 
rcu_read_lock();
-   if (netif_is_bond_master(cookie_ndev) &&
+   if (netif_is_bond_dev(cookie_ndev) &&
rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
match = true;
rcu_read_unlock();
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 0eb717b0bfc6..852b9c4f6a47 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2550,7 +2550,7 @@ void bond_3ad_handle_link_change(struct slave *slave, 
char link)
 }
 
 /**
- * bond_3ad_set_carrier - set link state for bonding master
+ * bond_3ad_set_carrier - set link state for bonding device
  * @bond: bonding structure
  *
  * if we have an active aggregator, we're up, if not, we're down.
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 28c04a7a5105..405d230b8ea3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -469,8 +469,8 @@ static const struct xfrmdev_ops bond_xfrmdev_ops = {
 
 /*--- Link status ---*/
 
-/* Set the carrier state for the master according to the state of its
- * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
+/* Set the carrier state for the bond according to the state of its
+ * slaves.  If any slaves are up, the bond is up.  In 802.3ad mode,
  * do special 802.3ad magic.
  *
  * Returns zero if carrier state does not change, nonzero if it does.
@@ -1372,7 +1372,7 @@ static rx_handler_result_t bond_handle_frame(struct 
sk_buff **pskb)

Re: [PATCH v2 2/4] arm: dts: add 8devices Jalapeno

2020-10-02 Thread Robert Marko
On Wed, Sep 9, 2020 at 9:56 PM Robert Marko  wrote:
>
> 8devices Jalapeno is a dual-band SoM, based on Qualcomm
> IPQ4018 + QCA8072 platform.
>
> Specification:
> QCA IPQ4018, Quad core ARM v7 Cortex A7 717MHz
> 256 MB of DDR3 RAM
> 8 MB of SPI NOR flash
> 128 MB of Winbond SPI NAND flash
> WLAN1: Qualcomm Atheros QCA4018 2.4GHz 802.11bgn 2:2x2
> WLAN2: Qualcomm Atheros QCA4018 5GHz 802.11a/n/ac 2:2x2
> ETH: Qualcomm Atheros QCA8072 Gigabit Switch (1 x LAN, 1 x WAN)
>
> Signed-off-by: Robert Marko 
> Cc: Luka Perkov 
> ---
> Changes since v1:
> * Drop include that does not exist
>
>  arch/arm/boot/dts/Makefile  |   1 +
>  arch/arm/boot/dts/qcom-ipq4018-jalapeno.dts | 214 
>  2 files changed, 215 insertions(+)
>  create mode 100644 arch/arm/boot/dts/qcom-ipq4018-jalapeno.dts
>
> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
> index 4572db3fa5ae..9b474208057d 100644
> --- a/arch/arm/boot/dts/Makefile
> +++ b/arch/arm/boot/dts/Makefile
> @@ -890,6 +890,7 @@ dtb-$(CONFIG_ARCH_QCOM) += \
> qcom-apq8074-dragonboard.dtb \
> qcom-apq8084-ifc6540.dtb \
> qcom-apq8084-mtp.dtb \
> +   qcom-ipq4018-jalapeno.dtb \
> qcom-ipq4019-ap.dk01.1-c1.dtb \
> qcom-ipq4019-ap.dk04.1-c1.dtb \
> qcom-ipq4019-ap.dk04.1-c3.dtb \
> diff --git a/arch/arm/boot/dts/qcom-ipq4018-jalapeno.dts 
> b/arch/arm/boot/dts/qcom-ipq4018-jalapeno.dts
> new file mode 100644
> index ..394412619894
> --- /dev/null
> +++ b/arch/arm/boot/dts/qcom-ipq4018-jalapeno.dts
> @@ -0,0 +1,214 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +// Copyright (c) 2018, Robert Marko 
> +
> +#include "qcom-ipq4019.dtsi"
> +#include 
> +#include 
> +
> +/ {
> +   model = "8devices Jalapeno";
> +   compatible = "8dev,jalapeno";
> +};
> +
> +&tlmm {
> +   mdio_pins: mdio_pinmux {
> +   pinmux_1 {
> +   pins = "gpio53";
> +   function = "mdio";
> +   };
> +
> +   pinmux_2 {
> +   pins = "gpio52";
> +   function = "mdc";
> +   };
> +
> +   pinconf {
> +   pins = "gpio52", "gpio53";
> +   bias-pull-up;
> +   };
> +   };
> +
> +   serial_pins: serial_pinmux {
> +   mux {
> +   pins = "gpio60", "gpio61";
> +   function = "blsp_uart0";
> +   bias-disable;
> +   };
> +   };
> +
> +   spi_0_pins: spi_0_pinmux {
> +   pin {
> +   function = "blsp_spi0";
> +   pins = "gpio55", "gpio56", "gpio57";
> +   drive-strength = <2>;
> +   bias-disable;
> +   };
> +
> +   pin_cs {
> +   function = "gpio";
> +   pins = "gpio54", "gpio59";
> +   drive-strength = <2>;
> +   bias-disable;
> +   output-high;
> +   };
> +   };
> +};
> +
> +&watchdog {
> +   status = "okay";
> +};
> +
> +&prng {
> +   status = "okay";
> +};
> +
> +&blsp_dma {
> +   status = "okay";
> +};
> +
> +&blsp1_spi1 {
> +   status = "okay";
> +
> +   pinctrl-0 = <&spi_0_pins>;
> +   pinctrl-names = "default";
> +   cs-gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>, <&tlmm 59 GPIO_ACTIVE_HIGH>;
> +
> +   flash@0 {
> +   status = "okay";
> +
> +   compatible = "jedec,spi-nor";
> +   reg = <0>;
> +   spi-max-frequency = <2400>;
> +
> +   partitions {
> +   compatible = "fixed-partitions";
> +   #address-cells = <1>;
> +   #size-cells = <1>;
> +
> +   partition@0 {
> +   label = "SBL1";
> +   reg = <0x 0x0004>;
> +   read-only;
> +   };
> +
> +   partition@4 {
> +   label = "MIBIB";
> +   reg = <0x0004 0x0002>;
> +   read-only;
> +   };
> +
> +   partition@6 {
> +   label = "QSEE";
> +   reg = <0x0006 0x0006>;
> +   read-only;
> +   };
> +
> +   partition@c {
> +   label = "CDT";
> +   reg = <0x000c 0x0001>;
> +   read-only;
> +   };
> +
> +   partition@d {
> +   label = "DDRPARAMS";
> +   reg = <0x000d 0x0001>

[PATCH net-next v2 0/6] bonding: rename bond components

2020-10-02 Thread Jarod Wilson
The bonding driver's use of master and slave, while largely understood
in technical circles, poses a barrier for inclusion to some potential
members of the development and user community, due to the historical
context of masters and slaves, particularly in the United States. This
is a first full pass at replacing those phrases with more socially
inclusive ones, opting for bond to replace master and port to
replace slave, which is congruent with the bridge and team drivers.

There are a few problems with this change. First up, "port" is used in
the bonding 802.3ad code, so the first step here is to rename port to
ad_port, so we can reuse port. Second, we have the issue of not wanting
to break any existing userspace, which I believe this patchset
accomplishes, while also adding alternate interfaces using the new
terminology. This set also includes a Kconfig option that will let
people make the conscious decision to break userspace and no longer
expose the original master/slave interfaces, once their userspace is
able to cope with their removal.

Lastly, we do still have the issue of ease of backporting fixes to
-stable trees. I've not had a huge amount of time to spend on it, but
brief forays into coccinelle didn't really pay off (since it's meant to
operate on code, not patches), and the best solution I can come up with
is providing a shell script someone could run over git-format-patch
output before git-am'ing the result to a -stable tree, though scripting
these changes in the first place turned out to be not the best thing to
do anyway, due to subtle cases where use of master or slave can NOT yet
be replaced, so a large amount of work was done by hand, inspection,
trial and error, which is why this set is a lot longer in coming than
I'd originally hoped. I don't expect -stable backports to be horrible to
figure out one way or another though, and I don't believe that a bit of
inconvenience on that front is enough to warrant not making these
changes.

See here for further details on Red Hat's commitment to this work:
https://www.redhat.com/en/blog/making-open-source-more-inclusive-eradicating-problematic-language

As far as testing goes, I've manually operated on various bonds while
working on this code, and have run it through multiple lnst test runs,
which exercises the existing sysfs interfaces fairly extensively. As far
as I can tell, there is no breakage of existing interfaces with this
set, unless the user consciously opts to do so via Kconfig.

v2: legacy module parameters are retained this time, and we're trying
out bond/port instead of aggregator/link in place of master/slave. The
procfs interface legacy output is also duplicated or dropped, depending
on Kconfig, rather than being replaced.

Cc: Jay Vosburgh 
Cc: Veaceslav Falico 
Cc: Andy Gospodarek 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Thomas Davis 
Cc: net...@vger.kernel.org

Jarod Wilson (6):
  bonding: rename 802.3ad's struct port to ad_port
  bonding: replace use of the term master where possible
  bonding: rename slave to port where possible
  bonding: rename bonding_sysfs_slave.c to _port.c
  bonding: update Documentation for port/bond terminology
  bonding: make Kconfig toggle to disable legacy interfaces

 .clang-format |4 +-
 Documentation/networking/bonding.rst  |  440 ++--
 drivers/infiniband/core/cma.c |2 +-
 drivers/infiniband/core/lag.c |2 +-
 drivers/infiniband/core/roce_gid_mgmt.c   |   10 +-
 drivers/infiniband/hw/mlx4/main.c |2 +-
 drivers/net/Kconfig   |   12 +
 drivers/net/bonding/Makefile  |2 +-
 drivers/net/bonding/bond_3ad.c| 1701 ++--
 drivers/net/bonding/bond_alb.c|  689 ++---
 drivers/net/bonding/bond_debugfs.c|2 +-
 drivers/net/bonding/bond_main.c   | 2339 +
 drivers/net/bonding/bond_netlink.c|  114 +-
 drivers/net/bonding/bond_options.c|  258 +-
 drivers/net/bonding/bond_procfs.c |  102 +-
 drivers/net/bonding/bond_sysfs.c  |  242 +-
 drivers/net/bonding/bond_sysfs_port.c |  187 ++
 drivers/net/bonding/bond_sysfs_slave.c|  176 --
 .../ethernet/chelsio/cxgb3/cxgb3_offload.c|2 +-
 .../net/ethernet/mellanox/mlx4/en_netdev.c|   14 +-
 .../ethernet/mellanox/mlx5/core/en/rep/bond.c |4 +-
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   |2 +-
 .../ethernet/netronome/nfp/flower/lag_conf.c  |2 +-
 .../ethernet/qlogic/netxen/netxen_nic_main.c  |   12 +-
 include/linux/netdevice.h |   22 +-
 include/net/bond_3ad.h|   42 +-
 include/net/bond_alb.h|   74 +-
 include/net/bond_options.h|   18 +-
 include/net/bonding.h |  362 +--
 include/net/lag.h |2 +-
 30 files changed

Re: [PATCH v2 3/4] arm: dts: add Alfa Network AP120C-AC

2020-10-02 Thread Robert Marko
On Wed, Sep 9, 2020 at 9:56 PM Robert Marko  wrote:
>
> ALFA Network AP120C-AC is a dual-band ceiling AP, based on Qualcomm
> IPQ4018 + QCA8075 platform.
>
> Specification:
>
> - Qualcomm IPQ4018 (717 MHz)
> - 256 MB of RAM (DDR3)
> - 16 MB (SPI NOR) + 128 or 512 MB (SPI NAND) of flash
> - 2x Gbps Ethernet, with 802.3af PoE support in one port
> - 2T2R 2.4/5 GHz (IPQ4018), with ext. FEMs (QFE1952, QFE1922)
> - 3x U.FL connectors
> - 1x 1.8 dBi (Bluetooth) and 2x 3/5 dBi dual-band (Wi-Fi) antennas
> - Atmel/Microchip AT97SC3205T TPM module (I2C bus)
> - TI CC2540 Bluetooth LE module (USB 2.0 bus)
> - 1x button (reset)
> - 1x USB 2.0
> - DC jack for main power input (12 V)
> - UART header available on PCB (2.0 mm pitch)
>
> This adds DTS for both the generic and custom Bit edition for Sartura.
>
> Signed-off-by: Robert Marko 
> Cc: Luka Perkov 
> ---
> Changes since v1:
> * Drop include that does not exist
>
>  arch/arm/boot/dts/Makefile|   2 +
>  .../boot/dts/qcom-ipq4018-ap120c-ac-bit.dts   |  28 ++
>  arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dts  |  27 ++
>  arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dtsi | 254 ++
>  4 files changed, 311 insertions(+)
>  create mode 100644 arch/arm/boot/dts/qcom-ipq4018-ap120c-ac-bit.dts
>  create mode 100644 arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dts
>  create mode 100644 arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dtsi
>
> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
> index 9b474208057d..246d82fc5fcd 100644
> --- a/arch/arm/boot/dts/Makefile
> +++ b/arch/arm/boot/dts/Makefile
> @@ -890,6 +890,8 @@ dtb-$(CONFIG_ARCH_QCOM) += \
> qcom-apq8074-dragonboard.dtb \
> qcom-apq8084-ifc6540.dtb \
> qcom-apq8084-mtp.dtb \
> +   qcom-ipq4018-ap120c-ac.dtb \
> +   qcom-ipq4018-ap120c-ac-bit.dtb \
> qcom-ipq4018-jalapeno.dtb \
> qcom-ipq4019-ap.dk01.1-c1.dtb \
> qcom-ipq4019-ap.dk04.1-c1.dtb \
> diff --git a/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac-bit.dts 
> b/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac-bit.dts
> new file mode 100644
> index ..028ac8e24797
> --- /dev/null
> +++ b/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac-bit.dts
> @@ -0,0 +1,28 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +
> +#include "qcom-ipq4018-ap120c-ac.dtsi"
> +
> +/ {
> +   model = "ALFA Network AP120C-AC Bit";
> +
> +   leds {
> +   compatible = "gpio-leds";
> +
> +   power {
> +   label = "ap120c-ac:green:power";
> +   gpios = <&tlmm 5 GPIO_ACTIVE_LOW>;
> +   default-state = "on";
> +   };
> +
> +   wlan {
> +   label = "ap120c-ac:green:wlan";
> +   gpios = <&tlmm 3 GPIO_ACTIVE_HIGH>;
> +   };
> +
> +   support {
> +   label = "ap120c-ac:green:support";
> +   gpios = <&tlmm 2 GPIO_ACTIVE_HIGH>;
> +   panic-indicator;
> +   };
> +   };
> +};
> diff --git a/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dts 
> b/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dts
> new file mode 100644
> index ..b7916fc26d68
> --- /dev/null
> +++ b/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dts
> @@ -0,0 +1,27 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +
> +#include "qcom-ipq4018-ap120c-ac.dtsi"
> +
> +/ {
> +   leds {
> +   compatible = "gpio-leds";
> +
> +   status: status {
> +   label = "ap120c-ac:blue:status";
> +   gpios = <&tlmm 5 GPIO_ACTIVE_LOW>;
> +   default-state = "keep";
> +   };
> +
> +   wlan2g {
> +   label = "ap120c-ac:green:wlan2g";
> +   gpios = <&tlmm 3 GPIO_ACTIVE_HIGH>;
> +   linux,default-trigger = "phy0tpt";
> +   };
> +
> +   wlan5g {
> +   label = "ap120c-ac:red:wlan5g";
> +   gpios = <&tlmm 2 GPIO_ACTIVE_HIGH>;
> +   linux,default-trigger = "phy1tpt";
> +   };
> +   };
> +};
> diff --git a/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dtsi 
> b/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dtsi
> new file mode 100644
> index ..1f3b1ce82108
> --- /dev/null
> +++ b/arch/arm/boot/dts/qcom-ipq4018-ap120c-ac.dtsi
> @@ -0,0 +1,254 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +
> +#include "qcom-ipq4019.dtsi"
> +#include 
> +#include 
> +
> +/ {
> +   model = "ALFA Network AP120C-AC";
> +   compatible = "alfa-network,ap120c-ac";
> +
> +   keys {
> +   compatible = "gpio-keys";
> +
> +   reset {
> +   label = "reset";
> +   gpios = <&tlmm 63 GPIO_ACTIVE_LOW>;
> +   linux,code = ;
> +   };
> +   };
> +};
> +
> +&t

[PATCH net-next v2 1/6] bonding: rename 802.3ad's struct port to ad_port

2020-10-02 Thread Jarod Wilson
The intention is to reuse "port" in place of "slave" in the bonding driver
after making this change, as port is consistent with the bridge and team
drivers, and allows us to remove socially problematic language from the
bonding driver.

Cc: Jay Vosburgh 
Cc: Veaceslav Falico 
Cc: Andy Gospodarek 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Thomas Davis 
Cc: net...@vger.kernel.org
Signed-off-by: Jarod Wilson 
---
 drivers/net/bonding/bond_3ad.c | 1307 
 drivers/net/bonding/bond_main.c|4 +-
 drivers/net/bonding/bond_netlink.c |6 +-
 drivers/net/bonding/bond_procfs.c  |   36 +-
 drivers/net/bonding/bond_sysfs_slave.c |   10 +-
 include/net/bond_3ad.h |   14 +-
 6 files changed, 688 insertions(+), 689 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index aa001b16765a..0eb717b0bfc6 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -89,60 +89,60 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] 
__long_aligned =
MULTICAST_LACPDU_ADDR;
 
 /* = main 802.3ad protocol functions == */
-static int ad_lacpdu_send(struct port *port);
-static int ad_marker_send(struct port *port, struct bond_marker *marker);
-static void ad_mux_machine(struct port *port, bool *update_slave_arr);
-static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
-static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port);
-static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
+static int ad_lacpdu_send(struct ad_port *ad_port);
+static int ad_marker_send(struct ad_port *ad_port, struct bond_marker *marker);
+static void ad_mux_machine(struct ad_port *ad_port, bool *update_slave_arr);
+static void ad_rx_machine(struct lacpdu *lacpdu, struct ad_port *ad_port);
+static void ad_tx_machine(struct ad_port *ad_port);
+static void ad_periodic_machine(struct ad_port *ad_port);
+static void ad_port_selection_logic(struct ad_port *ad_port, bool 
*update_slave_arr);
 static void ad_agg_selection_logic(struct aggregator *aggregator,
   bool *update_slave_arr);
 static void ad_clear_agg(struct aggregator *aggregator);
 static void ad_initialize_agg(struct aggregator *aggregator);
-static void ad_initialize_port(struct port *port, int lacp_fast);
-static void ad_enable_collecting_distributing(struct port *port,
+static void ad_initialize_port(struct ad_port *ad_port, int lacp_fast);
+static void ad_enable_collecting_distributing(struct ad_port *ad_port,
  bool *update_slave_arr);
-static void ad_disable_collecting_distributing(struct port *port,
+static void ad_disable_collecting_distributing(struct ad_port *ad_port,
   bool *update_slave_arr);
 static void ad_marker_info_received(struct bond_marker *marker_info,
-   struct port *port);
+   struct ad_port *ad_port);
 static void ad_marker_response_received(struct bond_marker *marker,
-   struct port *port);
-static void ad_update_actor_keys(struct port *port, bool reset);
+   struct ad_port *ad_port);
+static void ad_update_actor_keys(struct ad_port *ad_port, bool reset);
 
 
 /* = api to bonding and kernel code == */
 
 /**
- * __get_bond_by_port - get the port's bonding struct
- * @port: the port we're looking at
+ * __get_bond_by_ad_port - get the ad_port's bonding struct
+ * @ad_port: the ad_port we're looking at
  *
- * Return @port's bonding struct, or %NULL if it can't be found.
+ * Return @ad_port's bonding struct, or %NULL if it can't be found.
  */
-static inline struct bonding *__get_bond_by_port(struct port *port)
+static inline struct bonding *__get_bond_by_ad_port(struct ad_port *ad_port)
 {
-   if (port->slave == NULL)
+   if (ad_port->slave == NULL)
return NULL;
 
-   return bond_get_bond_by_slave(port->slave);
+   return bond_get_bond_by_slave(ad_port->slave);
 }
 
 /**
  * __get_first_agg - get the first aggregator in the bond
- * @port: the port we're looking at
+ * @ad_port: the ad_port we're looking at
  *
  * Return the aggregator of the first slave in @bond, or %NULL if it can't be
  * found.
  * The caller must hold RCU or RTNL lock.
  */
-static inline struct aggregator *__get_first_agg(struct port *port)
+static inline struct aggregator *__get_first_agg(struct ad_port *ad_port)
 {
-   struct bonding *bond = __get_bond_by_port(port);
+   struct bonding *bond = __get_bond_by_ad_port(ad_port);
struct slave *first_slave;
struct aggregator *agg;
 
-   /* If there's no bond for this port, or bond has no slaves */
+   /* If there's no bond for this ad_port, or bond has no slaves */
 

[PATCH v5 05/16] fs/kernel_read_file: Remove redundant size argument

2020-10-02 Thread Kees Cook
In preparation for refactoring kernel_read_file*(), remove the redundant
"size" argument which is not needed: it can be included in the return
code, with callers adjusted. (VFS reads already cannot be larger than
INT_MAX.)

Signed-off-by: Kees Cook 
Reviewed-by: Mimi Zohar 
Reviewed-by: Luis Chamberlain 
Reviewed-by: James Morris 
Acked-by: Scott Branden 
---
 drivers/base/firmware_loader/main.c | 10 ++
 fs/kernel_read_file.c   | 20 +---
 include/linux/kernel_read_file.h|  8 
 kernel/kexec_file.c | 14 +++---
 kernel/module.c |  7 +++
 security/integrity/digsig.c |  5 +++--
 security/integrity/ima/ima_fs.c |  6 --
 7 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/drivers/base/firmware_loader/main.c 
b/drivers/base/firmware_loader/main.c
index 8c6ea389afcf..6df1bdcfeb9d 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -467,7 +467,7 @@ fw_get_filesystem_firmware(struct device *device, struct 
fw_priv *fw_priv,
 size_t in_size,
 const void *in_buffer))
 {
-   loff_t size;
+   size_t size;
int i, len;
int rc = -ENOENT;
char *path;
@@ -499,10 +499,9 @@ fw_get_filesystem_firmware(struct device *device, struct 
fw_priv *fw_priv,
fw_priv->size = 0;
 
/* load firmware files from the mount namespace of init */
-   rc = kernel_read_file_from_path_initns(path, &buffer,
-  &size, msize,
+   rc = kernel_read_file_from_path_initns(path, &buffer, msize,
   READING_FIRMWARE);
-   if (rc) {
+   if (rc < 0) {
if (rc != -ENOENT)
dev_warn(device, "loading %s failed with error 
%d\n",
 path, rc);
@@ -511,6 +510,9 @@ fw_get_filesystem_firmware(struct device *device, struct 
fw_priv *fw_priv,
 path);
continue;
}
+   size = rc;
+   rc = 0;
+
dev_dbg(device, "Loading firmware from %s\n", path);
if (decompress) {
dev_dbg(device, "f/w decompressing %s\n",
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index 54d972d4befc..dc28a8def597 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -5,7 +5,7 @@
 #include 
 #include 
 
-int kernel_read_file(struct file *file, void **buf, loff_t *size,
+int kernel_read_file(struct file *file, void **buf,
 loff_t max_size, enum kernel_read_file_id id)
 {
loff_t i_size, pos;
@@ -29,7 +29,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t 
*size,
ret = -EINVAL;
goto out;
}
-   if (i_size > SIZE_MAX || (max_size > 0 && i_size > max_size)) {
+   if (i_size > INT_MAX || (max_size > 0 && i_size > max_size)) {
ret = -EFBIG;
goto out;
}
@@ -59,8 +59,6 @@ int kernel_read_file(struct file *file, void **buf, loff_t 
*size,
}
 
ret = security_kernel_post_read_file(file, *buf, i_size, id);
-   if (!ret)
-   *size = pos;
 
 out_free:
if (ret < 0) {
@@ -72,11 +70,11 @@ int kernel_read_file(struct file *file, void **buf, loff_t 
*size,
 
 out:
allow_write_access(file);
-   return ret;
+   return ret == 0 ? pos : ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file);
 
-int kernel_read_file_from_path(const char *path, void **buf, loff_t *size,
+int kernel_read_file_from_path(const char *path, void **buf,
   loff_t max_size, enum kernel_read_file_id id)
 {
struct file *file;
@@ -89,14 +87,14 @@ int kernel_read_file_from_path(const char *path, void 
**buf, loff_t *size,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, size, max_size, id);
+   ret = kernel_read_file(file, buf, max_size, id);
fput(file);
return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
 
 int kernel_read_file_from_path_initns(const char *path, void **buf,
- loff_t *size, loff_t max_size,
+ loff_t max_size,
  enum kernel_read_file_id id)
 {
struct file *file;
@@ -115,13 +113,13 @@ int kernel_read_file_from_path_initns(const char *path, 
void **buf,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, size, max_size, id);
+   ret = kernel_read_file(file, buf, max_size, id);
fput(file);
return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_

Re: [PATCH v2 3/3] dt-bindings: thermal: update sustainable-power with abstract scale

2020-10-02 Thread Doug Anderson
Hi,

On Fri, Oct 2, 2020 at 9:40 AM Lukasz Luba  wrote:
>
> On 10/2/20 4:47 PM, Doug Anderson wrote:
> > Hi,
> >
> > On Fri, Oct 2, 2020 at 8:13 AM Lukasz Luba  wrote:
> >>
> >> Hi Doug,
> >>
> >> On 10/2/20 3:31 PM, Doug Anderson wrote:
> >>> Hi,
> >>>
> >>> On Fri, Oct 2, 2020 at 4:45 AM Lukasz Luba  wrote:
> 
>  Update the documentation for the binding 'sustainable-power' and allow
>  to provide values in an abstract scale. It is required when the cooling
>  devices use an abstract scale for their power values.
> 
>  Signed-off-by: Lukasz Luba 
>  ---
> .../devicetree/bindings/thermal/thermal-zones.yaml  | 13 +
> 1 file changed, 9 insertions(+), 4 deletions(-)
> 
>  diff --git 
>  a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml 
>  b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
>  index 3ec9cc87ec50..4d8f2e37d1e6 100644
>  --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
>  +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
>  @@ -99,10 +99,15 @@ patternProperties:
>   sustainable-power:
> $ref: /schemas/types.yaml#/definitions/uint32
> description:
>  -  An estimate of the sustainable power (in mW) that this 
>  thermal zone
>  -  can dissipate at the desired control temperature. For 
>  reference, the
>  -  sustainable power of a 4-inch phone is typically 2000mW, 
>  while on a
>  -  10-inch tablet is around 4500mW.
>  +  An estimate of the sustainable power (in mW or in an abstract 
>  scale)
>  + that this thermal zone can dissipate at the desired control
>  + temperature. For reference, the sustainable power of a 4-inch 
>  phone
>  + is typically 2000mW, while on a 10-inch tablet is around 
>  4500mW.
>  +
>  + It is possible to express the sustainable power in an abstract
>  + scale. This is the case when the related cooling devices use 
>  also
>  + abstract scale to express their power usage. The scale must be
>  + consistent.
> >>>
> >>> Two thoughts:
> >>>
> >>> 1. If we're going to allow "sustainable-power" to be in abstract
> >>> scale, why not allow "dynamic-power-coefficient" to be in abstract
> >>> scale too?  I assume that the whole reason against that originally was
> >>> the idea of device tree purity, but if we're allowing the abstract
> >>> scale here then there seems no reason not to allow it for
> >>> "dynamic-power-coefficient".
> >>
> >> With this binding it's a bit more tricky.
> >> I also have to discuss a few things internally. This requirement of
> >> uW/MHz/V^2 makes the code easier also for potential drivers
> >> like GPU (which are going to register the devfreq cooling with EM).
> >>
> >> Let me think about it, but for now I would just update these bits.
> >> These are required for proper IPA operation; the dyn.-pow.-coef. is a
> >> nice-to-have and a possible next step.
> >
> > I guess the problem is that Rajendra is currently planning to remove
> > all the "dynamic-power-coefficient" values from device tree right now
> > and move them to the source code because the numbers we currently have
> > in the device tree _are_ in abstract scale and thus violate the
> > bindings.  Moving this to source code won't help us get to more real
> > power numbers (since it'll still be abstract scale), it'll just be
> > pure churn.  If we're OK with the abstract scale in general then we
> > should allow it everywhere and not add churn for no reason.
>
> IIUC he is still going to use the Energy Model, but with different
> registration function. We have such a driver: scmi-cpufreq.c, which
> uses em_dev_register_perf_domain(). He can still use EM, EAS, IPA
> not violating anything.

Right.  He's going to take the exact same "abstract scale" numbers
that he has today and take them out of device tree and put them in the
cpufreq driver.  Doing so magically makes it so that he's not
violating anything since "abstract scale" is not currently allowed in
device tree but is allowed in the cpufreq driver.  I'm not saying that
he's doing anything wrong, I'm just saying that it's pointless churn.
If we're OK with "abstract scale" in one place in the device tree we
should be OK with it everywhere in the device tree.  Then Rajendra
wouldn't need his patch at all and he could leave his numbers in the
device tree.


> The real problem that we want to address is with sustainable-power in
> IPA. It is used in power budget calculation and if the devices operate
> in abstract scale, then there is an issue.
> There are two options to get that value:
> 1. from DT, which can have optimized value, stored by OEM engineer
> 2. from IPA estimation code, which just calculates it as a sum of
> minimum OPP power for each cooling device.
>
> The 2nd option

[PATCH v5 08/16] LSM: Introduce kernel_post_load_data() hook

2020-10-02 Thread Kees Cook
There are a few places in the kernel where LSMs would like to have
visibility into the contents of a kernel buffer that has been loaded or
read. While security_kernel_post_read_file() (which includes the
buffer) exists as a pairing for security_kernel_read_file(), no such
hook exists to pair with security_kernel_load_data().

Earlier proposals for just using security_kernel_post_read_file() with a
NULL file argument were rejected (i.e. "file" should always be valid for
the security_..._file hooks), but it appears at least one case was
left in the kernel during earlier refactoring. (This will be fixed in
a subsequent patch.)

Since not all cases of security_kernel_load_data() can have a single
contiguous buffer made available to the LSM hook (e.g. kexec image
segments are separately loaded), there needs to be a way for the LSM to
reason about its expectations of the hook coverage. In order to handle
this, add a "contents" argument to the "kernel_load_data" hook that
indicates if the newly added "kernel_post_load_data" hook will be called
with the full contents once loaded. That way, LSMs requiring full contents
can choose to unilaterally reject "kernel_load_data" with contents=false
(which is effectively the existing hook coverage), but when contents=true
they can allow it and later evaluate the "kernel_post_load_data" hook
once the buffer is loaded.

With this change, LSMs can gain coverage over non-file-backed data loads
(e.g. init_module(2) and firmware userspace helper), which will happen
in subsequent patches.

Additionally prepare IMA to start processing these cases.

Signed-off-by: Kees Cook 
Reviewed-by: KP Singh 
---
 drivers/base/firmware_loader/fallback.c   |  2 +-
 .../base/firmware_loader/fallback_platform.c  |  2 +-
 include/linux/ima.h   | 13 --
 include/linux/lsm_hook_defs.h |  4 +++-
 include/linux/lsm_hooks.h | 10 
 include/linux/security.h  | 14 +--
 kernel/kexec.c|  2 +-
 kernel/module.c   |  2 +-
 security/integrity/ima/ima_main.c | 24 ++-
 security/loadpin/loadpin.c|  2 +-
 security/security.c   | 20 +---
 security/selinux/hooks.c  |  2 +-
 12 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/drivers/base/firmware_loader/fallback.c 
b/drivers/base/firmware_loader/fallback.c
index 283ca2de76d4..bff4717cc6b5 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -613,7 +613,7 @@ static bool fw_run_sysfs_fallback(u32 opt_flags)
return false;
 
/* Also permit LSMs and IMA to fail firmware sysfs fallback */
-   ret = security_kernel_load_data(LOADING_FIRMWARE);
+   ret = security_kernel_load_data(LOADING_FIRMWARE, false);
if (ret < 0)
return false;
 
diff --git a/drivers/base/firmware_loader/fallback_platform.c 
b/drivers/base/firmware_loader/fallback_platform.c
index 6958ab1a8059..a12c79d47efc 100644
--- a/drivers/base/firmware_loader/fallback_platform.c
+++ b/drivers/base/firmware_loader/fallback_platform.c
@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 
opt_flags)
if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
return -ENOENT;
 
-   rc = security_kernel_load_data(LOADING_FIRMWARE);
+   rc = security_kernel_load_data(LOADING_FIRMWARE, false);
if (rc)
return rc;
 
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 64804f78408b..af9fb8c5f16a 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -20,7 +20,9 @@ extern void ima_post_create_tmpfile(struct inode *inode);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot);
-extern int ima_load_data(enum kernel_load_data_id id);
+extern int ima_load_data(enum kernel_load_data_id id, bool contents);
+extern int ima_post_load_data(char *buf, loff_t size,
+ enum kernel_load_data_id id, char *description);
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
 extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
  enum kernel_read_file_id id);
@@ -78,7 +80,14 @@ static inline int ima_file_mprotect(struct vm_area_struct 
*vma,
return 0;
 }
 
-static inline int ima_load_data(enum kernel_load_data_id id)
+static inline int ima_load_data(enum kernel_load_data_id id, bool contents)
+{
+   return 0;
+}
+
+static inline int ima_post_load_data(char *buf, loff_t size,
+enum kernel_load_data_id id,
+char *description)
 {
return 0;
 }
diff --git a/include/linu

[PATCH v5 03/16] fs/kernel_read_file: Split into separate include file

2020-10-02 Thread Kees Cook
From: Scott Branden 

Move kernel_read_file* out of linux/fs.h to its own linux/kernel_read_file.h
include file. linux/fs.h gets pulled in just about everywhere and doesn't
really need functions that are unrelated to the general fs interface.

Suggested-by: Christoph Hellwig 
Signed-off-by: Scott Branden 
Signed-off-by: Kees Cook 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Mimi Zohar 
Reviewed-by: Luis Chamberlain 
Acked-by: Greg Kroah-Hartman 
Acked-by: James Morris 
Link: 
https://lore.kernel.org/r/20200706232309.12010-2-scott.bran...@broadcom.com
---
 drivers/base/firmware_loader/main.c |  1 +
 fs/exec.c   |  1 +
 include/linux/fs.h  | 38 -
 include/linux/ima.h |  1 +
 include/linux/kernel_read_file.h| 51 +
 include/linux/security.h|  1 +
 kernel/kexec_file.c |  1 +
 kernel/module.c |  1 +
 security/integrity/digsig.c |  1 +
 security/integrity/ima/ima_fs.c |  1 +
 security/integrity/ima/ima_main.c   |  1 +
 security/integrity/ima/ima_policy.c |  1 +
 security/loadpin/loadpin.c  |  1 +
 security/security.c |  1 +
 security/selinux/hooks.c|  1 +
 15 files changed, 64 insertions(+), 38 deletions(-)
 create mode 100644 include/linux/kernel_read_file.h

diff --git a/drivers/base/firmware_loader/main.c 
b/drivers/base/firmware_loader/main.c
index b0ec2721f55d..8c6ea389afcf 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -12,6 +12,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/fs/exec.c b/fs/exec.c
index 9233cd50dc4c..c454af329413 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -23,6 +23,7 @@
  * formats.
  */
 
+#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3fb7af12d033..0885d53afb11 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2858,44 +2858,6 @@ static inline void i_readcount_inc(struct inode *inode)
 #endif
 extern int do_pipe_flags(int *, int);
 
-/* This is a list of *what* is being read, not *how* nor *where*. */
-#define __kernel_read_file_id(id) \
-   id(UNKNOWN, unknown)\
-   id(FIRMWARE, firmware)  \
-   id(MODULE, kernel-module)   \
-   id(KEXEC_IMAGE, kexec-image)\
-   id(KEXEC_INITRAMFS, kexec-initramfs)\
-   id(POLICY, security-policy) \
-   id(X509_CERTIFICATE, x509-certificate)  \
-   id(MAX_ID, )
-
-#define __fid_enumify(ENUM, dummy) READING_ ## ENUM,
-#define __fid_stringify(dummy, str) #str,
-
-enum kernel_read_file_id {
-   __kernel_read_file_id(__fid_enumify)
-};
-
-static const char * const kernel_read_file_str[] = {
-   __kernel_read_file_id(__fid_stringify)
-};
-
-static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id)
-{
-   if ((unsigned)id >= READING_MAX_ID)
-   return kernel_read_file_str[READING_UNKNOWN];
-
-   return kernel_read_file_str[id];
-}
-
-extern int kernel_read_file(struct file *, void **, loff_t *, loff_t,
-   enum kernel_read_file_id);
-extern int kernel_read_file_from_path(const char *, void **, loff_t *, loff_t,
- enum kernel_read_file_id);
-extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, 
loff_t,
-enum kernel_read_file_id);
-extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
-   enum kernel_read_file_id);
 extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
 extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
diff --git a/include/linux/ima.h b/include/linux/ima.h
index d15100de6cdd..64804f78408b 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -7,6 +7,7 @@
 #ifndef _LINUX_IMA_H
 #define _LINUX_IMA_H
 
+#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h
new file mode 100644
index ..78cf3d7dc835
--- /dev/null
+++ b/include/linux/kernel_read_file.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KERNEL_READ_FILE_H
+#define _LINUX_KERNEL_READ_FILE_H
+
+#include 
+#include 
+
+/* This is a list of *what* is being read, not *how* nor *where*. */
+#define __kernel_read_file_id(id) \
+   id(UNKNOWN, unknown)\
+   id(FIRMWARE, firmware)  \
+   id(MODULE, kernel-module)   \
+   id(KEXEC_IMAGE, kexec-image)\
+   id(KEXEC_INITRAMFS, kexec-initramfs)\
+   id(POLICY, security-policy) \
+   id(X509_CERTIFICATE, x509-certificate)  \
+   id(MAX_ID, )
+
+#define __fid_enumify(ENUM, dummy)

Re: [PATCH][next] net: phy: dp83869: fix unsigned comparisons against less than zero values

2020-10-02 Thread Andrew Lunn
On Fri, Oct 02, 2020 at 12:26:49PM -0500, Dan Murphy wrote:
> Colin
> 
> On 10/2/20 11:54 AM, Colin King wrote:
> > From: Colin Ian King 
> > 
> > Currently the comparisons of the u16 integers value and sopass_val
> > against zero for error checking are always false because the values
> > are unsigned. Fix this by making these variables int. This does not
> > affect the shift and mask operations performed on these variables.
> > 
> > Addresses-Coverity: ("Unsigned compared against zero")
> > Fixes: 49fc23018ec6 ("net: phy: dp83869: support Wake on LAN")
> > Signed-off-by: Colin Ian King 
> > ---
> >   drivers/net/phy/dp83869.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
> > index 0aee5f645b71..cf6dec7b7d8e 100644
> > --- a/drivers/net/phy/dp83869.c
> > +++ b/drivers/net/phy/dp83869.c
> > @@ -305,7 +305,7 @@ static int dp83869_set_wol(struct phy_device *phydev,
> >   static void dp83869_get_wol(struct phy_device *phydev,
> > struct ethtool_wolinfo *wol)
> >   {
> > -   u16 value, sopass_val;
> > +   int value, sopass_val;
> > wol->supported = (WAKE_UCAST | WAKE_BCAST | WAKE_MAGIC |
> > WAKE_MAGICSECURE);
> 
> Wonder why this was not reported before as the previous comparison issue
> reported by zero day.

I think it needs W=1

  Andrew


[PATCH v5 12/16] IMA: Add support for file reads without contents

2020-10-02 Thread Kees Cook
From: Scott Branden 

When the kernel_read_file LSM hook is called with contents=false, IMA
can appraise the file directly, without requiring a filled buffer. When
such a buffer is available, though, IMA can continue to use it instead
of forcing a double read here.

Signed-off-by: Scott Branden 
Link: 
https://lore.kernel.org/lkml/20200706232309.12010-10-scott.bran...@broadcom.com/
Signed-off-by: Kees Cook 
Reviewed-by: Mimi Zohar 
---
 security/integrity/ima/ima_main.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/security/integrity/ima/ima_main.c 
b/security/integrity/ima/ima_main.c
index 939f53d02627..82c9d62bcb11 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -613,11 +613,8 @@ void ima_post_path_mknod(struct dentry *dentry)
 int ima_read_file(struct file *file, enum kernel_read_file_id read_id,
  bool contents)
 {
-   /* Reject all partial reads during appraisal. */
-   if (!contents) {
-   if (ima_appraise & IMA_APPRAISE_ENFORCE)
-   return -EACCES;
-   }
+   enum ima_hooks func;
+   u32 secid;
 
/*
 * Do devices using pre-allocated memory run the risk of the
@@ -626,7 +623,20 @@ int ima_read_file(struct file *file, enum 
kernel_read_file_id read_id,
 * buffers? It may be desirable to include the buffer address
 * in this API and walk all the dma_map_single() mappings to check.
 */
-   return 0;
+
+   /*
+* There will be a call made to ima_post_read_file() with
+* a filled buffer, so we don't need to perform an extra
+* read early here.
+*/
+   if (contents)
+   return 0;
+
+   /* Read entire file for all partial reads. */
+   func = read_idmap[read_id] ?: FILE_CHECK;
+   security_task_getsecid(current, &secid);
+   return process_measurement(file, current_cred(), secid, NULL,
+  0, MAY_READ, func);
 }
 
 const int read_idmap[READING_MAX_ID] = {
-- 
2.25.1



[PATCH v5 06/16] fs/kernel_read_file: Switch buffer size arg to size_t

2020-10-02 Thread Kees Cook
In preparation for further refactoring of kernel_read_file*(), rename
the "max_size" argument to the more accurate "buf_size", and correct
its type to size_t. Add kernel-doc to explain the specifics of how the
arguments will be used. Note that with buf_size now size_t, it can no
longer be negative (and was never called with a negative value). Adjust
callers to use it as a "maximum size" when *buf is NULL.

Signed-off-by: Kees Cook 
Reviewed-by: Mimi Zohar 
Reviewed-by: Luis Chamberlain 
Reviewed-by: James Morris 
Acked-by: Scott Branden 
---
 fs/kernel_read_file.c| 34 +++-
 include/linux/kernel_read_file.h |  8 
 security/integrity/digsig.c  |  2 +-
 security/integrity/ima/ima_fs.c  |  2 +-
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index dc28a8def597..e21a76001fff 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -5,15 +5,31 @@
 #include 
 #include 
 
+/**
+ * kernel_read_file() - read file contents into a kernel buffer
+ *
+ * @file   file to read from
+ * @bufpointer to a "void *" buffer for reading into (if
+ * *@buf is NULL, a buffer will be allocated, and
+ * @buf_size will be ignored)
+ * @buf_size   size of buf, if already allocated. If @buf not
+ * allocated, this is the largest size to allocate.
+ * @id the kernel_read_file_id identifying the type of
+ * file contents being read (for LSMs to examine)
+ *
+ * Returns number of bytes read (no single read will be bigger
+ * than INT_MAX), or negative on error.
+ *
+ */
 int kernel_read_file(struct file *file, void **buf,
-loff_t max_size, enum kernel_read_file_id id)
+size_t buf_size, enum kernel_read_file_id id)
 {
loff_t i_size, pos;
ssize_t bytes = 0;
void *allocated = NULL;
int ret;
 
-   if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
+   if (!S_ISREG(file_inode(file)->i_mode))
return -EINVAL;
 
ret = deny_write_access(file);
@@ -29,7 +45,7 @@ int kernel_read_file(struct file *file, void **buf,
ret = -EINVAL;
goto out;
}
-   if (i_size > INT_MAX || (max_size > 0 && i_size > max_size)) {
+   if (i_size > INT_MAX || i_size > buf_size) {
ret = -EFBIG;
goto out;
}
@@ -75,7 +91,7 @@ int kernel_read_file(struct file *file, void **buf,
 EXPORT_SYMBOL_GPL(kernel_read_file);
 
 int kernel_read_file_from_path(const char *path, void **buf,
-  loff_t max_size, enum kernel_read_file_id id)
+  size_t buf_size, enum kernel_read_file_id id)
 {
struct file *file;
int ret;
@@ -87,14 +103,14 @@ int kernel_read_file_from_path(const char *path, void 
**buf,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, max_size, id);
+   ret = kernel_read_file(file, buf, buf_size, id);
fput(file);
return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
 
 int kernel_read_file_from_path_initns(const char *path, void **buf,
- loff_t max_size,
+ size_t buf_size,
  enum kernel_read_file_id id)
 {
struct file *file;
@@ -113,13 +129,13 @@ int kernel_read_file_from_path_initns(const char *path, 
void **buf,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, max_size, id);
+   ret = kernel_read_file(file, buf, buf_size, id);
fput(file);
return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file_from_path_initns);
 
-int kernel_read_file_from_fd(int fd, void **buf, loff_t max_size,
+int kernel_read_file_from_fd(int fd, void **buf, size_t buf_size,
 enum kernel_read_file_id id)
 {
struct fd f = fdget(fd);
@@ -128,7 +144,7 @@ int kernel_read_file_from_fd(int fd, void **buf, loff_t 
max_size,
if (!f.file)
goto out;
 
-   ret = kernel_read_file(f.file, buf, max_size, id);
+   ret = kernel_read_file(f.file, buf, buf_size, id);
 out:
fdput(f);
return ret;
diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h
index 0ca0bdbed1bd..910039e7593e 100644
--- a/include/linux/kernel_read_file.h
+++ b/include/linux/kernel_read_file.h
@@ -36,16 +36,16 @@ static inline const char *kernel_read_file_id_str(enum 
kernel_read_file_id id)
 }
 
 int kernel_read_file(struct file *file,
-void **buf, loff_t max_size,
+void **buf, size_t buf_size,
 enum kernel_read_file_id id);
 int kernel_read_file_from_path(const char *path,
-  void **buf, loff_t max_size,
+  

[PATCH v5 07/16] fs/kernel_read_file: Add file_size output argument

2020-10-02 Thread Kees Cook
In preparation for adding partial read support, add an optional output
argument to kernel_read_file*() that reports the file size so callers
can reason more easily about their reading progress.

Signed-off-by: Kees Cook 
Reviewed-by: Mimi Zohar 
Reviewed-by: Luis Chamberlain 
Reviewed-by: James Morris 
Acked-by: Scott Branden 
---
 drivers/base/firmware_loader/main.c |  1 +
 fs/kernel_read_file.c   | 19 +--
 include/linux/kernel_read_file.h|  4 
 kernel/kexec_file.c |  4 ++--
 kernel/module.c |  2 +-
 security/integrity/digsig.c |  2 +-
 security/integrity/ima/ima_fs.c |  2 +-
 7 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/base/firmware_loader/main.c 
b/drivers/base/firmware_loader/main.c
index 6df1bdcfeb9d..d9a180148c4b 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -500,6 +500,7 @@ fw_get_filesystem_firmware(struct device *device, struct 
fw_priv *fw_priv,
 
/* load firmware files from the mount namespace of init */
rc = kernel_read_file_from_path_initns(path, &buffer, msize,
+  NULL,
   READING_FIRMWARE);
if (rc < 0) {
if (rc != -ENOENT)
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index e21a76001fff..2e29c38eb4df 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -14,6 +14,8 @@
  * @buf_size will be ignored)
  * @buf_size   size of buf, if already allocated. If @buf not
  * allocated, this is the largest size to allocate.
+ * @file_size  if non-NULL, the full size of @file will be
+ * written here.
  * @id the kernel_read_file_id identifying the type of
  * file contents being read (for LSMs to examine)
  *
@@ -22,7 +24,8 @@
  *
  */
 int kernel_read_file(struct file *file, void **buf,
-size_t buf_size, enum kernel_read_file_id id)
+size_t buf_size, size_t *file_size,
+enum kernel_read_file_id id)
 {
loff_t i_size, pos;
ssize_t bytes = 0;
@@ -49,6 +52,8 @@ int kernel_read_file(struct file *file, void **buf,
ret = -EFBIG;
goto out;
}
+   if (file_size)
+   *file_size = i_size;
 
if (!*buf)
*buf = allocated = vmalloc(i_size);
@@ -91,7 +96,8 @@ int kernel_read_file(struct file *file, void **buf,
 EXPORT_SYMBOL_GPL(kernel_read_file);
 
 int kernel_read_file_from_path(const char *path, void **buf,
-  size_t buf_size, enum kernel_read_file_id id)
+  size_t buf_size, size_t *file_size,
+  enum kernel_read_file_id id)
 {
struct file *file;
int ret;
@@ -103,14 +109,14 @@ int kernel_read_file_from_path(const char *path, void 
**buf,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, buf_size, id);
+   ret = kernel_read_file(file, buf, buf_size, file_size, id);
fput(file);
return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
 
 int kernel_read_file_from_path_initns(const char *path, void **buf,
- size_t buf_size,
+ size_t buf_size, size_t *file_size,
  enum kernel_read_file_id id)
 {
struct file *file;
@@ -129,13 +135,14 @@ int kernel_read_file_from_path_initns(const char *path, 
void **buf,
if (IS_ERR(file))
return PTR_ERR(file);
 
-   ret = kernel_read_file(file, buf, buf_size, id);
+   ret = kernel_read_file(file, buf, buf_size, file_size, id);
fput(file);
return ret;
 }
 EXPORT_SYMBOL_GPL(kernel_read_file_from_path_initns);
 
 int kernel_read_file_from_fd(int fd, void **buf, size_t buf_size,
+size_t *file_size,
 enum kernel_read_file_id id)
 {
struct fd f = fdget(fd);
@@ -144,7 +151,7 @@ int kernel_read_file_from_fd(int fd, void **buf, size_t 
buf_size,
if (!f.file)
goto out;
 
-   ret = kernel_read_file(f.file, buf, buf_size, id);
+   ret = kernel_read_file(f.file, buf, buf_size, file_size, id);
 out:
fdput(f);
return ret;
diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h
index 910039e7593e..023293eaf948 100644
--- a/include/linux/kernel_read_file.h
+++ b/include/linux/kernel_read_file.h
@@ -37,15 +37,19 @@ static inline const char *kernel_read_file_id_str(enum 
kernel_read_file_id id)
 
 int kernel_read_file(struct file *file,
 void **buf, size_t buf_size,
+size_t *file_size,
 enum kernel_read_file_i

[PATCH v5 02/16] fs/kernel_read_file: Remove FIRMWARE_EFI_EMBEDDED enum

2020-10-02 Thread Kees Cook
The "FIRMWARE_EFI_EMBEDDED" enum is a "where", not a "what". It
should not be distinguished separately from just "FIRMWARE", as this
confuses the LSMs about what is being loaded. Additionally, there was
no actual validation of the firmware contents happening.

Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and 
firmware_request_platform()")
Signed-off-by: Kees Cook 
Reviewed-by: Luis Chamberlain 
Acked-by: Scott Branden 
Cc: sta...@vger.kernel.org
---
 drivers/base/firmware_loader/fallback_platform.c | 2 +-
 include/linux/fs.h   | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/base/firmware_loader/fallback_platform.c 
b/drivers/base/firmware_loader/fallback_platform.c
index 685edb7dd05a..6958ab1a8059 100644
--- a/drivers/base/firmware_loader/fallback_platform.c
+++ b/drivers/base/firmware_loader/fallback_platform.c
@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 
opt_flags)
if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
return -ENOENT;
 
-   rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED);
+   rc = security_kernel_load_data(LOADING_FIRMWARE);
if (rc)
return rc;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7336e22d0c5d..3fb7af12d033 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2858,11 +2858,10 @@ static inline void i_readcount_inc(struct inode *inode)
 #endif
 extern int do_pipe_flags(int *, int);
 
-/* This is a list of *what* is being read, not *how*. */
+/* This is a list of *what* is being read, not *how* nor *where*. */
 #define __kernel_read_file_id(id) \
id(UNKNOWN, unknown)\
id(FIRMWARE, firmware)  \
-   id(FIRMWARE_EFI_EMBEDDED, firmware) \
id(MODULE, kernel-module)   \
id(KEXEC_IMAGE, kexec-image)\
id(KEXEC_INITRAMFS, kexec-initramfs)\
-- 
2.25.1



[PATCH v5 01/16] fs/kernel_read_file: Remove FIRMWARE_PREALLOC_BUFFER enum

2020-10-02 Thread Kees Cook
FIRMWARE_PREALLOC_BUFFER is a "how", not a "what", and confuses the LSMs
that are interested in filtering between types of things. The "how"
should be an internal detail made uninteresting to the LSMs.

Fixes: a098ecd2fa7d ("firmware: support loading into a pre-allocated buffer")
Fixes: fd90bc559bfb ("ima: based on policy verify firmware signatures 
(pre-allocated buffer)")
Fixes: 4f0496d8ffa3 ("ima: based on policy warn about loading firmware 
(pre-allocated buffer)")
Signed-off-by: Kees Cook 
Reviewed-by: Mimi Zohar 
Reviewed-by: Luis Chamberlain 
Acked-by: Scott Branden 
Cc: sta...@vger.kernel.org
---
 drivers/base/firmware_loader/main.c | 5 ++---
 fs/exec.c   | 7 ---
 include/linux/fs.h  | 2 +-
 kernel/module.c | 2 +-
 security/integrity/digsig.c | 2 +-
 security/integrity/ima/ima_fs.c | 2 +-
 security/integrity/ima/ima_main.c   | 6 ++
 7 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/base/firmware_loader/main.c 
b/drivers/base/firmware_loader/main.c
index 63b9714a0154..b0ec2721f55d 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -470,14 +470,12 @@ fw_get_filesystem_firmware(struct device *device, struct 
fw_priv *fw_priv,
int i, len;
int rc = -ENOENT;
char *path;
-   enum kernel_read_file_id id = READING_FIRMWARE;
size_t msize = INT_MAX;
void *buffer = NULL;
 
/* Already populated data member means we're loading into a buffer */
if (!decompress && fw_priv->data) {
buffer = fw_priv->data;
-   id = READING_FIRMWARE_PREALLOC_BUFFER;
msize = fw_priv->allocated_size;
}
 
@@ -501,7 +499,8 @@ fw_get_filesystem_firmware(struct device *device, struct 
fw_priv *fw_priv,
 
/* load firmware files from the mount namespace of init */
rc = kernel_read_file_from_path_initns(path, &buffer,
-  &size, msize, id);
+  &size, msize,
+  READING_FIRMWARE);
if (rc) {
if (rc != -ENOENT)
dev_warn(device, "loading %s failed with error 
%d\n",
diff --git a/fs/exec.c b/fs/exec.c
index a91003e28eaa..9233cd50dc4c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -954,6 +954,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t 
*size,
 {
loff_t i_size, pos;
ssize_t bytes = 0;
+   void *allocated = NULL;
int ret;
 
if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
@@ -977,8 +978,8 @@ int kernel_read_file(struct file *file, void **buf, loff_t 
*size,
goto out;
}
 
-   if (id != READING_FIRMWARE_PREALLOC_BUFFER)
-   *buf = vmalloc(i_size);
+   if (!*buf)
+   *buf = allocated = vmalloc(i_size);
if (!*buf) {
ret = -ENOMEM;
goto out;
@@ -1007,7 +1008,7 @@ int kernel_read_file(struct file *file, void **buf, 
loff_t *size,
 
 out_free:
if (ret < 0) {
-   if (id != READING_FIRMWARE_PREALLOC_BUFFER) {
+   if (allocated) {
vfree(*buf);
*buf = NULL;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a08..7336e22d0c5d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2858,10 +2858,10 @@ static inline void i_readcount_inc(struct inode *inode)
 #endif
 extern int do_pipe_flags(int *, int);
 
+/* This is a list of *what* is being read, not *how*. */
 #define __kernel_read_file_id(id) \
id(UNKNOWN, unknown)\
id(FIRMWARE, firmware)  \
-   id(FIRMWARE_PREALLOC_BUFFER, firmware)  \
id(FIRMWARE_EFI_EMBEDDED, firmware) \
id(MODULE, kernel-module)   \
id(KEXEC_IMAGE, kexec-image)\
diff --git a/kernel/module.c b/kernel/module.c
index 1c5cff34d9f2..b2808acac46b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4035,7 +4035,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user 
*, uargs, int, flags)
 {
struct load_info info = { };
loff_t size;
-   void *hdr;
+   void *hdr = NULL;
int err;
 
err = may_init_module();
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index e9cbadade74b..ac02b7632353 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -169,7 +169,7 @@ int __init integrity_add_key(const unsigned int id, const 
void *data,
 
 int __init integrity_load_x509(const unsigned int id, const char *path)
 {
-   void *data;
+   void *data = NULL;
loff_t size;
int rc;
key_perm_t perm;
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
in

Re: [PATCH v2 1/4] arm: dts: qcom: ipq4019: add more labels

2020-10-02 Thread Robert Marko
On Wed, Sep 9, 2020 at 9:56 PM Robert Marko  wrote:
>
> Let's add labels to more commonly used nodes for easier modification in board
> DTS files.
>
> Signed-off-by: Robert Marko 
> Cc: Luka Perkov 
> ---
> Changes since v1:
> * Drop include that does not exist
>
>  arch/arm/boot/dts/qcom-ipq4019.dtsi | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi 
> b/arch/arm/boot/dts/qcom-ipq4019.dtsi
> index 3d056aada8d1..7bf1da916f25 100644
> --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi
> +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi
> @@ -190,7 +190,7 @@ gcc: clock-controller@180 {
> reg = <0x180 0x6>;
> };
>
> -   rng@22000 {
> +   prng: rng@22000 {
> compatible = "qcom,prng";
> reg = <0x22000 0x140>;
> clocks = <&gcc GCC_PRNG_AHB_CLK>;
> @@ -310,7 +310,7 @@ cryptobam: dma@8e04000 {
> status = "disabled";
> };
>
> -   crypto@8e3a000 {
> +   crypto: crypto@8e3a000 {
> compatible = "qcom,crypto-v5.1";
> reg = <0x08e3a000 0x6000>;
> clocks = <&gcc GCC_CRYPTO_AHB_CLK>,
> @@ -396,7 +396,7 @@ blsp1_uart2: serial@78b {
> dma-names = "rx", "tx";
> };
>
> -   watchdog@b017000 {
> +   watchdog: watchdog@b017000 {
> compatible = "qcom,kpss-wdt", "qcom,kpss-wdt-ipq4019";
> reg = <0xb017000 0x40>;
> clocks = <&sleep_clk>;
> --
> 2.26.2
>

Hi,
Is there an issue with the patch preventing the review?

Regards,
Robert


Re: [PATCH v8] ARM: dts: qcom: ipq4019: add USB devicetree nodes

2020-10-02 Thread Robert Marko
On Wed, Sep 9, 2020 at 6:38 PM Robert Marko  wrote:
>
> From: John Crispin 
>
> Since we now have a driver for the USB PHY, and the USB controller is already
> supported by the DWC3 driver, let's add the necessary nodes to the DTSI.
>
> Signed-off-by: John Crispin 
> Signed-off-by: Robert Marko 
> Cc: Luka Perkov 
> Reviewed-by: Vinod Koul 
> ---
> Changes from v7 to v8:
> * Add labels for usb2 and usb3 nodes
> Changes from v6 to v7:
> * Remove changes to qcom-ipq4019-ap.dk01.1.dtsi
> They slipped in unintentionally; we only want to add
> nodes to the DTSI.
>
>  arch/arm/boot/dts/qcom-ipq4019.dtsi | 74 +
>  1 file changed, 74 insertions(+)
>
> diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi 
> b/arch/arm/boot/dts/qcom-ipq4019.dtsi
> index 74d8e2c8e4b3..4a973253024a 100644
> --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi
> +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi
> @@ -605,5 +605,79 @@ ethphy4: ethernet-phy@4 {
> reg = <4>;
> };
> };
> +
> +   usb3_ss_phy: ssphy@9a000 {
> +   compatible = "qcom,usb-ss-ipq4019-phy";
> +   #phy-cells = <0>;
> +   reg = <0x9a000 0x800>;
> +   reg-names = "phy_base";
> +   resets = <&gcc USB3_UNIPHY_PHY_ARES>;
> +   reset-names = "por_rst";
> +   status = "disabled";
> +   };
> +
> +   usb3_hs_phy: hsphy@a6000 {
> +   compatible = "qcom,usb-hs-ipq4019-phy";
> +   #phy-cells = <0>;
> +   reg = <0xa6000 0x40>;
> +   reg-names = "phy_base";
> +   resets = <&gcc USB3_HSPHY_POR_ARES>, <&gcc 
> USB3_HSPHY_S_ARES>;
> +   reset-names = "por_rst", "srif_rst";
> +   status = "disabled";
> +   };
> +
> +   usb3: usb3@8af8800 {
> +   compatible = "qcom,dwc3";
> +   reg = <0x8af8800 0x100>;
> +   #address-cells = <1>;
> +   #size-cells = <1>;
> +   clocks = <&gcc GCC_USB3_MASTER_CLK>,
> +<&gcc GCC_USB3_SLEEP_CLK>,
> +<&gcc GCC_USB3_MOCK_UTMI_CLK>;
> +   clock-names = "master", "sleep", "mock_utmi";
> +   ranges;
> +   status = "disabled";
> +
> +   dwc3@8a0 {
> +   compatible = "snps,dwc3";
> +   reg = <0x8a0 0xf8000>;
> +   interrupts =  IRQ_TYPE_LEVEL_HIGH>;
> +   phys = <&usb3_hs_phy>, <&usb3_ss_phy>;
> +   phy-names = "usb2-phy", "usb3-phy";
> +   dr_mode = "host";
> +   };
> +   };
> +
> +   usb2_hs_phy: hsphy@a8000 {
> +   compatible = "qcom,usb-hs-ipq4019-phy";
> +   #phy-cells = <0>;
> +   reg = <0xa8000 0x40>;
> +   reg-names = "phy_base";
> +   resets = <&gcc USB2_HSPHY_POR_ARES>, <&gcc 
> USB2_HSPHY_S_ARES>;
> +   reset-names = "por_rst", "srif_rst";
> +   status = "disabled";
> +   };
> +
> +   usb2: usb2@60f8800 {
> +   compatible = "qcom,dwc3";
> +   reg = <0x60f8800 0x100>;
> +   #address-cells = <1>;
> +   #size-cells = <1>;
> +   clocks = <&gcc GCC_USB2_MASTER_CLK>,
> +<&gcc GCC_USB2_SLEEP_CLK>,
> +<&gcc GCC_USB2_MOCK_UTMI_CLK>;
> +   clock-names = "master", "sleep", "mock_utmi";
> +   ranges;
> +   status = "disabled";
> +
> +   dwc3@600 {
> +   compatible = "snps,dwc3";
> +   reg = <0x600 0xf8000>;
> +   interrupts =  IRQ_TYPE_LEVEL_HIGH>;
> +   phys = <&usb2_hs_phy>;
> +   phy-names = "usb2-phy";
> +   dr_mode = "host";
> +   };
> +   };
> };
>  };
> --
> 2.26.2
>

Hi,
Is there an issue with the patch preventing the review?

Regards,
Robert


[PATCH v5 11/16] LSM: Add "contents" flag to kernel_read_file hook

2020-10-02 Thread Kees Cook
As with the kernel_load_data LSM hook, add a "contents" flag to the
kernel_read_file LSM hook that indicates whether the LSM can expect
a matching call to the kernel_post_read_file LSM hook with the full
contents of the file. With the coming addition of partial file read
support for kernel_read_file*() API, the LSM will no longer be able
to always see the entire contents of a file during the read calls.

For cases where the LSM must examine the complete file contents,
it will need to do so on its own every time the kernel_read_file
hook is called with contents=false (or reject such cases). Adjust all
existing LSMs to retain existing behavior.

Signed-off-by: Kees Cook 
Reviewed-by: Mimi Zohar 
---
 fs/kernel_read_file.c |  2 +-
 include/linux/ima.h   |  6 --
 include/linux/lsm_hook_defs.h |  2 +-
 include/linux/lsm_hooks.h |  3 +++
 include/linux/security.h  |  6 --
 security/integrity/ima/ima_main.c | 10 +-
 security/loadpin/loadpin.c| 14 --
 security/security.c   |  7 ---
 security/selinux/hooks.c  |  5 +++--
 9 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index 2e29c38eb4df..d73bc3fa710a 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -39,7 +39,7 @@ int kernel_read_file(struct file *file, void **buf,
if (ret)
return ret;
 
-   ret = security_kernel_read_file(file, id);
+   ret = security_kernel_read_file(file, id, true);
if (ret)
goto out;
 
diff --git a/include/linux/ima.h b/include/linux/ima.h
index af9fb8c5f16a..8fa7bcfb2da2 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -23,7 +23,8 @@ extern int ima_file_mprotect(struct vm_area_struct *vma, 
unsigned long prot);
 extern int ima_load_data(enum kernel_load_data_id id, bool contents);
 extern int ima_post_load_data(char *buf, loff_t size,
  enum kernel_load_data_id id, char *description);
-extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
+extern int ima_read_file(struct file *file, enum kernel_read_file_id id,
+bool contents);
 extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
  enum kernel_read_file_id id);
 extern void ima_post_path_mknod(struct dentry *dentry);
@@ -92,7 +93,8 @@ static inline int ima_post_load_data(char *buf, loff_t size,
return 0;
 }
 
-static inline int ima_read_file(struct file *file, enum kernel_read_file_id id)
+static inline int ima_read_file(struct file *file, enum kernel_read_file_id id,
+   bool contents)
 {
return 0;
 }
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 83c6f1f5cc1e..d67cb3502310 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -188,7 +188,7 @@ LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id 
id, bool contents)
 LSM_HOOK(int, 0, kernel_post_load_data, char *buf, loff_t size,
 enum kernel_read_file_id id, char *description)
 LSM_HOOK(int, 0, kernel_read_file, struct file *file,
-enum kernel_read_file_id id)
+enum kernel_read_file_id id, bool contents)
 LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf,
 loff_t size, enum kernel_read_file_id id)
 LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old,
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 6bb4f1a0158c..8814e3d5952d 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -651,6 +651,7 @@
  * @file contains the file structure pointing to the file being read
  * by the kernel.
  * @id kernel read file identifier
+ * @contents if a subsequent @kernel_post_read_file will be called.
  * Return 0 if permission is granted.
  * @kernel_post_read_file:
  * Read a file specified by userspace.
@@ -659,6 +660,8 @@
  * @buf pointer to buffer containing the file contents.
  * @size length of the file contents.
  * @id kernel read file identifier
+ * This must be paired with a prior @kernel_read_file call that had
+ * @contents set to true.
  * Return 0 if permission is granted.
  * @task_fix_setuid:
  * Update the module's state after setting one or more of the user
diff --git a/include/linux/security.h b/include/linux/security.h
index 51c8e4e6b7cc..bc2725491560 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -391,7 +391,8 @@ int security_kernel_load_data(enum kernel_load_data_id id, bool contents);
 int security_kernel_post_load_data(char *buf, loff_t size,
   enum kernel_load_data_id id,
   char *description);
-int security_kernel_read_file(struct file *file, enum kernel_read_file_id id);
+int security_kernel_re

[PATCH v5 15/16] firmware: Add request_partial_firmware_into_buf()

2020-10-02 Thread Kees Cook
From: Scott Branden 

Add request_partial_firmware_into_buf() to allow for portions of a
firmware file to be read into a buffer. This is needed when large firmware
must be loaded in portions from a file on memory-constrained systems.

Signed-off-by: Scott Branden 
Co-developed-by: Kees Cook 
Signed-off-by: Kees Cook 
---
 drivers/base/firmware_loader/firmware.h |   4 +
 drivers/base/firmware_loader/main.c | 101 +++-
 include/linux/firmware.h|  12 +++
 3 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index f86de5d7e0d7..63bd29fdcb9c 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -32,6 +32,8 @@
  * @FW_OPT_FALLBACK_PLATFORM: Enable fallback to device fw copy embedded in
  * the platform's main firmware. If both this fallback and the sysfs
  *  fallback are enabled, then this fallback will be tried first.
+ * @FW_OPT_PARTIAL: Allow partial read of firmware instead of needing to read
+ * entire file.
  */
 enum fw_opt {
FW_OPT_UEVENT   = BIT(0),
@@ -41,6 +43,7 @@ enum fw_opt {
FW_OPT_NOCACHE  = BIT(4),
FW_OPT_NOFALLBACK_SYSFS = BIT(5),
FW_OPT_FALLBACK_PLATFORM= BIT(6),
+   FW_OPT_PARTIAL  = BIT(7),
 };
 
 enum fw_status {
@@ -68,6 +71,7 @@ struct fw_priv {
void *data;
size_t size;
size_t allocated_size;
+   size_t offset;
u32 opt_flags;
 #ifdef CONFIG_FW_LOADER_PAGED_BUF
bool is_paged_buf;
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 78c8e44c08cb..78355095e00d 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -170,10 +170,19 @@ static struct fw_priv *__allocate_fw_priv(const char *fw_name,
  struct firmware_cache *fwc,
  void *dbuf,
  size_t size,
+ size_t offset,
  u32 opt_flags)
 {
struct fw_priv *fw_priv;
 
+   /* For a partial read, the buffer must be preallocated. */
+   if ((opt_flags & FW_OPT_PARTIAL) && !dbuf)
+   return NULL;
+
+   /* Only partial reads are allowed to use an offset. */
+   if (offset != 0 && !(opt_flags & FW_OPT_PARTIAL))
+   return NULL;
+
fw_priv = kzalloc(sizeof(*fw_priv), GFP_ATOMIC);
if (!fw_priv)
return NULL;
@@ -188,6 +197,7 @@ static struct fw_priv *__allocate_fw_priv(const char *fw_name,
fw_priv->fwc = fwc;
fw_priv->data = dbuf;
fw_priv->allocated_size = size;
+   fw_priv->offset = offset;
fw_priv->opt_flags = opt_flags;
fw_state_init(fw_priv);
 #ifdef CONFIG_FW_LOADER_USER_HELPER
@@ -216,12 +226,17 @@ static int alloc_lookup_fw_priv(const char *fw_name,
struct fw_priv **fw_priv,
void *dbuf,
size_t size,
+   size_t offset,
u32 opt_flags)
 {
struct fw_priv *tmp;
 
spin_lock(&fwc->lock);
-   if (!(opt_flags & FW_OPT_NOCACHE)) {
+   /*
+* Do not merge requests that are marked to be non-cached or
+* are performing partial reads.
+*/
+   if (!(opt_flags & (FW_OPT_NOCACHE | FW_OPT_PARTIAL))) {
tmp = __lookup_fw_priv(fw_name);
if (tmp) {
kref_get(&tmp->ref);
@@ -232,7 +247,7 @@ static int alloc_lookup_fw_priv(const char *fw_name,
}
}
 
-   tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size, opt_flags);
+   tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size, offset, opt_flags);
if (tmp) {
INIT_LIST_HEAD(&tmp->list);
if (!(opt_flags & FW_OPT_NOCACHE))
@@ -490,6 +505,9 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
return -ENOMEM;
 
for (i = 0; i < ARRAY_SIZE(fw_path); i++) {
+   size_t file_size = 0;
+   size_t *file_size_ptr = NULL;
+
/* skip the unset customized path */
if (!fw_path[i][0])
continue;
@@ -503,9 +521,18 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv,
 
fw_priv->size = 0;
 
+   /*
+* The total file size is only examined when doing a partial
+* read; the "full read" case needs to fail if the whole
+* firmware was not completely loaded.
+*/
+   if ((fw_priv->opt_flags & FW_OPT_PARTIAL) && buffer)
+   file

<    1   2   3   4   5   6   7   8   9   10   >