[PATCH net-next v3 3/3] netvsc: remove bonding setup script

2017-08-01 Thread Stephen Hemminger
No longer needed, now all managed by transparent VF logic.

Signed-off-by: Stephen Hemminger 
---
 tools/hv/bondvf.sh | 255 -
 1 file changed, 255 deletions(-)
 delete mode 100755 tools/hv/bondvf.sh

diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
deleted file mode 100755
index 80f102860cf8..
--- a/tools/hv/bondvf.sh
+++ /dev/null
@@ -1,255 +0,0 @@
-#!/bin/bash
-
-# This example script creates bonding network devices based on synthetic NIC
-# (the virtual network adapter usually provided by Hyper-V) and the matching
-# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
-# function as one network device, and fail over to the synthetic NIC if VF is
-# down.
-#
-# Usage:
-# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
-#   machine (VM)
-# - Run this scripts on the VM. It will create configuration files in
-#   distro specific directory.
-# - Reboot the VM, so that the bonding config are enabled.
-#
-# The config files are DHCP by default. You may edit them if you need to change
-# to Static IP or change other settings.
-#
-
-sysdir=/sys/class/net
-netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
-bondcnt=0
-
-# Detect Distro
-if [ -f /etc/redhat-release ];
-then
-   cfgdir=/etc/sysconfig/network-scripts
-   distro=redhat
-elif grep -q 'Ubuntu' /etc/issue
-then
-   cfgdir=/etc/network
-   distro=ubuntu
-elif grep -q 'SUSE' /etc/issue
-then
-   cfgdir=/etc/sysconfig/network
-   distro=suse
-else
-   echo "Unsupported Distro"
-   exit 1
-fi
-
-echo Detected Distro: $distro, or compatible
-
-# Get a list of ethernet names
-list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
-eth_cnt=${#list_eth[@]}
-
-echo List of net devices:
-
-# Get the MAC addresses
-for (( i=0; i < $eth_cnt; i++ ))
-do
-   list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
-   echo ${list_eth[$i]}, ${list_mac[$i]}
-done
-
-# Find NIC with matching MAC
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
-   for (( j=i+1; j < $eth_cnt; j++ ))
-   do
-   if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
-   then
-   list_match[$i]=${list_eth[$j]}
-   break
-   fi
-   done
-done
-
-function create_eth_cfg_redhat {
-   local fn=$cfgdir/ifcfg-$1
-
-   rm -f $fn
-   echo DEVICE=$1 >>$fn
-   echo TYPE=Ethernet >>$fn
-   echo BOOTPROTO=none >>$fn
-   echo UUID=`uuidgen` >>$fn
-   echo ONBOOT=yes >>$fn
-   echo PEERDNS=yes >>$fn
-   echo IPV6INIT=yes >>$fn
-   echo MASTER=$2 >>$fn
-   echo SLAVE=yes >>$fn
-}
-
-function create_eth_cfg_pri_redhat {
-   create_eth_cfg_redhat $1 $2
-}
-
-function create_bond_cfg_redhat {
-   local fn=$cfgdir/ifcfg-$1
-
-   rm -f $fn
-   echo DEVICE=$1 >>$fn
-   echo TYPE=Bond >>$fn
-   echo BOOTPROTO=dhcp >>$fn
-   echo UUID=`uuidgen` >>$fn
-   echo ONBOOT=yes >>$fn
-   echo PEERDNS=yes >>$fn
-   echo IPV6INIT=yes >>$fn
-   echo BONDING_MASTER=yes >>$fn
-   echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
-}
-
-function del_eth_cfg_ubuntu {
-   local mainfn=$cfgdir/interfaces
-   local fnlist=( $mainfn )
-
-   local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`)
-
-   local i
-   for i in "${dirlist[@]}"
-   do
-   fnlist+=(`ls $i 2>/dev/null`)
-   done
-
-   local tmpfl=$(mktemp)
-
-   local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
-   local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
-
-   local fn
-   for fn in "${fnlist[@]}"
-   do
-   awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \
-   $fn >$tmpfl
-
-   cp $tmpfl $fn
-   done
-
-   rm $tmpfl
-}
-
-function create_eth_cfg_ubuntu {
-   local fn=$cfgdir/interfaces
-
-   del_eth_cfg_ubuntu $1
-   echo $'\n'auto $1 >>$fn
-   echo iface $1 inet manual >>$fn
-   echo bond-master $2 >>$fn
-}
-
-function create_eth_cfg_pri_ubuntu {
-   local fn=$cfgdir/interfaces
-
-   del_eth_cfg_ubuntu $1
-   echo $'\n'allow-hotplug $1 >>$fn
-   echo iface $1 inet manual >>$fn
-   echo bond-master $2 >>$fn
-   echo bond-primary $1 >>$fn
-}
-
-function create_bond_cfg_ubuntu {
-   local fn=$cfgdir/interfaces
-
-   del_eth_cfg_ubuntu $1
-
-   echo $'\n'auto $1 >>$fn
-   echo iface $1 inet dhcp >>$fn
-   echo bond-mode active-backup >>$fn
-   echo bond-miimon 100 >>$fn
-   echo bond-slaves none >>$fn
-}
-
-function create_eth_cfg_suse {
-local fn=$cfgdir/ifcfg-$1
-
-rm -f $fn
-   echo BOOTPROTO=none >>$fn
-   echo STARTMODE=auto >>$fn
-}
-
-function create_eth_cfg_pri_suse {
-   local fn=$cfgdir/ifcfg-$1
-
-   rm -f $fn
-   echo BOOTPROTO=none >>$fn
-   echo 

[PATCH net-next v3 2/3] netvsc: add documentation

2017-08-01 Thread Stephen Hemminger
Add some background documentation on netvsc device options
and limitations.

Signed-off-by: Stephen Hemminger 
---
 Documentation/networking/netvsc.txt | 63 +
 MAINTAINERS |  1 +
 2 files changed, 64 insertions(+)
 create mode 100644 Documentation/networking/netvsc.txt

diff --git a/Documentation/networking/netvsc.txt 
b/Documentation/networking/netvsc.txt
new file mode 100644
index ..4ddb4e4b0426
--- /dev/null
+++ b/Documentation/networking/netvsc.txt
@@ -0,0 +1,63 @@
+Hyper-V network driver
+==
+
+Compatibility
+=
+
+This driver is compatible with Windows Server 2012 R2, 2016 and
+Windows 10.
+
+Features
+
+
+  Checksum offload
+  
+  The netvsc driver supports checksum offload as long as the
+  Hyper-V host version does. Windows Server 2016 and Azure
+  support checksum offload for TCP and UDP for both IPv4 and
+  IPv6. Windows Server 2012 only supports checksum offload for TCP.
+
+  Receive Side Scaling
+  
+  Hyper-V supports receive side scaling. For TCP, packets are
+  distributed among available queues based on IP address and port
+  number. Current versions of Hyper-V host, only distribute UDP
+  packets based on the IP source and destination address.
+  The port number is not used as part of the hash value for UDP.
+  Fragmented IP packets are not distributed between queues;
+  all fragmented packets arrive on the first channel.
+
+  Generic Receive Offload, aka GRO
+  
+  The driver supports GRO and it is enabled by default. GRO coalesces
+  like packets and significantly reduces CPU usage under heavy Rx
+  load.
+
+  SR-IOV support
+  --
+  Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
+  is enabled in both the vSwitch and the guest configuration, then the
+  Virtual Function (VF) device is passed to the guest as a PCI
+  device. In this case, both a synthetic (netvsc) and VF device are
+  visible in the guest OS and both NIC's have the same MAC address.
+
+  The VF is enslaved by netvsc device.  The netvsc driver will transparently
+  switch the data path to the VF when it is available and up.
+  Network state (addresses, firewall, etc) should be applied only to the
+  netvsc device; the slave device should not be accessed directly in
+  most cases.  The exceptions are if some special queue discipline or
+  flow direction is desired, these should be applied directly to the
+  VF slave device.
+
+  Receive Buffer
+  --
+  Packets are received into a receive area which is created when device
+  is probed. The receive area is broken into MTU sized chunks and each may
+  contain one or more packets. The number of receive sections may be changed
+  via ethtool Rx ring parameters.
+
+  There is a similar send buffer which is used to aggregate packets for 
sending.
+  The send area is broken into chunks of 6144 bytes, each of section may
+  contain one or more packets. The send buffer is an optimization, the driver
+  will use slower method to handle very large packets or if the send buffer
+  area is exhausted.
diff --git a/MAINTAINERS b/MAINTAINERS
index 207e45310620..448f2f67802f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6258,6 +6258,7 @@ M:Haiyang Zhang 
 M: Stephen Hemminger 
 L: de...@linuxdriverproject.org
 S: Maintained
+F: Documentation/networking/netvsc.txt
 F: arch/x86/include/asm/mshyperv.h
 F: arch/x86/include/uapi/asm/hyperv.h
 F: arch/x86/kernel/cpu/mshyperv.c
-- 
2.11.0

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v3 0/3] netvsc: transparent VF support

2017-08-01 Thread Stephen Hemminger
This patch set changes how SR-IOV Virtual Function devices are managed
in the Hyper-V network driver. This version is rebased onto current net-next.

Background

In Hyper-V SR-IOV can be enabled (and disabled) by changing guest settings
on host. When SR-IOV is enabled a matching PCI device is hot plugged and
visible on guest. The VF device is an add-on to an existing netvsc
device, and has the same MAC address.

How is this different?

The original support of VF relied on using bonding driver in active
standby mode to handle the VF device.

With the new netvsc VF logic, the Linux hyper-V network
virtual driver will directly manage the link to SR-IOV VF device.
When VF device is detected (hot plug) it is automatically made a
slave device of the netvsc device. The VF device state reflects
the state of the netvsc device; i.e. if netvsc is set down, then
VF is set down. If netvsc is set up, then VF is brought up.
 
Packet flow is independent of VF status; all packets are sent and
received as if they were associated with the netvsc device. If VF is
removed or link is down then the synthetic VMBUS path is used.
 
What was wrong with using bonding script?

A lot of work went into getting the bonding script to work on all
distributions, but it was a major struggle. Linux network devices
can be configured many, many ways and there is no one solution from
userspace to make it all work. What is really hard is when
configuration is attached to synthetic device during boot (eth0) and
then the same addresses and firewall rules needs to also work later if
doing bonding. The new code gets around all of this.
 
How does VF work during initialization?

Since all packets are sent and received through the logical netvsc
device, initialization is much easier. Just configure the regular
netvsc Ethernet device; when/if SR-IOV is enabled it just
works. Provisioning and cloud init only need to worry about setting up
netvsc device (eth0). If SR-IOV is enabled (even as a later step), the
address and rules stay the same.
 
What devices show up?

Both netvsc and PCI devices are visible in the system. The netvsc
device is active and named in usual manner (eth0). The PCI device is
visible to Linux and gets renamed by udev to a persistent name
(enP2p3s0). The PCI device name is now irrelevant now.

The logic also sets the PCI VF device SLAVE flag on the network
device so network tools can see the relationship if they are smart
enough to understand how layered devices work.
 
This is a lot like how I see Windows working.
The VF device is visible in Device Manager, but is not configured.
 
Is there any performance impact?
There is no visible change in performance. The bonding
and netvsc driver both have equivalent steps.
 
Is it compatible with old bonding script?

It turns out that if you use the old bonding script, then everything
still works but in a sub-optimum manner. What happens is that bonding
is unable to steal the VF from the netvsc device so it creates a one
legged bond.  Packet flow then is:
bond0 <--> eth0 <- -> VF (enP2p3s0).
In other words, if you get it wrong it still works, just
awkward and slower.
 
What if I add address or firewall rule onto the VF?

Same problems occur with now as already occur with bonding, bridging,
teaming on Linux if user incorrectly does configuration onto
an underlying slave device. It will sort of work, packets will come in
and out but the Linux kernel gets confused and things like ARP don’t
work right.  There is no way to block manipulation of the slave
device, and I am sure someone will find some special use case where
they want it.

Stephen Hemminger (3):
  netvsc: transparent VF management
  netvsc: add documentation
  netvsc: remove bonding setup script

 Documentation/networking/netvsc.txt |  63 ++
 MAINTAINERS |   1 +
 drivers/net/hyperv/hyperv_net.h |  12 ++
 drivers/net/hyperv/netvsc_drv.c | 419 
 tools/hv/bondvf.sh  | 255 --
 5 files changed, 406 insertions(+), 344 deletions(-)
 create mode 100644 Documentation/networking/netvsc.txt
 delete mode 100755 tools/hv/bondvf.sh

-- 
2.11.0

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Greg Kroah-Hartman
On Tue, Aug 01, 2017 at 11:30:01PM +0200, Boris Brezillon wrote:
> > > > No release type?  Oh that's bad bad bad and implies you have never
> > > > removed a device from your system as the kernel would have complained
> > > > loudly at you.  
> > > 
> > > You got me, never tried to remove a device :-). Note that these  
> > > ->release() hooks will just be dummy ones, because right now, device  
> > > resources are freed at bus destruction time.  
> > 
> > You better not have a "dummy" release hook, do that and as per the
> > kernel documentation, I get to make fun of you in public for doing that
> > :(
> 
> I'm not afraid of admitting I don't know everything, even the
> simplest things that you consider as basics for a kernel developer. You
> can make fun of me publicly if you want but that's not helping :-P.

No, I am referring to the Documentation/kobject.txt file, where it says:
One important point cannot be overstated: every kobject must
have a release() method, and the kobject must persist (in a
consistent state) until that method is called. If these
constraints are not met, the code is flawed.  Note that the
kernel will warn you if you forget to provide a release()
method.  Do not try to get rid of this warning by providing an
"empty" release function; you will be mocked mercilessly by the
kobject maintainer if you attempt this.

Sometimes I wonder why I even write documentation...

The point is, you have to release the memory the device structure "owns"
in the release callback, if not, then the model is not correct.  That's
all, please fix up your code to do so and I will be glad to review it
again.  I'm not trying to be rude here at all, but please, at the least,
read the documentation we have already first...

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next v2 0/3] netvsc: transparent SR-IOV VF support

2017-08-01 Thread David Miller
From: Stephen Hemminger 
Date: Mon, 31 Jul 2017 16:45:21 -0700

> This patch set changes how SR-IOV Virtual Function devices are managed
> in the Hyper-V network driver. It was part of earlier bundle, but
> is now updated.

I think you need to do a rebase.  I just merged net into net-next and
this created some conflicts.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
Le Tue, 1 Aug 2017 19:27:03 +0200,
Wolfram Sang  a écrit :

> > I'm surprised they didn't allow for slave clock stretching when
> > communicating with a legacy i2c device, it will prohibit use of a rather
> > large class of devices. :(  
> 
> Yes, but I3C is push/pull IIRC.

It is.

> 
> > As for interrupts you are always free to wire up an out-of-band
> > interrupt like before. :)  
> 
> Yes, my wording was a bit too strong. It is possible, sure. Yet, I
> understood that one of the features of I3C is to have in-band interrupt
> support. We will see if the demand for backward compatibility or "saving
> pins" is higher.
> 

Indeed, you can use in-band interrupts if your device is able to
generate them, but that doesn't prevent I3C device designers from using
an external pin to signal interrupts if they prefer.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
Hi Greg,

Le Tue, 1 Aug 2017 10:51:33 -0700,
Greg Kroah-Hartman  a écrit :

> On Tue, Aug 01, 2017 at 12:48:01PM +0200, Boris Brezillon wrote:
> > > > +static DEFINE_MUTEX(i3c_core_lock);
> > > > +
> > > > +void i3c_bus_lock(struct i3c_bus *bus, bool exclusive)
> > > > +{
> > > > +   if (exclusive)
> > > > +   down_write(>lock);
> > > > +   else
> > > > +   down_read(>lock);
> > > > +}
> > > 
> > > The "exclusive" flag is odd, and messy, and hard to understand, don't
> > > you agree?  
> > 
> > I could create 2 functions, one for the exclusive lock and the other
> > one for the shared lock.  
> 
> Or you could just use a simple mutex until you determine you really
> would do better with a rw lock :)
> 
> > > And have you measured the difference in using a rw lock over
> > > a normal mutex and found it to be faster?  If not, just use a normal
> > > mutex, it's simpler and almost always better in the end.  
> > 
> > I did not measure the difference, but using a standard lock means
> > serializing all I3C accesses going through a given master in the core.  
> 
> Which you are doing with a rw lock anyway, right?

Absolutely not. If you look more closely at the code you'll see that
most of the time the lock is taken in read/non-exclusive mode. The only
situations where it's taken in exclusive mode is when the operation (and
associated command) has an impact on the bus and/or its devices.

For instance, resetting addresses of all I3C devices on the bus (using
RSTDAA) means you won't be able to send I3C messages to these devices
after that. If you don't take an exclusive lock to protect this
operation, that means you might end up with drivers queuing messages
with a device address that is about to be invalid. Also, we might soon
provide the possibility to change I3C dynamic address at runtime, which
is important since the dynamic address also encodes the priority of the
device when it's generating in-band interrupts (the lower the address
the higher the priority). We need this 'change dynamic address'
operation to be atomic for the same reason: to prevent drivers from
sending messages to an invalid address.

> 
> > Note that this lock is not here to guarantee that calls to
> > master->ops->xxx() are serialized (this part is left to the master
> > driver), but to ensure that when a bus management operation is in
> > progress (like changing the address of a device on the bus), no one can
> > send I3C frames. If we don't do that, one could queue an I3C transfer,
> > and by the time this transfer reach the bus, the I3C device this
> > message was targeting may have a different address.  
> 
> That sounds really odd.  locks should protect data, not bus access,
> right?

Well, it's protecting data: the dynamic address is a piece of
information attached to the device. 

> 
> > Unless you see a good reason to not use a R/W lock, I'd like to keep it
> > this way because master IPs are likely to implement advanced queuing
> > mechanism (allows one to queue new transfers even if the master is
> > already busy processing other requests), and serializing things at the
> > framework level will just prevent us from using this kind of
> > optimization.  
> 
> Unless you can prove otherwise, using a rw lock is almost always worse
> than just a mutex.

Is it still true when it's taken in non-exclusive mode most of the
time, and the time you spend in the critical section is non-negligible?

I won't pretend I know better than you do what is preferable, it's just
that the RW lock seemed appropriate to me for the situation I tried to
described here.

> And you shouldn't have a lock for bus transactions,
> that's just going to be a total mess.

It's not a lock for bus transactions, it's lock to protect from any
operation that can have an impact on the bus or its devices (see the
'change device address' example above).

> You could have a lock for a
> single device access, but that still seems really strange, is the i3c
> spec that bad?

Having a lock per device would complicate even more the situation
because some operations like the broadcasted RSTDAA have an impact on
all devices on the bus.

> 
> > > > +static ssize_t hdrcap_show(struct device *dev,
> > > > +  struct device_attribute *da,
> > > > +  char *buf)
> > > > +{
> > > > +   struct i3c_device *i3cdev = dev_to_i3cdev(dev);
> > > > +   struct i3c_bus *bus = i3c_device_get_bus(i3cdev);
> > > > +   unsigned long caps = i3cdev->info.hdr_cap;
> > > > +   ssize_t offset = 0, ret;
> > > > +   int mode;
> > > > +
> > > > +   i3c_bus_lock(bus, false);
> > > > +   for_each_set_bit(mode, , 8) {
> > > > +   if (mode >= ARRAY_SIZE(hdrcap_strings))
> > > > +   break;
> > > > +
> > > > +   if (!hdrcap_strings[mode])
> > > > +   continue;
> > > > +
> > > > +   ret = sprintf(buf 

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Arnd Bergmann
On Tue, Aug 1, 2017 at 5:14 PM, Boris Brezillon
 wrote:
> On Tue, 1 Aug 2017 16:22:21 +0200 Arnd Bergmann  wrote:
>> On Tue, Aug 1, 2017 at 3:58 PM, Boris Brezillon
>>  wrote:
>> > On Tue, 1 Aug 2017 15:34:14 +0200
>> > Boris Brezillon  wrote:
>> >> On Tue, 1 Aug 2017 15:11:44 +0200
>> >> Arnd Bergmann  wrote:
>> >> > On Tue, Aug 1, 2017 at 2:29 PM, Boris Brezillon
>> >> >  wrote:
>> > I just realized I forgot to add a "depends on I2C" in the I3C Kconfig
>> > entry. Indeed, I'm unconditionally calling functions provided by the
>> > I2C framework which have no dummy wrapper when I2C support is disabled.
>> > I could of course conditionally compile some portion of the I3C
>> > framework so that it still builds when I2C is disabled but I'm not sure
>> > it's worth the trouble.
>> >
>> > This "depends on I2C" should also solve the I2C+I3C driver issue, since
>> > I2C is necessarily enabled when I3C is.
>> >
>> > Am I missing something?
>>
>> That should solve another part of the problem, as a combined driver then
>> just needs 'depends on I3C'.
>>
>> On top of that, the i3c_driver structure could also contain callback
>> pointers for the i2c subsystem, e.g. i2c_probe(), i2c_remove() etc.
>> When the i2c_probe() callback exists, the i3c layer could construct
>> a 'struct i2c_driver' with those callbacks and register that under the
>> cover. This would mean that combined drivers no longer need to
>> register two driver objects.
>
> That should work. Actually, i2c_driver contains a few more hooks, like
> ->alert(), ->command() and ->detect(). Of course we could assume that
> I3C/I2C drivers do not need them,

I was thinking we can add them as they are needed.

> but I'm wondering if it's not easier
> to just add an i2c_driver pointer inside the i3c_driver struct and let
> the driver populate it if it needs to supports both protocols.
>
> Something like:
>
> struct i3c_driver {
> ...
> struct i2c_driver *i2c_compat;
> ...
> };
>
>
> and then in I3C/I2C drivers:
>
> static struct i2c_driver my_i2c_driver = {
> ...
> };
>
> static struct i3c_driver my_i3c_driver = {
> ...
> .i2c_compat = _i2c_driver,
> ...
> };
> module_i3c_driver(my_i3c_driver);
>
>
>
> Of course, you'll have a few fields of ->i2c_compat that would be
> filled by the core (like the driver name which can be extracted from
> my_i3c_driver->driver.name).

Right, that would work too, but it's almost the same as the version
you proposed earlier that would use

module_i2c_i3c_driver(my_i2c_driver, my_i3c_driver);

It's probably a little cleaner this way in the subsystem implementation
compared to my suggestion of adding the i2c callback pointers in
struct i3c_driver, while that would make the drivers look a little nicer
(and save a few lines per driver).

 Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread Mark Salyzyn

On 08/01/2017 10:35 AM, Prarit Bhargava wrote:


On 08/01/2017 01:00 PM, John Stultz wrote:

On Tue, Aug 1, 2017 at 5:55 AM, Prarit Bhargava  wrote:

printk.time=1/CONFIG_PRINTK_TIME=1 adds a unmodified local hardware clock
timestamp to printk messages.  The local hardware clock loses time each
day making it difficult to determine exactly when an issue has occurred in
the kernel log, and making it difficult to determine how kernel and
hardware issues relate to each other in real time.

Make printk output different timestampes by adding options for no
timestamp, the local hardware clock, the monotonic clock, and the real
clock.  Allow a user to pick one of the clocks by using the printk.time
kernel parameter.  Output the type of clock in
/sys/module/printk/parameters/time so userspace programs can interpret the
timestamp.

Real clock & 32-bit systems:  Selecting the real clock printk timestamp may
lead to unlikely situations where a timestamp is wrong because the real time
offset is read without the protection of a sequence lock in the call to
ktime_get_log_ts() in printk_get_ts().

v2: Use peterz's suggested Kconfig options.  Merge patchset together.  Fix
i386 !CONFIG_PRINTK builds.

Signed-off-by: Prarit Bhargava 

...

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 98fe715522e8..7a8870b4ddbb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1,8 +1,39 @@
  menu "printk and dmesg options"

+choice
+   prompt "printk default clock"
+   config PRINTK_TIME_DISABLE
+   bool "Disabled"
+   help
+Selecting this option disables the time stamps of printk().
+
+   config PRINTK_TIME_LOCAL
+   bool "Local Clock"
+   help
+ Selecting this option causes the time stamps of printk() to be
+ stamped with the unadjusted hardware clock.
+
+   config PRINTK_TIME_MONO
+   bool "CLOCK_MONOTONIC"
+   help
+ Selecting this option causes the time stamps of printk() to be
+ stamped with the adjusted monotonic clock.
+
+   config PRINTK_TIME_REAL
+   bool "CLOCK_REALTIME"
+   help
+ Selecting this option causes the time stamps of printk() to be
+ stamped with the adjusted realtime clock.

Its been asked already, but I've not yet seen an answer.

Sorry for missing this.


Is there a reason your not also adding PRINTK_TIME_BOOT here (which to
me would be more generally useful then REAL or MONO)?

REAL has been useful to me in debug cases where events on the system were timed
to the wall clock (ex cron job running at 3AM).  I hadn't really thought much
about using BOOT TBH because MONO seemed to work just fine.

Mark Salyzyn, did you want BOOT or MONO?

P.


You must IMHO include MONO (default when on?), BOOT and REAL if you are 
offering the ability for the kernel to switch time base.


[TL;DR]

I had a partner request for Boottime (for kernel and userspace) for 
those that are providing products that focus more on sensors. We said 
no, case closed. This option in your patch would reopen that possibility.


We have too many use cases, too many partners all pulling in different 
directions. We have met with resistance moving userspace to monotonic to 
match kernel on phones, preference remains realtime for user space, and 
monotonic for kernel. Yet watch partners preferred moving all to 
monotonic time, but they required realtime (or could do with boottime) 
dual-print to monitor battery and power. We only wanted to print 
realtime (or monotonic depending on case) during time disruptions 
(suspend/resume mainly). With your proposal we may have less resistance 
to realtime for kernel and userspace, but we will have partners that 
insist on using monotonic none-the-less because we do not have control, 
so we _must_ provide the dual-time option for them.


The dual-time in disruptive cases depends a _lot_ on what happens with 
this patch. I am expecting mine to be very sensitive to the config 
settings (ie: if both select realtime, then no dual-time is printed), so 
my proposal is on hiatus until this is resolved.


-- Mark


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread Thomas Gleixner
On Tue, 1 Aug 2017, Prarit Bhargava wrote:
> On 08/01/2017 01:00 PM, John Stultz wrote:
> > Its been asked already, but I've not yet seen an answer.
> 
> Sorry for missing this.
> 
> > Is there a reason your not also adding PRINTK_TIME_BOOT here (which to
> > me would be more generally useful then REAL or MONO)?
> 
> REAL has been useful to me in debug cases where events on the system were 
> timed
> to the wall clock (ex cron job running at 3AM).  I hadn't really thought much
> about using BOOT TBH because MONO seemed to work just fine.
> 
> Mark Salyzyn, did you want BOOT or MONO?

We really want both.

Thanks,

tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [v4 2/4] mm, oom: cgroup-aware OOM killer

2017-08-01 Thread Roman Gushchin
On Tue, Aug 01, 2017 at 07:03:03PM +0200, Michal Hocko wrote:
> On Tue 01-08-17 16:25:48, Roman Gushchin wrote:
> > On Tue, Aug 01, 2017 at 04:54:35PM +0200, Michal Hocko wrote:
> [...]
> > > I would reap out the oom_kill_process into a separate patch.
> > 
> > It was a separate patch, I've merged it based on Vladimir's feedback.
> > No problems, I can divide it back.
> 
> It would make the review slightly more easier
> > 
> > > > -static void oom_kill_process(struct oom_control *oc, const char 
> > > > *message)
> > > > +static void __oom_kill_process(struct task_struct *victim)
> > > 
> > > To the rest of the patch. I have to say I do not quite like how it is
> > > implemented. I was hoping for something much simpler which would hook
> > > into oom_evaluate_task. If a task belongs to a memcg with kill-all flag
> > > then we would update the cumulative memcg badness (more specifically the
> > > badness of the topmost parent with kill-all flag). Memcg will then
> > > compete with existing self contained tasks (oom_badness will have to
> > > tell whether points belong to a task or a memcg to allow the caller to
> > > deal with it). But it shouldn't be much more complex than that.
> > 
> > I'm not sure, it will be any simpler. Basically I'm doing the same:
> > the difference is that you want to iterate over tasks and for each
> > task traverse the memcg tree, update per-cgroup oom score and find
> > the corresponding memcg(s) with the kill-all flag. I'm doing the opposite:
> > traverse the cgroup tree, and for each leaf cgroup iterate over processes.
> 
> Yeah but this doesn't fit very well to the existing scheme so we would
> need two different schemes which is not ideal from maint. point of view.
> We also do not have to duplicate all the tricky checks we already do in
> oom_evaluate_task. So I would prefer if we could try to hook there and
> do the special handling there.

I hope, that iterating over all tasks just to check if there are
in-flight OOM victims might be optimized at some point.
That means, we would be able to choose a victim much cheaper.
It's not easy, but it feels as a right direction to go.

Also, adding new tricks to the oom_evaluate_task() will make the code
even more hairy. Some of the existing tricks are useless for memcg selection.

> 
> > Also, please note, that even without the kill-all flag the decision is made
> > on per-cgroup level (except tasks in the root cgroup).
> 
> Yeah and I am not sure this is a reasonable behavior. Why should we
> consider memcgs which are not kill-all as a single entity?

I think, it's reasonable to choose a cgroup/container to blow off based on
the cgroup oom_priority/size (including hierarchical settings), and then
kill one biggest or all tasks depending on cgroup settings.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Greg Kroah-Hartman
On Tue, Aug 01, 2017 at 12:48:01PM +0200, Boris Brezillon wrote:
> > > +static DEFINE_MUTEX(i3c_core_lock);
> > > +
> > > +void i3c_bus_lock(struct i3c_bus *bus, bool exclusive)
> > > +{
> > > + if (exclusive)
> > > + down_write(>lock);
> > > + else
> > > + down_read(>lock);
> > > +}  
> > 
> > The "exclusive" flag is odd, and messy, and hard to understand, don't
> > you agree?
> 
> I could create 2 functions, one for the exclusive lock and the other
> one for the shared lock.

Or you could just use a simple mutex until you determine you really
would do better with a rw lock :)

> > And have you measured the difference in using a rw lock over
> > a normal mutex and found it to be faster?  If not, just use a normal
> > mutex, it's simpler and almost always better in the end.
> 
> I did not measure the difference, but using a standard lock means
> serializing all I3C accesses going through a given master in the core.

Which you are doing with a rw lock anyway, right?

> Note that this lock is not here to guarantee that calls to
> master->ops->xxx() are serialized (this part is left to the master
> driver), but to ensure that when a bus management operation is in
> progress (like changing the address of a device on the bus), no one can
> send I3C frames. If we don't do that, one could queue an I3C transfer,
> and by the time this transfer reach the bus, the I3C device this
> message was targeting may have a different address.

That sounds really odd.  locks should protect data, not bus access,
right?

> Unless you see a good reason to not use a R/W lock, I'd like to keep it
> this way because master IPs are likely to implement advanced queuing
> mechanism (allows one to queue new transfers even if the master is
> already busy processing other requests), and serializing things at the
> framework level will just prevent us from using this kind of
> optimization.

Unless you can prove otherwise, using a rw lock is almost always worse
than just a mutex.  And you shouldn't have a lock for bus transactions,
that's just going to be a total mess.  You could have a lock for a
single device access, but that still seems really strange, is the i3c
spec that bad?

> > > +static ssize_t hdrcap_show(struct device *dev,
> > > +struct device_attribute *da,
> > > +char *buf)
> > > +{
> > > + struct i3c_device *i3cdev = dev_to_i3cdev(dev);
> > > + struct i3c_bus *bus = i3c_device_get_bus(i3cdev);
> > > + unsigned long caps = i3cdev->info.hdr_cap;
> > > + ssize_t offset = 0, ret;
> > > + int mode;
> > > +
> > > + i3c_bus_lock(bus, false);
> > > + for_each_set_bit(mode, , 8) {
> > > + if (mode >= ARRAY_SIZE(hdrcap_strings))
> > > + break;
> > > +
> > > + if (!hdrcap_strings[mode])
> > > + continue;
> > > +
> > > + ret = sprintf(buf + offset, "%s\n", hdrcap_strings[mode]);  
> > 
> > Multiple lines in a single sysfs file?  No.
> 
> Okay. Would that be okay with a different separator (like a comma)?

No, sysfs files are "one value per file", given you don't have any
documentation saying what this file is supposed to be showing, I can't
really judge the proper way for you to present it to userspace :)

> > > +static const struct attribute_group *i3c_device_groups[] = {
> > > + _device_group,
> > > + NULL,
> > > +};  
> > 
> > ATTRIBUTE_GROUPS()?
> 
> My initial plan was to have a common set of attributes that apply to
> both devices and masters, and then add specific attributes in each of
> them when they only apply to a specific device type.

That's fine, but you do know that attributes can be enabled/disabled at
device creation time with the return value of the callback is_visable(),
right?  Why not just use that here, simplifying a lot of logic?

> Just out of curiosity, what's the preferred solution when you need to
> do something like that? Should we just use ATTRIBUTE_GROUPS() and
> duplicate the entries that apply to both device types?

is_visable()?

> > No release type?  Oh that's bad bad bad and implies you have never
> > removed a device from your system as the kernel would have complained
> > loudly at you.
> 
> You got me, never tried to remove a device :-). Note that these
> ->release() hooks will just be dummy ones, because right now, device
> resources are freed at bus destruction time.

You better not have a "dummy" release hook, do that and as per the
kernel documentation, I get to make fun of you in public for doing that
:(

> Also, I see that dev->release() is called instead of
> dev->type->release() if it's not NULL [1]. what's the preferred solution
> here? Set dev->release or type->release()?

It depends on how your bus is managed, who controls the creation of the
resources, free it in the same place you create it.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

Re: [PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread John Stultz
On Tue, Aug 1, 2017 at 10:35 AM, Prarit Bhargava  wrote:
>
>
> On 08/01/2017 01:00 PM, John Stultz wrote:
>> Is there a reason your not also adding PRINTK_TIME_BOOT here (which to
>> me would be more generally useful then REAL or MONO)?
>
> REAL has been useful to me in debug cases where events on the system were 
> timed
> to the wall clock (ex cron job running at 3AM).  I hadn't really thought much
> about using BOOT TBH because MONO seemed to work just fine.
>
> Mark Salyzyn, did you want BOOT or MONO?

I know Mark has specific formatting needs, so I'm not going to speak
for him, but BOOT is actually a nice improvement over MONO, since it
includes suspend time, and avoids inconsistencies due to userspace
tweaking the time, which REALTIME has.  So for debugging suspend
related issues and being able to understand how much time a system has
spent in suspend vs not it would be quite useful.

thanks
-john
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread Prarit Bhargava


On 08/01/2017 01:00 PM, John Stultz wrote:
> On Tue, Aug 1, 2017 at 5:55 AM, Prarit Bhargava  wrote:
>> printk.time=1/CONFIG_PRINTK_TIME=1 adds a unmodified local hardware clock
>> timestamp to printk messages.  The local hardware clock loses time each
>> day making it difficult to determine exactly when an issue has occurred in
>> the kernel log, and making it difficult to determine how kernel and
>> hardware issues relate to each other in real time.
>>
>> Make printk output different timestampes by adding options for no
>> timestamp, the local hardware clock, the monotonic clock, and the real
>> clock.  Allow a user to pick one of the clocks by using the printk.time
>> kernel parameter.  Output the type of clock in
>> /sys/module/printk/parameters/time so userspace programs can interpret the
>> timestamp.
>>
>> Real clock & 32-bit systems:  Selecting the real clock printk timestamp may
>> lead to unlikely situations where a timestamp is wrong because the real time
>> offset is read without the protection of a sequence lock in the call to
>> ktime_get_log_ts() in printk_get_ts().
>>
>> v2: Use peterz's suggested Kconfig options.  Merge patchset together.  Fix
>> i386 !CONFIG_PRINTK builds.
>>
>> Signed-off-by: Prarit Bhargava 
> ...
>> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
>> index 98fe715522e8..7a8870b4ddbb 100644
>> --- a/lib/Kconfig.debug
>> +++ b/lib/Kconfig.debug
>> @@ -1,8 +1,39 @@
>>  menu "printk and dmesg options"
>>
>> +choice
>> +   prompt "printk default clock"
>> +   config PRINTK_TIME_DISABLE
>> +   bool "Disabled"
>> +   help
>> +Selecting this option disables the time stamps of printk().
>> +
>> +   config PRINTK_TIME_LOCAL
>> +   bool "Local Clock"
>> +   help
>> + Selecting this option causes the time stamps of printk() to be
>> + stamped with the unadjusted hardware clock.
>> +
>> +   config PRINTK_TIME_MONO
>> +   bool "CLOCK_MONOTONIC"
>> +   help
>> + Selecting this option causes the time stamps of printk() to be
>> + stamped with the adjusted monotonic clock.
>> +
>> +   config PRINTK_TIME_REAL
>> +   bool "CLOCK_REALTIME"
>> +   help
>> + Selecting this option causes the time stamps of printk() to be
>> + stamped with the adjusted realtime clock.
> 
> Its been asked already, but I've not yet seen an answer.

Sorry for missing this.

> Is there a reason your not also adding PRINTK_TIME_BOOT here (which to
> me would be more generally useful then REAL or MONO)?

REAL has been useful to me in debug cases where events on the system were timed
to the wall clock (ex cron job running at 3AM).  I hadn't really thought much
about using BOOT TBH because MONO seemed to work just fine.

Mark Salyzyn, did you want BOOT or MONO?

P.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Wolfram Sang

> I'm surprised they didn't allow for slave clock stretching when
> communicating with a legacy i2c device, it will prohibit use of a rather
> large class of devices. :(

Yes, but I3C is push/pull IIRC.

> As for interrupts you are always free to wire up an out-of-band
> interrupt like before. :)

Yes, my wording was a bit too strong. It is possible, sure. Yet, I
understood that one of the features of I3C is to have in-band interrupt
support. We will see if the demand for backward compatibility or "saving
pins" is higher.



signature.asc
Description: PGP signature


Re: [v4 2/4] mm, oom: cgroup-aware OOM killer

2017-08-01 Thread Michal Hocko
On Tue 01-08-17 16:25:48, Roman Gushchin wrote:
> On Tue, Aug 01, 2017 at 04:54:35PM +0200, Michal Hocko wrote:
[...]
> > I would reap out the oom_kill_process into a separate patch.
> 
> It was a separate patch, I've merged it based on Vladimir's feedback.
> No problems, I can divide it back.

It would make the review slightly more easier
> 
> > > -static void oom_kill_process(struct oom_control *oc, const char *message)
> > > +static void __oom_kill_process(struct task_struct *victim)
> > 
> > To the rest of the patch. I have to say I do not quite like how it is
> > implemented. I was hoping for something much simpler which would hook
> > into oom_evaluate_task. If a task belongs to a memcg with kill-all flag
> > then we would update the cumulative memcg badness (more specifically the
> > badness of the topmost parent with kill-all flag). Memcg will then
> > compete with existing self contained tasks (oom_badness will have to
> > tell whether points belong to a task or a memcg to allow the caller to
> > deal with it). But it shouldn't be much more complex than that.
> 
> I'm not sure, it will be any simpler. Basically I'm doing the same:
> the difference is that you want to iterate over tasks and for each
> task traverse the memcg tree, update per-cgroup oom score and find
> the corresponding memcg(s) with the kill-all flag. I'm doing the opposite:
> traverse the cgroup tree, and for each leaf cgroup iterate over processes.

Yeah but this doesn't fit very well to the existing scheme so we would
need two different schemes which is not ideal from maint. point of view.
We also do not have to duplicate all the tricky checks we already do in
oom_evaluate_task. So I would prefer if we could try to hook there and
do the special handling there.

> Also, please note, that even without the kill-all flag the decision is made
> on per-cgroup level (except tasks in the root cgroup).

Yeah and I am not sure this is a reasonable behavior. Why should we
consider memcgs which are not kill-all as a single entity?

-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread John Stultz
On Tue, Aug 1, 2017 at 5:55 AM, Prarit Bhargava  wrote:
> printk.time=1/CONFIG_PRINTK_TIME=1 adds a unmodified local hardware clock
> timestamp to printk messages.  The local hardware clock loses time each
> day making it difficult to determine exactly when an issue has occurred in
> the kernel log, and making it difficult to determine how kernel and
> hardware issues relate to each other in real time.
>
> Make printk output different timestampes by adding options for no
> timestamp, the local hardware clock, the monotonic clock, and the real
> clock.  Allow a user to pick one of the clocks by using the printk.time
> kernel parameter.  Output the type of clock in
> /sys/module/printk/parameters/time so userspace programs can interpret the
> timestamp.
>
> Real clock & 32-bit systems:  Selecting the real clock printk timestamp may
> lead to unlikely situations where a timestamp is wrong because the real time
> offset is read without the protection of a sequence lock in the call to
> ktime_get_log_ts() in printk_get_ts().
>
> v2: Use peterz's suggested Kconfig options.  Merge patchset together.  Fix
> i386 !CONFIG_PRINTK builds.
>
> Signed-off-by: Prarit Bhargava 
...
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 98fe715522e8..7a8870b4ddbb 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -1,8 +1,39 @@
>  menu "printk and dmesg options"
>
> +choice
> +   prompt "printk default clock"
> +   config PRINTK_TIME_DISABLE
> +   bool "Disabled"
> +   help
> +Selecting this option disables the time stamps of printk().
> +
> +   config PRINTK_TIME_LOCAL
> +   bool "Local Clock"
> +   help
> + Selecting this option causes the time stamps of printk() to be
> + stamped with the unadjusted hardware clock.
> +
> +   config PRINTK_TIME_MONO
> +   bool "CLOCK_MONOTONIC"
> +   help
> + Selecting this option causes the time stamps of printk() to be
> + stamped with the adjusted monotonic clock.
> +
> +   config PRINTK_TIME_REAL
> +   bool "CLOCK_REALTIME"
> +   help
> + Selecting this option causes the time stamps of printk() to be
> + stamped with the adjusted realtime clock.

Its been asked already, but I've not yet seen an answer.
Is there a reason your not also adding PRINTK_TIME_BOOT here (which to
me would be more generally useful then REAL or MONO)?

thanks
-john
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread Luis R. Rodriguez
On Tue, Aug 01, 2017 at 08:55:28AM -0400, Prarit Bhargava wrote:
> diff --git a/arch/x86/configs/x86_64_defconfig 
> b/arch/x86/configs/x86_64_defconfig
> index 4a4b16e56d35..23da8e5297a1 100644
> --- a/arch/x86/configs/x86_64_defconfig
> +++ b/arch/x86/configs/x86_64_defconfig
> @@ -283,7 +283,13 @@ CONFIG_NLS_CODEPAGE_437=y
>  CONFIG_NLS_ASCII=y
>  CONFIG_NLS_ISO8859_1=y
>  CONFIG_NLS_UTF8=y
> -CONFIG_PRINTK_TIME=y
> +# CONFIG_PRINTK_TIME_DISABLE is not set
> +CONFIG_PRINTK_TIME_LOCAL=Y
> +# CONFIG_PRINTK_TIME_DISABLE is not set
> +CONFIG_PRINTK_TIME_LOCAL=Y
> +# CONFIG_PRINTK_TIME_DISABLE is not set
> +CONFIG_PRINTK_TIME_LOCAL=Y
> +CONFIG_PRINTK_TIME=1
>  # CONFIG_ENABLE_WARN_DEPRECATED is not set
>  CONFIG_MAGIC_SYSRQ=y
>  # CONFIG_UNUSED_SYMBOLS is not set

You've gone trigger paste happy here.

> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index fc47863f629c..672a649b90d8 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -576,6 +576,8 @@ static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
>   return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
>  }
>  
> +static u64 printk_get_ts(void);
> +
>  /* insert record into the buffer, discard old ones, update heads */
>  static int log_store(int facility, int level,
>enum log_flags flags, u64 ts_nsec,
> @@ -624,7 +626,7 @@ static int log_store(int facility, int level,
>   if (ts_nsec > 0)
>   msg->ts_nsec = ts_nsec;
>   else
> - msg->ts_nsec = local_clock();
> + msg->ts_nsec = printk_get_ts();
>   memset(log_dict(msg) + dict_len, 0, pad_len);
>   msg->len = size;
>  
> @@ -1202,8 +1204,89 @@ static inline void boot_delay_msec(int level)
>  }
>  #endif
>  
> -static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
> -module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
> +static int printk_time = CONFIG_PRINTK_TIME;
> +static int printk_time_setting; /* initial setting */

If you use an enum for printk_time_setting you can then kdoc'ify
each setting and also match the values names on kconfig. With this
you can move the big comment below onto the kdoc for the enum.
You can then nicely expand on the documentation to explain what
compromises were made for providing time if timekeeping_init() was
not called yet.

/**
 * enum printk_time_setting - description
 * @PRINTK_TIME_DISABLE: description
 * @PRINTK_TIME_LOCAL: description
 * @PRINTK_TIME_MONO: description
 * @PRINTK_TIME_REAL: description - you can go into the 32-bit issue here
 */
enum printk_time_setting {
PRINTK_TIME_DISABLE = 0,
PRINTK_TIME_LOCAL = 1,
PRINTK_TIME_MONO = 2,
PRINTK_TIME_REAL = 3,
};

And since you are kdoc'ifying consider later adding proper rst format docs
for printk and then pegging this kdoc entry into it. Unfortunately I only
see Documentation/printk-formats.txt and Documentation/x86/earlyprintk.txt,
is that all we have on printk docs?

> +
> +/*
> + * Real clock & 32-bit systems:  Selecting the real clock printk timestamp 
> may
> + * lead to unlikely situations where a timestamp is wrong because the real 
> time
> + * offset is read without the protection of a sequence lock in the call to
> + * ktime_get_log_ts() in printk_get_ts() below.
> + */
> +static int printk_time_set(const char *val, const struct kernel_param *kp)
> +{
> + char *param = strstrip((char *)val);
> + int _printk_time;
> +
> + if (strlen(param) != 1)
> + return -EINVAL;
> +
> + switch (param[0]) {
> + case '0':
> + case 'n':
> + case 'N':
> + _printk_time = 0; /* none/disabled */
> + break;
> + case '1':
> + case 'y':
> + case 'Y':
> + _printk_time = 1; /* local unadjusted HW clock */
> + break;
> + case '2':
> + _printk_time = 2; /* monotonic */
> + break;
> + case '3':
> + _printk_time = 3; /* realtime */
> + break;
> + default:
> + pr_warn("printk: invalid timestamp value\n");
> + return -EINVAL;
> + }
> +
> + /*
> +  * Only allow enabling and disabling of the current printk_time
> +  * setting.  Changing it from one setting to another confuses
> +  * userspace.
> +  */
> + if (printk_time_setting == 0) {
> + printk_time_setting = _printk_time;
> + } else if ((printk_time_setting != _printk_time) &&
> +(_printk_time != 0)) {
> + pr_warn("printk: timestamp can only be set to 0 or %d ",
> + printk_time_setting);
> + return -EINVAL;
> + }
> +
> + printk_time = _printk_time;
> + pr_info("printk: timestamp set to %d.", printk_time);

Then here you can also add a respective conversion for describing what the enum
was, "%s (%d)", printk_time_desc(printk_time), printk_time) which will make it
easier to understand what was done at init in the 

Re: [RFC v6 21/62] powerpc: introduce execute-only pkey

2017-08-01 Thread Thiago Jung Bauermann

Michael Ellerman  writes:

> Thiago Jung Bauermann  writes:
>> Ram Pai  writes:
> ...
>>> +
>>> +   /* We got one, store it and use it from here on out */
>>> +   if (need_to_set_mm_pkey)
>>> +   mm->context.execute_only_pkey = execute_only_pkey;
>>> +   return execute_only_pkey;
>>> +}
>>
>> If you follow the code flow in __execute_only_pkey, the AMR and UAMOR
>> are read 3 times in total, and AMR is written twice. IAMR is read and
>> written twice. Since they are SPRs and access to them is slow (or isn't
>> it?),
>
> SPRs read/writes are slow, but they're not *that* slow in comparison to
> a system call (which I think is where this code is being called?).

Yes, this code runs on mprotect and mmap syscalls if the memory is
requested to have execute but not read nor write permissions.

> So we should try to avoid too many SPR read/writes, but at the same time
> we can accept more than the minimum if it makes the code much easier to
> follow.

Ok. Ram had asked me to suggest a way to optimize the SPR reads and
writes and I came up with the patch below. Do you think it's worth it?

The patch applies on top of this series, but if Ram includes it I think
he would break it up and merge it into the other patches.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


>From f6e73e67d325c4a1952c375072ca35156a9f2042 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann 
Date: Mon, 31 Jul 2017 20:22:59 -0300
Subject: [PATCH] powerpc: Cache protection key registers in
 __execute_only_pkey

Pass around a struct with the contents of AMR, IAMR and AMOR, as well as
flags indicating whether those fields hold valid values and whether they
should be committed back to the registers.

Signed-off-by: Thiago Jung Bauermann 
---
 arch/powerpc/include/asm/pkeys.h |  18 --
 arch/powerpc/mm/pkeys.c  | 120 +--
 2 files changed, 104 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index e61ed6c332db..66f15dbc5855 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -129,12 +129,15 @@ static inline bool mm_pkey_is_allocated(struct mm_struct 
*mm, int pkey)
mm_set_pkey_is_allocated(mm, pkey));
 }
 
-extern void __arch_activate_pkey(int pkey);
+struct pkey_regs_cache;
+
+extern void __arch_activate_pkey(int pkey, struct pkey_regs_cache *regs);
 extern void __arch_deactivate_pkey(int pkey);
 /*
  * Returns a positive, 5-bit key on success, or -1 on failure.
  */
-static inline int mm_pkey_alloc(struct mm_struct *mm)
+static inline int __mm_pkey_alloc(struct mm_struct *mm,
+ struct pkey_regs_cache *regs)
 {
/*
 * Note: this is the one and only place we make sure
@@ -162,10 +165,15 @@ static inline int mm_pkey_alloc(struct mm_struct *mm)
 * enable the key in the hardware
 */
if (ret > 0)
-   __arch_activate_pkey(ret);
+   __arch_activate_pkey(ret, regs);
return ret;
 }
 
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+   return __mm_pkey_alloc(mm, NULL);
+}
+
 static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
if (!pkey_inited)
@@ -206,13 +214,13 @@ static inline int arch_override_mprotect_pkey(struct 
vm_area_struct *vma,
 }
 
 extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-   unsigned long init_val);
+   unsigned long init_val, struct pkey_regs_cache *regs);
 static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
 {
if (!pkey_inited)
return -EINVAL;
-   return __arch_set_user_pkey_access(tsk, pkey, init_val);
+   return __arch_set_user_pkey_access(tsk, pkey, init_val, NULL);
 }
 
 static inline bool arch_pkeys_enabled(void)
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 1424c79f45f6..718ea23f8184 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -22,52 +22,92 @@ u32  initial_allocation_mask;   /* bits set for 
reserved keys */
 #define PKEY_REG_BITS (sizeof(u64)*8)
 #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
 
-static bool is_pkey_enabled(int pkey)
+/*
+ * The registers controlling memory protection keys are expensive to access, so
+ * we want to cache their values in code paths that might need to use them more
+ * than once.
+ */
+struct pkey_regs_cache {
+   u64 amr;
+   u64 iamr;
+   u64 uamor;
+
+   bool amr_valid;
+   bool iamr_valid;
+   bool uamor_valid;
+
+   bool write_amr;
+   bool write_iamr;
+   bool write_uamor;
+};
+
+static bool is_pkey_enabled(int pkey, struct pkey_regs_cache *regs)
 {
-   return !!(read_uamor() & (0x3ul << 

Re: [v4 2/4] mm, oom: cgroup-aware OOM killer

2017-08-01 Thread Roman Gushchin
On Tue, Aug 01, 2017 at 04:54:35PM +0200, Michal Hocko wrote:
> On Wed 26-07-17 14:27:16, Roman Gushchin wrote:
> [...]
> > +static long memcg_oom_badness(struct mem_cgroup *memcg,
> > + const nodemask_t *nodemask)
> > +{
> > +   long points = 0;
> > +   int nid;
> > +
> > +   for_each_node_state(nid, N_MEMORY) {
> > +   if (nodemask && !node_isset(nid, *nodemask))
> > +   continue;
> > +
> > +   points += mem_cgroup_node_nr_lru_pages(memcg, nid,
> > +   LRU_ALL_ANON | BIT(LRU_UNEVICTABLE));
> > +   }
> > +
> > +   points += memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) /
> > +   (PAGE_SIZE / 1024);
> > +   points += memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE);
> > +   points += memcg_page_state(memcg, MEMCG_SOCK);
> > +   points += memcg_page_state(memcg, MEMCG_SWAP);
> > +
> > +   return points;
> 
> I am wondering why are you diverging from the global oom_badness
> behavior here. Although doing per NUMA accounting sounds like a better
> idea but then you just end up mixing this with non NUMA numbers and the
> whole thing is harder to understand without great advantages.

Ok, makes sense. I can revert to the existing OOM behaviour here.

> > +static void select_victim_memcg(struct mem_cgroup *root, struct 
> > oom_control *oc)
> > +{
> > +   struct mem_cgroup *iter, *parent;
> > +
> > +   for_each_mem_cgroup_tree(iter, root) {
> > +   if (memcg_has_children(iter)) {
> > +   iter->oom_score = 0;
> > +   continue;
> > +   }
> > +
> > +   iter->oom_score = oom_evaluate_memcg(iter, oc->nodemask);
> > +   if (iter->oom_score == -1) {
> > +   oc->chosen_memcg = (void *)-1UL;
> > +   mem_cgroup_iter_break(root, iter);
> > +   return;
> > +   }
> > +
> > +   if (!iter->oom_score)
> > +   continue;
> > +
> > +   for (parent = parent_mem_cgroup(iter); parent && parent != root;
> > +parent = parent_mem_cgroup(parent))
> > +   parent->oom_score += iter->oom_score;
> > +   }
> > +
> > +   for (;;) {
> > +   struct cgroup_subsys_state *css;
> > +   struct mem_cgroup *memcg = NULL;
> > +   long score = LONG_MIN;
> > +
> > +   css_for_each_child(css, >css) {
> > +   struct mem_cgroup *iter = mem_cgroup_from_css(css);
> > +
> > +   if (iter->oom_score > score) {
> > +   memcg = iter;
> > +   score = iter->oom_score;
> > +   }
> > +   }
> > +
> > +   if (!memcg) {
> > +   if (oc->memcg && root == oc->memcg) {
> > +   oc->chosen_memcg = oc->memcg;
> > +   css_get(>chosen_memcg->css);
> > +   oc->chosen_points = oc->memcg->oom_score;
> > +   }
> > +   break;
> > +   }
> > +
> > +   if (memcg->oom_kill_all_tasks || !memcg_has_children(memcg)) {
> > +   oc->chosen_memcg = memcg;
> > +   css_get(>chosen_memcg->css);
> > +   oc->chosen_points = score;
> > +   break;
> > +   }
> > +
> > +   root = memcg;
> > +   }
> > +}
> 
> This and the rest of the victim selection code is really hairy and hard
> to follow.

Will adding more comments help here?

> 
> I would reap out the oom_kill_process into a separate patch.

It was a separate patch, I've merged it based on Vladimir's feedback.
No problems, I can divide it back.

> > -static void oom_kill_process(struct oom_control *oc, const char *message)
> > +static void __oom_kill_process(struct task_struct *victim)
> 
> To the rest of the patch. I have to say I do not quite like how it is
> implemented. I was hoping for something much simpler which would hook
> into oom_evaluate_task. If a task belongs to a memcg with kill-all flag
> then we would update the cumulative memcg badness (more specifically the
> badness of the topmost parent with kill-all flag). Memcg will then
> compete with existing self contained tasks (oom_badness will have to
> tell whether points belong to a task or a memcg to allow the caller to
> deal with it). But it shouldn't be much more complex than that.

I'm not sure, it will be any simpler. Basically I'm doing the same:
the difference is that you want to iterate over tasks and for each
task traverse the memcg tree, update per-cgroup oom score and find
the corresponding memcg(s) with the kill-all flag. I'm doing the opposite:
traverse the cgroup tree, and for each leaf cgroup iterate over processes.

Also, please note, that even without the kill-all flag the decision is made
on per-cgroup level (except tasks in the root cgroup).

Thank you!

Roman
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a 

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
On Tue, 1 Aug 2017 16:22:21 +0200
Arnd Bergmann  wrote:

> On Tue, Aug 1, 2017 at 3:58 PM, Boris Brezillon
>  wrote:
> > On Tue, 1 Aug 2017 15:34:14 +0200
> > Boris Brezillon  wrote:  
> >> On Tue, 1 Aug 2017 15:11:44 +0200
> >> Arnd Bergmann  wrote:  
> >> > On Tue, Aug 1, 2017 at 2:29 PM, Boris Brezillon
> >> >  wrote:  
> > I just realized I forgot to add a "depends on I2C" in the I3C Kconfig
> > entry. Indeed, I'm unconditionally calling functions provided by the
> > I2C framework which have no dummy wrapper when I2C support is disabled.
> > I could of course conditionally compile some portion of the I3C
> > framework so that it still builds when I2C is disabled but I'm not sure
> > it's worth the trouble.
> >
> > This "depends on I2C" should also solve the I2C+I3C driver issue, since
> > I2C is necessarily enabled when I3C is.
> >
> > Am I missing something?  
> 
> That should solve another part of the problem, as a combined driver then
> just needs 'depends on I3C'.
> 
> On top of that, the i3c_driver structure could also contain callback
> pointers for the i2c subsystem, e.g. i2c_probe(), i2c_remove() etc.
> When the i2c_probe() callback exists, the i3c layer could construct
> a 'struct i2c_driver' with those callbacks and register that under the
> cover. This would mean that combined drivers no longer need to
> register two driver objects.

That should work. Actually, i2c_driver contains a few more hooks, like
->alert(), ->command() and ->detect(). Of course we could assume that
I3C/I2C drivers do not need them, but I'm wondering if it's not easier
to just add an i2c_driver pointer inside the i3c_driver struct and let
the driver populate it if it needs to supports both protocols.

Something like:

struct i3c_driver {
...
struct i2c_driver *i2c_compat;
...
};


and then in I3C/I2C drivers:

static struct i2c_driver my_i2c_driver = {
...
};

static struct i3c_driver my_i3c_driver = {
...
.i2c_compat = _i2c_driver,
...
};
module_i3c_driver(my_i3c_driver);



Of course, you'll have a few fields of ->i2c_compat that would be
filled by the core (like the driver name which can be extracted from
my_i3c_driver->driver.name).
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Wolfram Sang

> I do not know of any real devices as of today (all my tests have been
> done with a dummy/fake I3C slaves emulated with a slave IP),

I see.

> spec clearly describe what legacy/static addresses are for and one of
> their use case is to connect an I3C device on an I2C bus and let it act
> as an I2C device.

OK. That makes it more likely.

> Unless you want your device (likely a sensor) to be compatible with both
> I3C and I2C so that you can target even more people.

Right. My question was if this is a realistic or more academic scenario.

> I'm perfectly fine with the I3C / I2C framework separation. The only
> minor problem I had with that was the inaccuracy of the
> sysfs/device-model representation: we don't have one i2c and one i3c
> bus, we just have one i3c bus with a mix of i2c and i3c devices.

I understand that. What if I2C had the same seperation between the "bus"
and the "master"?



signature.asc
Description: PGP signature


Re: [RFC PATCH v2 04/38] KVM: arm/arm64: Check if nested virtualization is in use

2017-08-01 Thread Christoffer Dall
On Tue, Aug 01, 2017 at 10:07:40AM -0400, Jintack Lim wrote:
> On Sun, Jul 30, 2017 at 3:59 PM, Christoffer Dall  wrote:
> > On Tue, Jul 18, 2017 at 11:58:30AM -0500, Jintack Lim wrote:
> >> Nested virtualizaion is in use only if all three conditions are met:
> >> - The architecture supports nested virtualization.
> >> - The kernel parameter is set.
> >> - The userspace uses nested virtualiztion feature.
> >>
> >> Signed-off-by: Jintack Lim 
> >> ---
> >>  arch/arm/include/asm/kvm_host.h   | 11 +++
> >>  arch/arm64/include/asm/kvm_host.h |  2 ++
> >>  arch/arm64/kvm/nested.c   | 17 +
> >>  virt/kvm/arm/arm.c|  4 
> >>  4 files changed, 34 insertions(+)
> >>
> >> diff --git a/arch/arm/include/asm/kvm_host.h 
> >> b/arch/arm/include/asm/kvm_host.h
> >> index 00b0f97..7e9e6c8 100644
> >> --- a/arch/arm/include/asm/kvm_host.h
> >> +++ b/arch/arm/include/asm/kvm_host.h
> >> @@ -303,4 +303,15 @@ static inline int __init kvmarm_nested_cfg(char *buf)
> >>  {
> >>   return 0;
> >>  }
> >> +
> >> +static inline int init_nested_virt(void)
> >> +{
> >> + return 0;
> >> +}
> >> +
> >> +static inline bool nested_virt_in_use(struct kvm_vcpu *vcpu)
> >> +{
> >> + return false;
> >> +}
> >> +
> >>  #endif /* __ARM_KVM_HOST_H__ */
> >> diff --git a/arch/arm64/include/asm/kvm_host.h 
> >> b/arch/arm64/include/asm/kvm_host.h
> >> index 6df0c7c..86d4b6c 100644
> >> --- a/arch/arm64/include/asm/kvm_host.h
> >> +++ b/arch/arm64/include/asm/kvm_host.h
> >> @@ -387,5 +387,7 @@ static inline void __cpu_init_stage2(void)
> >>  }
> >>
> >>  int __init kvmarm_nested_cfg(char *buf);
> >> +int init_nested_virt(void);
> >> +bool nested_virt_in_use(struct kvm_vcpu *vcpu);
> >>
> >>  #endif /* __ARM64_KVM_HOST_H__ */
> >> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> >> index 79f38da..9a05c76 100644
> >> --- a/arch/arm64/kvm/nested.c
> >> +++ b/arch/arm64/kvm/nested.c
> >> @@ -24,3 +24,20 @@ int __init kvmarm_nested_cfg(char *buf)
> >>  {
> >>   return strtobool(buf, _param);
> >>  }
> >> +
> >> +int init_nested_virt(void)
> >> +{
> >> + if (nested_param && cpus_have_const_cap(ARM64_HAS_NESTED_VIRT))
> >> + kvm_info("Nested virtualization is supported\n");
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +bool nested_virt_in_use(struct kvm_vcpu *vcpu)
> >> +{
> >> + if (nested_param && cpus_have_const_cap(ARM64_HAS_NESTED_VIRT)
> >> + && test_bit(KVM_ARM_VCPU_NESTED_VIRT, vcpu->arch.features))
> >> + return true;
> >
> > you could initialize a bool in init_nested_virt which you then check
> > here to avoid duplicating the logic.
> 
> I can make a bool to check the kernel param and the capability. The
> third one is per VM given by the userspace, so we don't know it when
> we initialize the host hypervisor. We can potentially have a bool in
> kvm_vcpu_arch or kvm_arch to cache the whole three conditions, if that
> sounds ok.
> 

Yes, that sounds good to me.

Thanks,
-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [v4 2/4] mm, oom: cgroup-aware OOM killer

2017-08-01 Thread Michal Hocko
On Wed 26-07-17 14:27:16, Roman Gushchin wrote:
[...]
> +static long memcg_oom_badness(struct mem_cgroup *memcg,
> +   const nodemask_t *nodemask)
> +{
> + long points = 0;
> + int nid;
> +
> + for_each_node_state(nid, N_MEMORY) {
> + if (nodemask && !node_isset(nid, *nodemask))
> + continue;
> +
> + points += mem_cgroup_node_nr_lru_pages(memcg, nid,
> + LRU_ALL_ANON | BIT(LRU_UNEVICTABLE));
> + }
> +
> + points += memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) /
> + (PAGE_SIZE / 1024);
> + points += memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE);
> + points += memcg_page_state(memcg, MEMCG_SOCK);
> + points += memcg_page_state(memcg, MEMCG_SWAP);
> +
> + return points;

I am wondering why are you diverging from the global oom_badness
behavior here. Although doing per NUMA accounting sounds like a better
idea but then you just end up mixing this with non NUMA numbers and the
whole thing is harder to understand without great advantages.

> +static void select_victim_memcg(struct mem_cgroup *root, struct oom_control 
> *oc)
> +{
> + struct mem_cgroup *iter, *parent;
> +
> + for_each_mem_cgroup_tree(iter, root) {
> + if (memcg_has_children(iter)) {
> + iter->oom_score = 0;
> + continue;
> + }
> +
> + iter->oom_score = oom_evaluate_memcg(iter, oc->nodemask);
> + if (iter->oom_score == -1) {
> + oc->chosen_memcg = (void *)-1UL;
> + mem_cgroup_iter_break(root, iter);
> + return;
> + }
> +
> + if (!iter->oom_score)
> + continue;
> +
> + for (parent = parent_mem_cgroup(iter); parent && parent != root;
> +  parent = parent_mem_cgroup(parent))
> + parent->oom_score += iter->oom_score;
> + }
> +
> + for (;;) {
> + struct cgroup_subsys_state *css;
> + struct mem_cgroup *memcg = NULL;
> + long score = LONG_MIN;
> +
> + css_for_each_child(css, >css) {
> + struct mem_cgroup *iter = mem_cgroup_from_css(css);
> +
> + if (iter->oom_score > score) {
> + memcg = iter;
> + score = iter->oom_score;
> + }
> + }
> +
> + if (!memcg) {
> + if (oc->memcg && root == oc->memcg) {
> + oc->chosen_memcg = oc->memcg;
> + css_get(>chosen_memcg->css);
> + oc->chosen_points = oc->memcg->oom_score;
> + }
> + break;
> + }
> +
> + if (memcg->oom_kill_all_tasks || !memcg_has_children(memcg)) {
> + oc->chosen_memcg = memcg;
> + css_get(>chosen_memcg->css);
> + oc->chosen_points = score;
> + break;
> + }
> +
> + root = memcg;
> + }
> +}

This and the rest of the victim selection code is really hairy and hard
to follow.

I would reap out the oom_kill_process into a separate patch.

> -static void oom_kill_process(struct oom_control *oc, const char *message)
> +static void __oom_kill_process(struct task_struct *victim)

To the rest of the patch. I have to say I do not quite like how it is
implemented. I was hoping for something much simpler which would hook
into oom_evaluate_task. If a task belongs to a memcg with kill-all flag
then we would update the cumulative memcg badness (more specifically the
badness of the topmost parent with kill-all flag). Memcg will then
compete with existing self contained tasks (oom_badness will have to
tell whether points belong to a task or a memcg to allow the caller to
deal with it). But it shouldn't be much more complex than that.

Or is there something that I am missing and that would prevent such a
simple approach?
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/3] irq/irq_sim: add a devres variant of irq_sim_init()

2017-08-01 Thread Bartosz Golaszewski
Add a resource managed version of irq_sim_init(). This can be
conveniently used in device drivers.

Signed-off-by: Bartosz Golaszewski 
---
 Documentation/driver-model/devres.txt |  1 +
 include/linux/irq_sim.h   |  4 
 kernel/irq_sim.c  | 43 +++
 3 files changed, 48 insertions(+)

diff --git a/Documentation/driver-model/devres.txt 
b/Documentation/driver-model/devres.txt
index 30e04f7a690d..69f08c0f23a8 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -312,6 +312,7 @@ IRQ
   devm_irq_alloc_descs_from()
   devm_irq_alloc_generic_chip()
   devm_irq_setup_generic_chip()
+  devm_irq_sim_init()
 
 LED
   devm_led_classdev_register()
diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h
index 0c1abf0e3244..94c4bfc9b7a9 100644
--- a/include/linux/irq_sim.h
+++ b/include/linux/irq_sim.h
@@ -9,6 +9,7 @@
 #define _LINUX_IRQ_SIM_H
 
 #include 
+#include 
 
 struct irq_sim_work_ctx {
struct irq_work work;
@@ -30,6 +31,9 @@ struct irq_sim {
 int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs);
 void irq_sim_fini(struct irq_sim *sim);
 
+int devm_irq_sim_init(struct device *dev,
+ struct irq_sim *sim, unsigned int num_irqs);
+
 void irq_sim_fire(struct irq_sim *sim, unsigned int offset);
 
 int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset);
diff --git a/kernel/irq_sim.c b/kernel/irq_sim.c
index 4387e2bee97c..92686c0c7790 100644
--- a/kernel/irq_sim.c
+++ b/kernel/irq_sim.c
@@ -90,6 +90,49 @@ void irq_sim_fini(struct irq_sim *sim)
 }
 EXPORT_SYMBOL_GPL(irq_sim_fini);
 
+struct irq_sim_devres {
+   struct irq_sim *sim;
+};
+
+static void devm_irq_sim_release(struct device *dev, void *res)
+{
+   struct irq_sim_devres *this = res;
+
+   irq_sim_fini(this->sim);
+}
+
+/**
+ * irq_sim_init - Initialize the interrupt simulator for a managed device.
+ *
+ * @dev:Device to initialize the simulator object for.
+ * @sim:The interrupt simulator object to initialize.
+ * @num_irqs:   Number of interrupts to allocate
+ *
+ * Returns 0 on success and a negative error number on failure.
+ */
+int devm_irq_sim_init(struct device *dev,
+ struct irq_sim *sim, unsigned int num_irqs)
+{
+   struct irq_sim_devres *dr;
+   int rv;
+
+   dr = devres_alloc(devm_irq_sim_release, sizeof(*dr), GFP_KERNEL);
+   if (!dr)
+   return -ENOMEM;
+
+   rv = irq_sim_init(sim, num_irqs);
+   if (rv) {
+   devres_free(dr);
+   return rv;
+   }
+
+   dr->sim = sim;
+   devres_add(dev, dr);
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(devm_irq_sim_init);
+
 /**
  * irq_sim_fire - Enqueue an interrupt.
  *
-- 
2.13.2

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/3] gpio: mockup: use irq_sim

2017-08-01 Thread Bartosz Golaszewski
Shrink the driver by removing the code dealing with dummy interrupts
and replacing it with calls to the irq_sim API.

Signed-off-by: Bartosz Golaszewski 
---
 drivers/gpio/Kconfig   |  2 +-
 drivers/gpio/gpio-mockup.c | 77 +-
 2 files changed, 8 insertions(+), 71 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index f235eae04c16..e2264fb31f87 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -311,7 +311,7 @@ config GPIO_MOCKUP
depends on GPIOLIB && SYSFS
select GPIO_SYSFS
select GPIOLIB_IRQCHIP
-   select IRQ_WORK
+   select IRQ_SIM
help
  This enables GPIO Testing driver, which provides a way to test GPIO
  subsystem through sysfs(or char device) and debugfs. GPIO_SYSFS
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index a6565e128f9e..6db7163e6d98 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -20,7 +20,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 
@@ -47,18 +47,12 @@ enum {
 struct gpio_mockup_line_status {
int dir;
bool value;
-   bool irq_enabled;
-};
-
-struct gpio_mockup_irq_context {
-   struct irq_work work;
-   int irq;
 };
 
 struct gpio_mockup_chip {
struct gpio_chip gc;
struct gpio_mockup_line_status *lines;
-   struct gpio_mockup_irq_context irq_ctx;
+   struct irq_sim irqsim;
struct dentry *dbg_dir;
 };
 
@@ -144,65 +138,11 @@ static int gpio_mockup_name_lines(struct device *dev,
return 0;
 }
 
-static int gpio_mockup_to_irq(struct gpio_chip *chip, unsigned int offset)
-{
-   return chip->irq_base + offset;
-}
-
-static void gpio_mockup_irqmask(struct irq_data *data)
+static int gpio_mockup_to_irq(struct gpio_chip *gc, unsigned int offset)
 {
-   struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
 
-   chip->lines[data->irq - gc->irq_base].irq_enabled = false;
-}
-
-static void gpio_mockup_irqunmask(struct irq_data *data)
-{
-   struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
-   struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
-
-   chip->lines[data->irq - gc->irq_base].irq_enabled = true;
-}
-
-static struct irq_chip gpio_mockup_irqchip = {
-   .name   = GPIO_MOCKUP_NAME,
-   .irq_mask   = gpio_mockup_irqmask,
-   .irq_unmask = gpio_mockup_irqunmask,
-};
-
-static void gpio_mockup_handle_irq(struct irq_work *work)
-{
-   struct gpio_mockup_irq_context *irq_ctx;
-
-   irq_ctx = container_of(work, struct gpio_mockup_irq_context, work);
-   handle_simple_irq(irq_to_desc(irq_ctx->irq));
-}
-
-static int gpio_mockup_irqchip_setup(struct device *dev,
-struct gpio_mockup_chip *chip)
-{
-   struct gpio_chip *gc = >gc;
-   int irq_base, i;
-
-   irq_base = devm_irq_alloc_descs(dev, -1, 0, gc->ngpio, 0);
-   if (irq_base < 0)
-   return irq_base;
-
-   gc->irq_base = irq_base;
-   gc->irqchip = _mockup_irqchip;
-
-   for (i = 0; i < gc->ngpio; i++) {
-   irq_set_chip(irq_base + i, gc->irqchip);
-   irq_set_chip_data(irq_base + i, gc);
-   irq_set_handler(irq_base + i, _simple_irq);
-   irq_modify_status(irq_base + i,
- IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
-   }
-
-   init_irq_work(>irq_ctx.work, gpio_mockup_handle_irq);
-
-   return 0;
+   return irq_sim_irqnum(>irqsim, offset);
 }
 
 static ssize_t gpio_mockup_event_write(struct file *file,
@@ -228,11 +168,8 @@ static ssize_t gpio_mockup_event_write(struct file *file,
chip = priv->chip;
gc = >gc;
 
-   if (chip->lines[priv->offset].irq_enabled) {
-   gpiod_set_value_cansleep(desc, val);
-   priv->chip->irq_ctx.irq = gc->irq_base + priv->offset;
-   irq_work_queue(>chip->irq_ctx.work);
-   }
+   gpiod_set_value_cansleep(desc, val);
+   irq_sim_fire(>irqsim, priv->offset);
 
return size;
 }
@@ -319,7 +256,7 @@ static int gpio_mockup_add(struct device *dev,
return ret;
}
 
-   ret = gpio_mockup_irqchip_setup(dev, chip);
+   ret = devm_irq_sim_init(dev, >irqsim, gc->ngpio);
if (ret)
return ret;
 
-- 
2.13.2

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/3] simulated interrupts

2017-08-01 Thread Bartosz Golaszewski
Some frameworks (e.g. iio, gpiolib) use irq_work to implement simulated
interrupts that can be 'fired' from process context when needed and
requested just like normal interrupts. This is useful for testing and
development purposes.

Currently this code is reimplemented by every user. This series
proposes to add a new set of functions that can be used by drivers
that want to simulate interrupts without having to duplicate any
boilerplate code.

The first patch adds a simple irq simulator framework. The second
extends it with resource management. The third uses the new
functionality in the gpio-mockup testing driver.

NOTE: The next candidate for using this API would be iio-dummy-evgen.

v1 -> v2:
- added a call to irq_work_sync in irq_sim_fini()

Bartosz Golaszewski (3):
  irq/irq_sim: add a simple interrupt simulator framework
  irq/irq_sim: add a devres variant of irq_sim_init()
  gpio: mockup: use irq_sim

 Documentation/driver-model/devres.txt |   1 +
 drivers/gpio/Kconfig  |   2 +-
 drivers/gpio/gpio-mockup.c|  77 ++--
 include/linux/irq_sim.h   |  41 +
 init/Kconfig  |   4 +
 kernel/Makefile   |   1 +
 kernel/irq_sim.c  | 162 ++
 7 files changed, 217 insertions(+), 71 deletions(-)
 create mode 100644 include/linux/irq_sim.h
 create mode 100644 kernel/irq_sim.c

-- 
2.13.2

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/3] irq/irq_sim: add a simple interrupt simulator framework

2017-08-01 Thread Bartosz Golaszewski
Implement a simple, irq_work-based framework for simulating
interrupts. Currently the API exposes routines for initializing and
deinitializing the simulator object, enqueueing the interrupts and
retrieving the allocated interrupt numbers based on the offset of the
dummy interrupt in the simulator struct.

Signed-off-by: Bartosz Golaszewski 
---
 include/linux/irq_sim.h |  37 +++
 init/Kconfig|   4 ++
 kernel/Makefile |   1 +
 kernel/irq_sim.c| 119 
 4 files changed, 161 insertions(+)
 create mode 100644 include/linux/irq_sim.h
 create mode 100644 kernel/irq_sim.c

diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h
new file mode 100644
index ..0c1abf0e3244
--- /dev/null
+++ b/include/linux/irq_sim.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2017 Bartosz Golaszewski 
+ *
+ * Provides a framework for allocating simulated interrupts which can be
+ * requested like normal irqs and enqueued from process context.
+ */
+
+#ifndef _LINUX_IRQ_SIM_H
+#define _LINUX_IRQ_SIM_H
+
+#include 
+
+struct irq_sim_work_ctx {
+   struct irq_work work;
+   int irq;
+};
+
+struct irq_sim_irq_ctx {
+   int irqnum;
+   bool enabled;
+};
+
+struct irq_sim {
+   struct irq_sim_work_ctx work_ctx;
+   int irq_base;
+   unsigned int irq_count;
+   struct irq_sim_irq_ctx *irqs;
+};
+
+int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs);
+void irq_sim_fini(struct irq_sim *sim);
+
+void irq_sim_fire(struct irq_sim *sim, unsigned int offset);
+
+int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset);
+
+#endif /* _LINUX_IRQ_SIM_H */
diff --git a/init/Kconfig b/init/Kconfig
index 8514b25db21c..220456599c3f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -23,6 +23,10 @@ config CONSTRUCTORS
 config IRQ_WORK
bool
 
+config IRQ_SIM
+   bool
+   select IRQ_WORK
+
 config BUILDTIME_EXTABLE_SORT
bool
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb8e8b23c6e..4472567c5835 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_TRACE_CLOCK) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
+obj-$(CONFIG_IRQ_SIM) += irq_sim.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
 
diff --git a/kernel/irq_sim.c b/kernel/irq_sim.c
new file mode 100644
index ..4387e2bee97c
--- /dev/null
+++ b/kernel/irq_sim.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2017 Bartosz Golaszewski 
+ *
+ * Provides a framework for allocating simulated interrupts which can be
+ * requested like normal irqs and enqueued from process context.
+ */
+
+#include 
+#include 
+
+static void irq_sim_irqmask(struct irq_data *data)
+{
+   struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+
+   irq_ctx->enabled = false;
+}
+
+static void irq_sim_irqunmask(struct irq_data *data)
+{
+   struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+
+   irq_ctx->enabled = true;
+}
+
+static struct irq_chip irq_sim_irqchip = {
+   .name   = "irq_sim",
+   .irq_mask   = irq_sim_irqmask,
+   .irq_unmask = irq_sim_irqunmask,
+};
+
+static void irq_sim_handle_irq(struct irq_work *work)
+{
+   struct irq_sim_work_ctx *work_ctx;
+
+   work_ctx = container_of(work, struct irq_sim_work_ctx, work);
+   handle_simple_irq(irq_to_desc(work_ctx->irq));
+}
+
+/**
+ * irq_sim_init - Initialize the interrupt simulator: allocate a range of
+ *dummy interrupts.
+ *
+ * @sim:The interrupt simulator object to initialize.
+ * @num_irqs:   Number of interrupts to allocate
+ *
+ * Returns 0 on success and a negative error number on failure.
+ */
+int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs)
+{
+   int i;
+
+   sim->irqs = kmalloc_array(num_irqs, sizeof(*sim->irqs), GFP_KERNEL);
+   if (!sim->irqs)
+   return -ENOMEM;
+
+   sim->irq_base = irq_alloc_descs(-1, 0, num_irqs, 0);
+   if (sim->irq_base < 0) {
+   kfree(sim->irqs);
+   return sim->irq_base;
+   }
+
+   for (i = 0; i < num_irqs; i++) {
+   sim->irqs[i].irqnum = sim->irq_base + i;
+   sim->irqs[i].enabled = false;
+   irq_set_chip(sim->irq_base + i, _sim_irqchip);
+   irq_set_chip_data(sim->irq_base + i, >irqs[i]);
+   irq_set_handler(sim->irq_base + i, _simple_irq);
+   irq_modify_status(sim->irq_base + i,
+ IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
+   }
+
+   init_irq_work(>work_ctx.work, irq_sim_handle_irq);
+   sim->irq_count = num_irqs;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(irq_sim_init);
+
+/**
+ * irq_sim_fini - Deinitialize the interrupt simulator: free the interrupt
+ *descriptors and 

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
On Tue, 1 Aug 2017 16:12:18 +0200
Wolfram Sang  wrote:

> > > The second way is to have a number of #ifdef and complex
> > > Kconfig dependencies for the driver to only register the
> > > device_driver objects for the buses that are enabled. This
> > > is also doable, but everyone gets the logic wrong the first time.  
> > 
> > Hm, I understand now why you'd prefer to have a single bus. Can't we
> > solve this problem with a module_i3c_i2c_driver() macro that would hide
> > all this complexity from I2C/I3C drivers?  
> 
> Do you know of devices speaking both i3c and i2c as of today?

I do not know of any real devices as of today (all my tests have been
done with a dummy/fake I3C slaves emulated with a slave IP), but the
spec clearly describe what legacy/static addresses are for and one of
their use case is to connect an I3C device on an I2C bus and let it act
as an I2C device.

> 
> I think I3C/I2C is a bit different than I2C/SPI. For the latter, it
> might happen that you have only this or that bus on the board, so it
> makes sense to support both. But if you have I3C, you can simply attach
> the I2C device onto it. I guess you would only implement I3C in the
> device if you explicitly need its feature set. And then, a I2C fallback
> doesn't make much sense? Or am I missing something?

Unless you want your device (likely a sensor) to be compatible with both
I3C and I2C so that you can target even more people.

> 
> OK, now I know that those I3C+I2C devices will exist, even if only for
> Murphy's law. However, my assumptions would be that those devices are
> not common and so we could live with the core plus bus_drivers
> seperation we have for SPI/I2C already (although I would love a common
> regmap-based I2C/SPI abstraction).
> 

I'm perfectly fine with the I3C / I2C framework separation. The only
minor problem I had with that was the inaccuracy of the
sysfs/device-model representation: we don't have one i2c and one i3c
bus, we just have one i3c bus with a mix of i2c and i3c devices.

Apart from that, I'm happy with the current approach.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Arnd Bergmann
On Tue, Aug 1, 2017 at 3:58 PM, Boris Brezillon
 wrote:
> On Tue, 1 Aug 2017 15:34:14 +0200
> Boris Brezillon  wrote:
>> On Tue, 1 Aug 2017 15:11:44 +0200
>> Arnd Bergmann  wrote:
>> > On Tue, Aug 1, 2017 at 2:29 PM, Boris Brezillon
>> >  wrote:
> I just realized I forgot to add a "depends on I2C" in the I3C Kconfig
> entry. Indeed, I'm unconditionally calling functions provided by the
> I2C framework which have no dummy wrapper when I2C support is disabled.
> I could of course conditionally compile some portion of the I3C
> framework so that it still builds when I2C is disabled but I'm not sure
> it's worth the trouble.
>
> This "depends on I2C" should also solve the I2C+I3C driver issue, since
> I2C is necessarily enabled when I3C is.
>
> Am I missing something?

That should solve another part of the problem, as a combined driver then
just needs 'depends on I3C'.

On top of that, the i3c_driver structure could also contain callback
pointers for the i2c subsystem, e.g. i2c_probe(), i2c_remove() etc.
When the i2c_probe() callback exists, the i3c layer could construct
a 'struct i2c_driver' with those callbacks and register that under the
cover. This would mean that combined drivers no longer need to
register two driver objects.

 Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Documentation: filesystems: update filesystem locking documentation

2017-08-01 Thread Jonathan Corbet
On Tue, 1 Aug 2017 07:09:10 -0400
Sean Anderson  wrote:

> Documentation/filesystems/Locking no longer reflects current locking
> semantics. i_mutex is no longer used for locking, and has been superseded
> by i_rwsem. Additionally, ->iterate_shared() was not documented.
> 
> Signed-off-by: Sean Anderson 

So should I pick this up, or are we waiting for Al to finish
unpacking ... ?

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/3] Support for QCOM BAM DMA command descriptor

2017-08-01 Thread Abhishek Sahu
v3:

1. Added Documentation for new flag
2. Changed the flag description

v2:

1. Added DMA_PREP_CMD flag and used the same for BAM DMA
   command descriptor
2. Removed custom mapping API patches

v1:

https://www.spinics.net/lists/dmaengine/msg12009.html

These patches mainly add the support for QCOM BAM command
descriptor implementing BAM DMA support for some QCOM
peripherals like QPIC NAND/LCD.

Currently there is no flag in DMA API which tells the DMA
controller that the passed data is in command descriptor
format so added the flag in DMA API for this.

Abhishek Sahu (3):
  dmaengine: add DMA_PREP_CMD for non-Data descriptors.
  dmaengine: qcom: bam_dma: wrapper functions for command descriptor
  dmaengine: qcom: bam_dma: add command descriptor flag

 Documentation/dmaengine/provider.txt |  7 
 drivers/dma/qcom/bam_dma.c   |  6 ++-
 include/linux/dma/qcom_bam_dma.h | 79 
 include/linux/dmaengine.h|  4 ++
 4 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/dma/qcom_bam_dma.h

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/3] dmaengine: qcom: bam_dma: wrapper functions for command descriptor

2017-08-01 Thread Abhishek Sahu
QCOM BAM also supports command descriptor which allows the SW to
create descriptors of type command which does not generate any
data transmissions but configures registers in the peripheral.
In command descriptor the 32bit address point to the start of
the command block which holds the command elements and the
16bit size define the size of the command block.

Each Command Element is structured by 4 words:
Write command: address + cmd
   register data
   register mask
   reserved

Read command: address + cmd
  read data result address,
  reserved
  reserved

This patch creates a new header file for BAM driver which contains the
structures and wrapper functions for command descriptor. This file will
be used by different QCOM peripheral drivers for forming the command
descriptor

Signed-off-by: Abhishek Sahu 
---
 include/linux/dma/qcom_bam_dma.h | 79 
 1 file changed, 79 insertions(+)
 create mode 100644 include/linux/dma/qcom_bam_dma.h

diff --git a/include/linux/dma/qcom_bam_dma.h b/include/linux/dma/qcom_bam_dma.h
new file mode 100644
index 000..077d43a
--- /dev/null
+++ b/include/linux/dma/qcom_bam_dma.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _QCOM_BAM_DMA_H
+#define _QCOM_BAM_DMA_H
+
+#include 
+
+/*
+ * This data type corresponds to the native Command Element
+ * supported by BAM DMA Engine.
+ *
+ * @cmd_and_addr - upper 8 bits command and lower 24 bits register address.
+ * @data - for write command: content to be written into peripheral register.
+ *for read command: dest addr to write peripheral register value.
+ * @mask - register mask.
+ * @reserved - for future usage.
+ *
+ */
+struct bam_cmd_element {
+   __le32 cmd_and_addr;
+   __le32 data;
+   __le32 mask;
+   __le32 reserved;
+};
+
+/*
+ * This enum indicates the command type in a command element
+ */
+enum bam_command_type {
+   BAM_WRITE_COMMAND = 0,
+   BAM_READ_COMMAND,
+};
+
+/*
+ * prep_bam_ce_le32 - Wrapper function to prepare a single BAM command
+ * element with the data already in le32 format.
+ *
+ * @bam_ce: bam command element
+ * @addr: target address
+ * @cmd: BAM command
+ * @data: actual data for write and dest addr for read in le32
+ */
+static inline void
+bam_prep_ce_le32(struct bam_cmd_element *bam_ce, u32 addr,
+enum bam_command_type cmd, __le32 data)
+{
+   bam_ce->cmd_and_addr =
+   cpu_to_le32((addr & 0xff) | ((cmd & 0xff) << 24));
+   bam_ce->data = data;
+   bam_ce->mask = cpu_to_le32(0x);
+}
+
+/*
+ * bam_prep_ce - Wrapper function to prepare a single BAM command element
+ * with the data.
+ *
+ * @bam_ce: BAM command element
+ * @addr: target address
+ * @cmd: BAM command
+ * @data: actual data for write and dest addr for read
+ */
+static inline void
+bam_prep_ce(struct bam_cmd_element *bam_ce, u32 addr,
+   enum bam_command_type cmd, u32 data)
+{
+   bam_prep_ce_le32(bam_ce, addr, cmd, cpu_to_le32(data));
+}
+#endif
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 3/3] dmaengine: qcom: bam_dma: add command descriptor flag

2017-08-01 Thread Abhishek Sahu
If DMA_PREP_CMD flag is passed in prep_slave_sg then peripheral
driver has passed the data is in BAM command descriptor format
and BAM driver should set CMD bit for each of the HW descriptors.

Signed-off-by: Abhishek Sahu 
---
 drivers/dma/qcom/bam_dma.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 03c4eb3..6d89fb6 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -65,6 +65,7 @@ struct bam_desc_hw {
 #define DESC_FLAG_EOT BIT(14)
 #define DESC_FLAG_EOB BIT(13)
 #define DESC_FLAG_NWD BIT(12)
+#define DESC_FLAG_CMD BIT(11)
 
 struct bam_async_desc {
struct virt_dma_desc vd;
@@ -645,6 +646,9 @@ static struct dma_async_tx_descriptor 
*bam_prep_slave_sg(struct dma_chan *chan,
unsigned int curr_offset = 0;
 
do {
+   if (flags & DMA_PREP_CMD)
+   desc->flags |= cpu_to_le16(DESC_FLAG_CMD);
+
desc->addr = cpu_to_le32(sg_dma_address(sg) +
 curr_offset);
 
@@ -960,7 +964,7 @@ static void bam_start_dma(struct bam_chan *bchan)
 
/* set any special flags on the last descriptor */
if (async_desc->num_desc == async_desc->xfer_len)
-   desc[async_desc->xfer_len - 1].flags =
+   desc[async_desc->xfer_len - 1].flags |=
cpu_to_le16(async_desc->flags);
else
desc[async_desc->xfer_len - 1].flags |=
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/3] dmaengine: add DMA_PREP_CMD for non-Data descriptors.

2017-08-01 Thread Abhishek Sahu
Some of the DMA controllers are capable of issuing the commands
to peripheral by the DMA. These commands can be list of register
reads/writes and its different from normal data reads/writes.
This patch adds new flag DMA_PREP_CMD in DMA API which tells
the driver that the data passed to DMA API is command data
and DMA controller driver will form descriptor in the required
format.

This flag can be used by any DMA controller driver which requires
the descriptor in different format for non-Data descriptors.

Signed-off-by: Abhishek Sahu 
---
 Documentation/dmaengine/provider.txt | 7 +++
 include/linux/dmaengine.h| 4 
 2 files changed, 11 insertions(+)

diff --git a/Documentation/dmaengine/provider.txt 
b/Documentation/dmaengine/provider.txt
index e33bc1c..bfadbfd 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -395,6 +395,13 @@ where to put them)
  when DMA_CTRL_REUSE is already set
- Terminating the channel
 
+  * DMA_PREP_CMD
+- If set, the client driver tells DMA controller that passed data in DMA
+  API is command data.
+- Interpretation of command data is DMA controller specific. It can be
+  used for issuing commands to other peripherals/register reads/register
+  writes for which the descriptor should be in different format from
+  normal data descriptors.
 
 General Design Notes
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 5336808..45fc833 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -186,6 +186,9 @@ struct dma_interleaved_template {
  *  on the result of this operation
  * @DMA_CTRL_REUSE: client can reuse the descriptor and submit again till
  *  cleared or freed
+ * @DMA_PREP_CMD: tell the driver that the data passed to DMA API is command
+ *  data and the descriptor should be in different format from normal
+ *  data descriptors.
  */
 enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
@@ -195,6 +198,7 @@ enum dma_ctrl_flags {
DMA_PREP_CONTINUE = (1 << 4),
DMA_PREP_FENCE = (1 << 5),
DMA_CTRL_REUSE = (1 << 6),
+   DMA_PREP_CMD = (1 << 7),
 };
 
 /**
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 20/38] KVM: arm64: Handle eret instruction traps

2017-08-01 Thread Jintack Lim
On Sun, Jul 30, 2017 at 4:00 PM, Christoffer Dall  wrote:
> On Tue, Jul 18, 2017 at 11:58:46AM -0500, Jintack Lim wrote:
>> When HCR.NV bit is set, eret instructions trap to EL2 with EC code 0x1A.
>> Emulate eret instructions by setting pc and pstate.
>
> It may be worth noting in the commit message that this is all we have to
> do, because the rest of the logic will then discover that the mode could
> change from virtual EL2 to EL1 and will setup the hw registers etc. when
> changing modes.

Makes sense. I'll write it up in the commit message.

>
>>
>> Note that the current exception level is always the virtual EL2, since
>> we set HCR_EL2.NV bit only when entering the virtual EL2. So, we take
>> spsr and elr states from the virtual _EL2 registers.
>>
>> Signed-off-by: Jintack Lim 
>> ---
>>  arch/arm64/include/asm/esr.h |  1 +
>>  arch/arm64/kvm/handle_exit.c | 16 
>>  arch/arm64/kvm/trace.h   | 21 +
>>  3 files changed, 38 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
>> index e7d8e28..210fde6 100644
>> --- a/arch/arm64/include/asm/esr.h
>> +++ b/arch/arm64/include/asm/esr.h
>> @@ -43,6 +43,7 @@
>>  #define ESR_ELx_EC_HVC64 (0x16)
>>  #define ESR_ELx_EC_SMC64 (0x17)
>>  #define ESR_ELx_EC_SYS64 (0x18)
>> +#define ESR_ELx_EC_ERET  (0x1A)
>>  /* Unallocated EC: 0x19 - 0x1E */
>>  #define ESR_ELx_EC_IMP_DEF   (0x1f)
>>  #define ESR_ELx_EC_IABT_LOW  (0x20)
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index 17d8a16..9259881 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -147,6 +147,21 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, 
>> struct kvm_run *run)
>>   return 1;
>>  }
>>
>> +static int kvm_handle_eret(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> +{
>> + trace_kvm_nested_eret(vcpu, vcpu_el2_sreg(vcpu, ELR_EL2),
>> +   vcpu_el2_sreg(vcpu, SPSR_EL2));
>> +
>> + /*
>> +  * Note that the current exception level is always the virtual EL2,
>> +  * since we set HCR_EL2.NV bit only when entering the virtual EL2.
>> +  */
>> + *vcpu_pc(vcpu) = vcpu_el2_sreg(vcpu, ELR_EL2);
>> + *vcpu_cpsr(vcpu) = vcpu_el2_sreg(vcpu, SPSR_EL2);
>> +
>> + return 1;
>> +}
>> +
>>  static exit_handle_fn arm_exit_handlers[] = {
>>   [0 ... ESR_ELx_EC_MAX]  = kvm_handle_unknown_ec,
>>   [ESR_ELx_EC_WFx]= kvm_handle_wfx,
>> @@ -160,6 +175,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, 
>> struct kvm_run *run)
>>   [ESR_ELx_EC_HVC64]  = handle_hvc,
>>   [ESR_ELx_EC_SMC64]  = handle_smc,
>>   [ESR_ELx_EC_SYS64]  = kvm_handle_sys_reg,
>> + [ESR_ELx_EC_ERET]   = kvm_handle_eret,
>>   [ESR_ELx_EC_IABT_LOW]   = kvm_handle_guest_abort,
>>   [ESR_ELx_EC_DABT_LOW]   = kvm_handle_guest_abort,
>>   [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
>> diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
>> index 7c86cfb..5f40987 100644
>> --- a/arch/arm64/kvm/trace.h
>> +++ b/arch/arm64/kvm/trace.h
>> @@ -187,6 +187,27 @@
>>   TP_printk("vcpu: %p, inject exception to vEL2: ESR_EL2 0x%lx, vector: 
>> 0x%016lx",
>> __entry->vcpu, __entry->esr_el2, __entry->pc)
>>  );
>> +
>> +TRACE_EVENT(kvm_nested_eret,
>> + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long elr_el2,
>> +  unsigned long spsr_el2),
>> + TP_ARGS(vcpu, elr_el2, spsr_el2),
>> +
>> + TP_STRUCT__entry(
>> + __field(struct kvm_vcpu *,  vcpu)
>> + __field(unsigned long,  elr_el2)
>> + __field(unsigned long,  spsr_el2)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->vcpu = vcpu;
>> + __entry->elr_el2 = elr_el2;
>> + __entry->spsr_el2 = spsr_el2;
>> + ),
>> +
>> + TP_printk("vcpu: %p, eret to elr_el2: 0x%016lx, with spsr_el2: 
>> 0x%08lx",
>> +   __entry->vcpu, __entry->elr_el2, __entry->spsr_el2)
>> +);
>>  #endif /* _TRACE_ARM64_KVM_H */
>>
>>  #undef TRACE_INCLUDE_PATH
>> --
>> 1.9.1
>>
>
> Otherwise this patch looks good.
>
> Thanks,
> -Christoffer
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 08/38] KVM: arm64: Add EL2 special registers to vcpu context

2017-08-01 Thread Jintack Lim
On Sun, Jul 30, 2017 at 3:59 PM, Christoffer Dall  wrote:
> On Tue, Jul 18, 2017 at 11:58:34AM -0500, Jintack Lim wrote:
>> To support the virtual EL2 execution, we need to maintain the EL2
>> special registers such as SPSR_EL2, ELR_EL2 and SP_EL2 in vcpu context.
>>
>> Note that SP_EL2 is not accessible in EL2, so we don't need a trap
>> handler for this register.
>
> Actually, it's not accessible *in the MRS/MSR instruction* but it is of
> course accessible as the current stack pointer (which is why you need
> the state, but not the trap handler).

That is correct. I'll fix the commit message.

>
> Otherwise, the patch looks good.

Thanks!

>
> Thanks,
> -Christoffer
>
>>
>> Signed-off-by: Jintack Lim 
>> ---
>>  arch/arm64/include/asm/kvm_host.h | 12 
>>  arch/arm64/include/asm/sysreg.h   |  4 
>>  arch/arm64/kvm/sys_regs.c | 38 
>> +-
>>  arch/arm64/kvm/sys_regs.h |  8 
>>  4 files changed, 57 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 1dc4ed6..57dccde 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -171,6 +171,15 @@ enum vcpu_sysreg {
>>   NR_SYS_REGS /* Nothing after this line! */
>>  };
>>
>> +enum el2_special_regs {
>> + __INVALID_EL2_SPECIAL_REG__,
>> + SPSR_EL2,   /* Saved Program Status Register (EL2) */
>> + ELR_EL2,/* Exception Link Register (EL2) */
>> + SP_EL2, /* Stack Pointer (EL2) */
>> +
>> + NR_EL2_SPECIAL_REGS
>> +};
>> +
>>  /* 32bit mapping */
>>  #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
>>  #define c0_CSSELR(CSSELR_EL1 * 2)/* Cache Size Selection Register */
>> @@ -218,6 +227,8 @@ struct kvm_cpu_context {
>>   u64 sys_regs[NR_SYS_REGS];
>>   u32 copro[NR_COPRO_REGS];
>>   };
>> +
>> + u64 el2_special_regs[NR_EL2_SPECIAL_REGS];
>>  };
>>
>>  typedef struct kvm_cpu_context kvm_cpu_context_t;
>> @@ -307,6 +318,7 @@ struct kvm_vcpu_arch {
>>
>>  #define vcpu_gp_regs(v)  (&(v)->arch.ctxt.gp_regs)
>>  #define vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
>> +#define vcpu_el2_sreg(v,r)   ((v)->arch.ctxt.el2_special_regs[(r)])
>>  /*
>>   * CP14 and CP15 live in the same array, as they are backed by the
>>   * same system registers.
>> diff --git a/arch/arm64/include/asm/sysreg.h 
>> b/arch/arm64/include/asm/sysreg.h
>> index 9277c4a..98c32ef 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -268,6 +268,8 @@
>>
>>  #define SYS_DACR32_EL2   sys_reg(3, 4, 3, 0, 0)
>>
>> +#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
>> +#define SYS_ELR_EL2  sys_reg(3, 4, 4, 0, 1)
>>  #define SYS_SP_EL1   sys_reg(3, 4, 4, 1, 0)
>>
>>  #define SYS_IFSR32_EL2   sys_reg(3, 4, 5, 0, 1)
>> @@ -332,6 +334,8 @@
>>  #define SYS_CNTVOFF_EL2  sys_reg(3, 4, 14, 0, 3)
>>  #define SYS_CNTHCTL_EL2  sys_reg(3, 4, 14, 1, 0)
>>
>> +#define SYS_SP_EL2   sys_reg(3, 6, 4, 1, 0)
>> +
>>  /* Common SCTLR_ELx flags. */
>>  #define SCTLR_ELx_EE(1 << 25)
>>  #define SCTLR_ELx_I  (1 << 12)
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 1568f8b..2b3ed70 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -900,15 +900,33 @@ static inline void access_rw(struct sys_reg_params *p, 
>> u64 *sysreg)
>>   *sysreg = p->regval;
>>  }
>>
>> +static u64 *get_special_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
>> +{
>> + u64 reg = sys_reg(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
>> +
>> + switch (reg) {
>> + case SYS_SP_EL1:
>> + return >arch.ctxt.gp_regs.sp_el1;
>> + case SYS_ELR_EL2:
>> + return _el2_sreg(vcpu, ELR_EL2);
>> + case SYS_SPSR_EL2:
>> + return _el2_sreg(vcpu, SPSR_EL2);
>> + default:
>> + return NULL;
>> + };
>> +}
>> +
>>  static bool trap_el2_regs(struct kvm_vcpu *vcpu,
>>struct sys_reg_params *p,
>>const struct sys_reg_desc *r)
>>  {
>> - /* SP_EL1 is NOT maintained in sys_regs array */
>> - if (sys_reg(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2) == SYS_SP_EL1)
>> - access_rw(p, >arch.ctxt.gp_regs.sp_el1);
>> - else
>> - access_rw(p, _sys_reg(vcpu, r->reg));
>> + u64 *sys_reg;
>> +
>> + sys_reg = get_special_reg(vcpu, p);
>> + if (!sys_reg)
>> + sys_reg = _sys_reg(vcpu, r->reg);
>> +
>> + access_rw(p, sys_reg);
>>
>>   return true;
>>  }
>> @@ -1116,6 +1134,8 @@ static bool trap_el2_regs(struct kvm_vcpu *vcpu,
>>
>>   { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
>>
>> +

Re: [RFC PATCH v2 04/38] KVM: arm/arm64: Check if nested virtualization is in use

2017-08-01 Thread Jintack Lim
On Sun, Jul 30, 2017 at 3:59 PM, Christoffer Dall  wrote:
> On Tue, Jul 18, 2017 at 11:58:30AM -0500, Jintack Lim wrote:
>> Nested virtualizaion is in use only if all three conditions are met:
>> - The architecture supports nested virtualization.
>> - The kernel parameter is set.
>> - The userspace uses nested virtualiztion feature.
>>
>> Signed-off-by: Jintack Lim 
>> ---
>>  arch/arm/include/asm/kvm_host.h   | 11 +++
>>  arch/arm64/include/asm/kvm_host.h |  2 ++
>>  arch/arm64/kvm/nested.c   | 17 +
>>  virt/kvm/arm/arm.c|  4 
>>  4 files changed, 34 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h 
>> b/arch/arm/include/asm/kvm_host.h
>> index 00b0f97..7e9e6c8 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -303,4 +303,15 @@ static inline int __init kvmarm_nested_cfg(char *buf)
>>  {
>>   return 0;
>>  }
>> +
>> +static inline int init_nested_virt(void)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline bool nested_virt_in_use(struct kvm_vcpu *vcpu)
>> +{
>> + return false;
>> +}
>> +
>>  #endif /* __ARM_KVM_HOST_H__ */
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 6df0c7c..86d4b6c 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -387,5 +387,7 @@ static inline void __cpu_init_stage2(void)
>>  }
>>
>>  int __init kvmarm_nested_cfg(char *buf);
>> +int init_nested_virt(void);
>> +bool nested_virt_in_use(struct kvm_vcpu *vcpu);
>>
>>  #endif /* __ARM64_KVM_HOST_H__ */
>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>> index 79f38da..9a05c76 100644
>> --- a/arch/arm64/kvm/nested.c
>> +++ b/arch/arm64/kvm/nested.c
>> @@ -24,3 +24,20 @@ int __init kvmarm_nested_cfg(char *buf)
>>  {
>>   return strtobool(buf, _param);
>>  }
>> +
>> +int init_nested_virt(void)
>> +{
>> + if (nested_param && cpus_have_const_cap(ARM64_HAS_NESTED_VIRT))
>> + kvm_info("Nested virtualization is supported\n");
>> +
>> + return 0;
>> +}
>> +
>> +bool nested_virt_in_use(struct kvm_vcpu *vcpu)
>> +{
>> + if (nested_param && cpus_have_const_cap(ARM64_HAS_NESTED_VIRT)
>> + && test_bit(KVM_ARM_VCPU_NESTED_VIRT, vcpu->arch.features))
>> + return true;
>
> you could initialize a bool in init_nested_virt which you then check
> here to avoid duplicating the logic.

I can make a bool to check the kernel param and the capability. The
third one is per VM given by the userspace, so we don't know it when
we initialize the host hypervisor. We can potentially have a bool in
kvm_vcpu_arch or kvm_arch to cache the whole three conditions, if that
sounds ok.

>
>> +
>> + return false;
>> +}
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index 1c1c772..36aae3a 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -1478,6 +1478,10 @@ int kvm_arch_init(void *opaque)
>>   if (err)
>>   goto out_err;
>>
>> + err = init_nested_virt();
>> + if (err)
>> + return err;
>> +
>>   err = init_subsystems();
>>   if (err)
>>   goto out_hyp;
>> --
>> 1.9.1
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 04/38] KVM: arm/arm64: Check if nested virtualization is in use

2017-08-01 Thread Jintack Lim
On Sun, Jul 30, 2017 at 3:59 PM, Christoffer Dall  wrote:
> On Tue, Jul 18, 2017 at 11:58:30AM -0500, Jintack Lim wrote:
>> Nested virtualizaion is in use only if all three conditions are met:
>> - The architecture supports nested virtualization.
>> - The kernel parameter is set.
>> - The userspace uses nested virtualiztion feature.
>>
>> Signed-off-by: Jintack Lim 
>> ---
>>  arch/arm/include/asm/kvm_host.h   | 11 +++
>>  arch/arm64/include/asm/kvm_host.h |  2 ++
>>  arch/arm64/kvm/nested.c   | 17 +
>>  virt/kvm/arm/arm.c|  4 
>>  4 files changed, 34 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h 
>> b/arch/arm/include/asm/kvm_host.h
>> index 00b0f97..7e9e6c8 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -303,4 +303,15 @@ static inline int __init kvmarm_nested_cfg(char *buf)
>>  {
>>   return 0;
>>  }
>> +
>> +static inline int init_nested_virt(void)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline bool nested_virt_in_use(struct kvm_vcpu *vcpu)
>> +{
>> + return false;
>> +}
>> +
>>  #endif /* __ARM_KVM_HOST_H__ */
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 6df0c7c..86d4b6c 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -387,5 +387,7 @@ static inline void __cpu_init_stage2(void)
>>  }
>>
>>  int __init kvmarm_nested_cfg(char *buf);
>> +int init_nested_virt(void);
>> +bool nested_virt_in_use(struct kvm_vcpu *vcpu);
>>
>>  #endif /* __ARM64_KVM_HOST_H__ */
>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>> index 79f38da..9a05c76 100644
>> --- a/arch/arm64/kvm/nested.c
>> +++ b/arch/arm64/kvm/nested.c
>> @@ -24,3 +24,20 @@ int __init kvmarm_nested_cfg(char *buf)
>>  {
>>   return strtobool(buf, _param);
>>  }
>> +
>> +int init_nested_virt(void)
>> +{
>> + if (nested_param && cpus_have_const_cap(ARM64_HAS_NESTED_VIRT))
>> + kvm_info("Nested virtualization is supported\n");
>> +
>> + return 0;
>> +}
>> +
>> +bool nested_virt_in_use(struct kvm_vcpu *vcpu)
>> +{
>> + if (nested_param && cpus_have_const_cap(ARM64_HAS_NESTED_VIRT)
>> + && test_bit(KVM_ARM_VCPU_NESTED_VIRT, vcpu->arch.features))
>> + return true;
>> +
>> + return false;
>> +}
>
> after reading through a lot of your patches, I feel like vm_has_el2()
> would be a more elegant name, but it's not a strict requirement to
> change it.

I think it's a nice name. Let me think about it :)

>
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index 1c1c772..36aae3a 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -1478,6 +1478,10 @@ int kvm_arch_init(void *opaque)
>>   if (err)
>>   goto out_err;
>>
>> + err = init_nested_virt();
>> + if (err)
>> + return err;
>> +
>>   err = init_subsystems();
>>   if (err)
>>   goto out_hyp;
>> --
>> 1.9.1
>>
>
> Thanks,
> -Christoffer
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
On Tue, 1 Aug 2017 15:34:14 +0200
Boris Brezillon  wrote:

> On Tue, 1 Aug 2017 15:11:44 +0200
> Arnd Bergmann  wrote:
> 
> > On Tue, Aug 1, 2017 at 2:29 PM, Boris Brezillon
> >  wrote:  
> > > On Tue, 1 Aug 2017 14:00:05 +0200
> > > Arnd Bergmann  wrote:
> >   
> > >> Another argument for a combined bus would be devices that
> > >> can be attached to either i2c and i3c, depending on the host
> > >> capabilities.
> > >
> > > Hm, that's already the case, isn't it? And you'll anyway need to
> > > develop specific code for both cases in the I2C/I3C device driver
> > > because I2C and I3C transfers are different. So I don't see how it
> > > would help to have a single bus here.
> > >
> > >> We have discussed whether i2c and spi should be
> > >> merged into a single bus_type in the past, as a lot of devices
> > >> can be attached to either of them.
> > >
> > > Oh, really? What's the rational behind that? I mean, I2C and SPI are
> > > quite different, and even if some devices provide both interfaces, I
> > > don't see why we should merge them. But you probably had good reasons
> > > to do so.
> > 
> > Well, we never changed it, so at least the work required to merge
> > the two was considered too much to justify any advantages.
> > 
> > The main problem with having one driver that can operate on
> > different bus types (i2c plus either spi or i3c) is the handling for
> > the various combinations in configurations (e.g. I2C=m, SPI=y).
> > 
> > The easy case is having a module_init function that registers two
> > device drivers, but that requires having a Kconfig dependency
> > on both subsystems, and you can't use the module_i2c_driver()
> > helper.
> > 
> > The second way is to have a number of #ifdef and complex
> > Kconfig dependencies for the driver to only register the
> > device_driver objects for the buses that are enabled. This
> > is also doable, but everyone gets the logic wrong the first time.  
> 
> Hm, I understand now why you'd prefer to have a single bus. Can't we
> solve this problem with a module_i3c_i2c_driver() macro that would hide
> all this complexity from I2C/I3C drivers?

I just realized I forgot to add a "depends on I2C" in the I3C Kconfig
entry. Indeed, I'm unconditionally calling functions provided by the
I2C framework which have no dummy wrapper when I2C support is disabled.
I could of course conditionally compile some portion of the I3C
framework so that it still builds when I2C is disabled but I'm not sure
it's worth the trouble.

This "depends on I2C" should also solve the I2C+I3C driver issue, since
I2C is necessarily enabled when I3C is.

Am I missing something?
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 02/38] KVM: arm/arm64: Enable nested virtualization via command-line

2017-08-01 Thread Jintack Lim
On Sun, Jul 30, 2017 at 3:59 PM, Christoffer Dall  wrote:
> On Tue, Jul 18, 2017 at 11:58:28AM -0500, Jintack Lim wrote:
>> Add a new kernel parameter(kvm-arm.nested) to enable KVM/ARM nested
>> virtualization support. This kernel parameter on arm architecture is
>> ignored since nested virtualization is not supported on arm.
>>
>> Note that this kernel parameter will not have any impact until nested
>> virtualization support is completed. Just add this parameter first to
>> use it when implementing nested virtualization support.
>>
>> Signed-off-by: Jintack Lim 
>> ---
>>  Documentation/admin-guide/kernel-parameters.txt |  4 
>>  arch/arm/include/asm/kvm_host.h |  4 
>>  arch/arm64/include/asm/kvm_host.h   |  2 ++
>>  arch/arm64/kvm/Makefile |  2 ++
>>  arch/arm64/kvm/nested.c | 26 
>> +
>>  virt/kvm/arm/arm.c  |  2 ++
>>  6 files changed, 40 insertions(+)
>>  create mode 100644 arch/arm64/kvm/nested.c
>>
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
>> b/Documentation/admin-guide/kernel-parameters.txt
>> index aa8341e..8fb152d 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -1841,6 +1841,10 @@
>>   [KVM,ARM] Trap guest accesses to GICv3 common
>>   system registers
>>
>> + kvm-arm.nested=
>> + [KVM,ARM] Allow nested virtualization in KVM/ARM.
>> + Default is 0 (disabled)
>
> We may want to say "on systems that support it" or something like that
> here as well.
>

Sounds good! Thanks.

>> +
>>   kvm-intel.ept=  [KVM,Intel] Disable extended page tables
>>   (virtualized MMU) support on capable Intel chips.
>>   Default is 1 (enabled)
>> diff --git a/arch/arm/include/asm/kvm_host.h 
>> b/arch/arm/include/asm/kvm_host.h
>> index 127e2dd..00b0f97 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -299,4 +299,8 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
>>  int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
>>  struct kvm_device_attr *attr);
>>
>> +static inline int __init kvmarm_nested_cfg(char *buf)
>> +{
>> + return 0;
>> +}
>>  #endif /* __ARM_KVM_HOST_H__ */
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 0c4fd1f..dcc4df8 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -386,4 +386,6 @@ static inline void __cpu_init_stage2(void)
>> "PARange is %d bits, unsupported configuration!", parange);
>>  }
>>
>> +int __init kvmarm_nested_cfg(char *buf);
>> +
>>  #endif /* __ARM64_KVM_HOST_H__ */
>> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
>> index 5d98100..f513047 100644
>> --- a/arch/arm64/kvm/Makefile
>> +++ b/arch/arm64/kvm/Makefile
>> @@ -35,3 +35,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
>>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
>>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
>>  kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
>> +
>> +kvm-$(CONFIG_KVM_ARM_HOST) += nested.o
>> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
>> new file mode 100644
>> index 000..79f38da
>> --- /dev/null
>> +++ b/arch/arm64/kvm/nested.c
>> @@ -0,0 +1,26 @@
>> +/*
>> + * Copyright (C) 2017 - Columbia University and Linaro Ltd.
>> + * Author: Jintack Lim 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program.  If not, see .
>> + */
>> +
>> +#include 
>> +#include 
>> +
>> +static bool nested_param;
>> +
>> +int __init kvmarm_nested_cfg(char *buf)
>> +{
>> + return strtobool(buf, _param);
>> +}
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index a39a1e1..1c1c772 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -67,6 +67,8 @@
>>
>>  static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
>>
>> +early_param("kvm-arm.nested", kvmarm_nested_cfg);
>> +
>>  static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
>>  {
>>   BUG_ON(preemptible());
>> --
>> 1.9.1
>>
--
To unsubscribe from this list: send the line "unsubscribe 

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
On Tue, 1 Aug 2017 15:11:44 +0200
Arnd Bergmann  wrote:

> On Tue, Aug 1, 2017 at 2:29 PM, Boris Brezillon
>  wrote:
> > On Tue, 1 Aug 2017 14:00:05 +0200
> > Arnd Bergmann  wrote:  
> 
> >> Another argument for a combined bus would be devices that
> >> can be attached to either i2c and i3c, depending on the host
> >> capabilities.  
> >
> > Hm, that's already the case, isn't it? And you'll anyway need to
> > develop specific code for both cases in the I2C/I3C device driver
> > because I2C and I3C transfers are different. So I don't see how it
> > would help to have a single bus here.
> >  
> >> We have discussed whether i2c and spi should be
> >> merged into a single bus_type in the past, as a lot of devices
> >> can be attached to either of them.  
> >
> > Oh, really? What's the rational behind that? I mean, I2C and SPI are
> > quite different, and even if some devices provide both interfaces, I
> > don't see why we should merge them. But you probably had good reasons
> > to do so.  
> 
> Well, we never changed it, so at least the work required to merge
> the two was considered too much to justify any advantages.
> 
> The main problem with having one driver that can operate on
> different bus types (i2c plus either spi or i3c) is the handling for
> the various combinations in configurations (e.g. I2C=m, SPI=y).
> 
> The easy case is having a module_init function that registers two
> device drivers, but that requires having a Kconfig dependency
> on both subsystems, and you can't use the module_i2c_driver()
> helper.
> 
> The second way is to have a number of #ifdef and complex
> Kconfig dependencies for the driver to only register the
> device_driver objects for the buses that are enabled. This
> is also doable, but everyone gets the logic wrong the first time.

Hm, I understand now why you'd prefer to have a single bus. Can't we
solve this problem with a module_i3c_i2c_driver() macro that would hide
all this complexity from I2C/I3C drivers?

> 
> What we end up doing to work around this for other drivers is
> to have the base driver in one library module, and separate
> modules for the bus-specific portions, which can then
> use module_i2c_driver again. There are many instances
> for combined i2c/spi drivers in the kernel, and it works fine,
> but it adds a fair bit of overhead compared to having one
> driver that would e.g. use regmap to abstract the differences
> in the probe() function and otherwise keeps everything in
> one place.
> 
>Arnd

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Arnd Bergmann
On Tue, Aug 1, 2017 at 2:29 PM, Boris Brezillon
 wrote:
> On Tue, 1 Aug 2017 14:00:05 +0200
> Arnd Bergmann  wrote:

>> Another argument for a combined bus would be devices that
>> can be attached to either i2c and i3c, depending on the host
>> capabilities.
>
> Hm, that's already the case, isn't it? And you'll anyway need to
> develop specific code for both cases in the I2C/I3C device driver
> because I2C and I3C transfers are different. So I don't see how it
> would help to have a single bus here.
>
>> We have discussed whether i2c and spi should be
>> merged into a single bus_type in the past, as a lot of devices
>> can be attached to either of them.
>
> Oh, really? What's the rational behind that? I mean, I2C and SPI are
> quite different, and even if some devices provide both interfaces, I
> don't see why we should merge them. But you probably had good reasons
> to do so.

Well, we never changed it, so at least the work required to merge
the two was considered too much to justify any advantages.

The main problem with having one driver that can operate on
different bus types (i2c plus either spi or i3c) is the handling for
the various combinations in configurations (e.g. I2C=m, SPI=y).

The easy case is having a module_init function that registers two
device drivers, but that requires having a Kconfig dependency
on both subsystems, and you can't use the module_i2c_driver()
helper.

The second way is to have a number of #ifdef and complex
Kconfig dependencies for the driver to only register the
device_driver objects for the buses that are enabled. This
is also doable, but everyone gets the logic wrong the first time.

What we end up doing to work around this for other drivers is
to have the base driver in one library module, and separate
modules for the bus-specific portions, which can then
use module_i2c_driver again. There are many instances
for combined i2c/spi drivers in the kernel, and it works fine,
but it adds a fair bit of overhead compared to having one
driver that would e.g. use regmap to abstract the differences
in the probe() function and otherwise keeps everything in
one place.

   Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] printk: Add boottime and real timestamps

2017-08-01 Thread Prarit Bhargava
printk.time=1/CONFIG_PRINTK_TIME=1 adds a unmodified local hardware clock
timestamp to printk messages.  The local hardware clock loses time each
day making it difficult to determine exactly when an issue has occurred in
the kernel log, and making it difficult to determine how kernel and
hardware issues relate to each other in real time.

Make printk output different timestampes by adding options for no
timestamp, the local hardware clock, the monotonic clock, and the real
clock.  Allow a user to pick one of the clocks by using the printk.time
kernel parameter.  Output the type of clock in
/sys/module/printk/parameters/time so userspace programs can interpret the
timestamp.

Real clock & 32-bit systems:  Selecting the real clock printk timestamp may
lead to unlikely situations where a timestamp is wrong because the real time
offset is read without the protection of a sequence lock in the call to
ktime_get_log_ts() in printk_get_ts().

v2: Use peterz's suggested Kconfig options.  Merge patchset together.  Fix
i386 !CONFIG_PRINTK builds.

Signed-off-by: Prarit Bhargava 
Cc: Mark Salyzyn 
Cc: Jonathan Corbet 
Cc: Petr Mladek 
Cc: Sergey Senozhatsky 
Cc: Steven Rostedt 
Cc: John Stultz 
Cc: Thomas Gleixner 
Cc: Stephen Boyd 
Cc: Andrew Morton 
Cc: Greg Kroah-Hartman 
Cc: "Paul E. McKenney" 
Cc: Christoffer Dall 
Cc: Deepa Dinamani 
Cc: Ingo Molnar 
Cc: Joel Fernandes 
Cc: Prarit Bhargava 
Cc: Kees Cook 
Cc: Peter Zijlstra 
Cc: Geert Uytterhoeven 
Cc: "Luis R. Rodriguez" 
Cc: Nicholas Piggin 
Cc: "Jason A. Donenfeld" 
Cc: Olof Johansson 
Cc: Josh Poimboeuf 
Cc: linux-doc@vger.kernel.org
---
 Documentation/admin-guide/kernel-parameters.txt|  6 +-
 arch/arm/configs/aspeed_g4_defconfig   |  4 +-
 arch/arm/configs/aspeed_g5_defconfig   |  4 +-
 arch/arm/configs/axm55xx_defconfig |  4 +-
 arch/arm/configs/bcm2835_defconfig |  4 +-
 arch/arm/configs/colibri_pxa270_defconfig  |  4 +-
 arch/arm/configs/colibri_pxa300_defconfig  |  4 +-
 arch/arm/configs/dove_defconfig|  4 +-
 arch/arm/configs/efm32_defconfig   |  4 +-
 arch/arm/configs/exynos_defconfig  |  4 +-
 arch/arm/configs/ezx_defconfig |  4 +-
 arch/arm/configs/h5000_defconfig   |  4 +-
 arch/arm/configs/hisi_defconfig|  4 +-
 arch/arm/configs/imote2_defconfig  |  4 +-
 arch/arm/configs/imx_v6_v7_defconfig   |  4 +-
 arch/arm/configs/keystone_defconfig|  4 +-
 arch/arm/configs/lpc18xx_defconfig |  4 +-
 arch/arm/configs/magician_defconfig|  4 +-
 arch/arm/configs/mmp2_defconfig|  4 +-
 arch/arm/configs/moxart_defconfig  |  4 +-
 arch/arm/configs/mps2_defconfig|  4 +-
 arch/arm/configs/multi_v7_defconfig|  4 +-
 arch/arm/configs/mvebu_v7_defconfig|  4 +-
 arch/arm/configs/mxs_defconfig |  4 +-
 arch/arm/configs/omap2plus_defconfig   |  4 +-
 arch/arm/configs/pxa168_defconfig  |  4 +-
 arch/arm/configs/pxa3xx_defconfig  |  4 +-
 arch/arm/configs/pxa910_defconfig  |  4 +-
 arch/arm/configs/pxa_defconfig |  4 +-
 arch/arm/configs/qcom_defconfig|  4 +-
 arch/arm/configs/raumfeld_defconfig|  4 +-
 arch/arm/configs/shmobile_defconfig|  4 +-
 arch/arm/configs/socfpga_defconfig |  4 +-
 arch/arm/configs/stm32_defconfig   |  4 +-
 arch/arm/configs/sunxi_defconfig   |  4 +-
 arch/arm/configs/tango4_defconfig  |  4 +-
 arch/arm/configs/tegra_defconfig   |  4 +-
 arch/arm/configs/u300_defconfig|  4 +-
 arch/arm/configs/u8500_defconfig   |  4 +-
 arch/arm/configs/vt8500_v6_v7_defconfig|  4 +-
 arch/arm/configs/xcep_defconfig|  4 +-
 arch/arm/configs/zx_defconfig  |  4 +-
 arch/arm64/configs/defconfig   |  4 +-
 arch/m68k/configs/amcore_defconfig |  4 +-
 arch/mips/configs/ath25_defconfig  |  4 +-
 arch/mips/configs/bcm47xx_defconfig|  4 +-
 arch/mips/configs/bmips_be_defconfig   |  4 +-
 

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
On Tue, 1 Aug 2017 14:00:05 +0200
Arnd Bergmann  wrote:

> On Mon, Jul 31, 2017 at 11:15 PM, Boris Brezillon
>  wrote:
> > Hi Arnd,
> >
> > Le Mon, 31 Jul 2017 22:16:42 +0200,
> > Arnd Bergmann  a écrit :
> >  
> >> On Mon, Jul 31, 2017 at 6:24 PM, Boris Brezillon
> >>  wrote:  
> >> > Add core infrastructure to support I3C in Linux and document it.  
> >>  
> >> > - I2C backward compatibility has been designed to be transparent to I2C
> >> >   drivers and the I2C subsystem. The I3C master just registers an I2C
> >> >   adapter which creates a new I2C bus. I'd say that, from a
> >> >   representation PoV it's not ideal because what should appear as a
> >> >   single I3C bus exposing I3C and I2C devices here appears as 2
> >> >   different busses connected to each other through the parenting (the
> >> >   I3C master is the parent of the I2C and I3C busses).
> >> >   On the other hand, I don't see a better solution if we want something
> >> >   that is not invasive.  
> >>
> >> Can you describe the reasons for making i3c a separate subsystem then,
> >> rather than extending the i2c subsystem to handle both i2c devices as
> >> before and also i3c devices and hosts?  
> >
> > Actually, that's the first option I considered, but I3C and I2C are
> > really different. I'm not talking about the physical layer here, but
> > the way the bus has to be handled by the software layer. Actually, I
> > thing the I3C bus is philosophically closer to auto-discoverable busses
> > like USB than I2C or SPI.
> >
> > Indeed, all I3C devices can be discovered and do not need to be
> > described at the board level (using DT, board files, ACPI or whatever).
> > Also, some I3C devices are hotpluggable, and most importantly, all I3C
> > devices describe themselves during the discovery procedure (called DAA
> > in the I3C world).  
> 
> Side note: please make sure you define a way to describe them
> in DT anyway. We ended up needing additional DT properties
> as well as power sequencing for most discoverable buses (pci,
> usb, mmc, ...), I'm sure this one won't be an exception even though
> the standard says you don't need it and most devices will work
> without it.
> 
> > There is some kind of "device class" concept. In the I3C world it's
> > called DCR (Device Characteristic Register), but it plays the same role:
> > it's a set of generic interfaces devices have to comply with when they
> > declare themselves as being compatible with a DCR ID (like
> > accelerometer, gyroscope, or whatever). See this table of normalized
> > DCR for more information [1].
> >
> > Devices also expose a 48-bit Provisional ID which is made of
> > sub-fields. Two of them are particularly interesting: the manufacturer
> > ID and the part ID, which are comparable to the vendor and product ID in
> > the USB world.
> >
> > These three information (DCR, ManufacturerID and PartID) can be used to
> > match drivers instead of the compatible string or driver-name used for
> > I2C devices  
> 
> The matching would be fairly easy to accomodate: the i2c bus already
> handles two distinct ways: of_device_id tables and matching by
> name, so we could easily add another method here.

Should be doable. All we need to do is define device PIDs (Provisional
IDs) in the DT so that they can be attached to the real device when it's
discovered on the bus. Also note that some I3C devices come with a
static/I2C/legacy address which can be used when this device is
connected on an I2C bus. Such devices can be accessed in I2C mode using
this static address before they get assigned a dynamic one by the I3C
master. So, that would be another solution to describe I3C devs in the
DT, but this won't work for all devs.

> 
> > So, as you can imagine, dealing with an I3C bus is really different
> > from dealing with an I2C bus, and I found the "expose an i2c_adapter
> > object for each i3c_master" way simpler (and less invasive) than
> > extending the I2C framework to support I3C devices.
> >
> > Of course, I can move all the code in drivers/i2c/, but that won't
> > change the fact that I3C and I2C busses are completely different
> > with little to share between them.
> >
> > To me, the I2C backward compatibility is just a nice feature that was
> > added to help people smoothly transition from mixed I3C busses with
> > both I2C and I3C devices connected to it (I2C devices being here
> > when no (affordable) equivalent exist in the I3C world) to pure I3C
> > busses with only I3C devices connected to it.
> >
> > This being said, I'd be happy if you prove me wrong and propose a
> > solution that allows us to extend the I2C framework to support I3C
> > without to much pain ;-).  
> 
> I think the question is not whether it can be done or not, but whether
> it is a good idea. Obviously we can create some frankenstein bus
> design that combines arbitrary different device types by just containing

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Arnd Bergmann
On Mon, Jul 31, 2017 at 11:15 PM, Boris Brezillon
 wrote:
> Hi Arnd,
>
> Le Mon, 31 Jul 2017 22:16:42 +0200,
> Arnd Bergmann  a écrit :
>
>> On Mon, Jul 31, 2017 at 6:24 PM, Boris Brezillon
>>  wrote:
>> > Add core infrastructure to support I3C in Linux and document it.
>>
>> > - I2C backward compatibility has been designed to be transparent to I2C
>> >   drivers and the I2C subsystem. The I3C master just registers an I2C
>> >   adapter which creates a new I2C bus. I'd say that, from a
>> >   representation PoV it's not ideal because what should appear as a
>> >   single I3C bus exposing I3C and I2C devices here appears as 2
>> >   different busses connected to each other through the parenting (the
>> >   I3C master is the parent of the I2C and I3C busses).
>> >   On the other hand, I don't see a better solution if we want something
>> >   that is not invasive.
>>
>> Can you describe the reasons for making i3c a separate subsystem then,
>> rather than extending the i2c subsystem to handle both i2c devices as
>> before and also i3c devices and hosts?
>
> Actually, that's the first option I considered, but I3C and I2C are
> really different. I'm not talking about the physical layer here, but
> the way the bus has to be handled by the software layer. Actually, I
> thing the I3C bus is philosophically closer to auto-discoverable busses
> like USB than I2C or SPI.
>
> Indeed, all I3C devices can be discovered and do not need to be
> described at the board level (using DT, board files, ACPI or whatever).
> Also, some I3C devices are hotpluggable, and most importantly, all I3C
> devices describe themselves during the discovery procedure (called DAA
> in the I3C world).

Side note: please make sure you define a way to describe them
in DT anyway. We ended up needing additional DT properties
as well as power sequencing for most discoverable buses (pci,
usb, mmc, ...), I'm sure this one won't be an exception even though
the standard says you don't need it and most devices will work
without it.

> There is some kind of "device class" concept. In the I3C world it's
> called DCR (Device Characteristic Register), but it plays the same role:
> it's a set of generic interfaces devices have to comply with when they
> declare themselves as being compatible with a DCR ID (like
> accelerometer, gyroscope, or whatever). See this table of normalized
> DCR for more information [1].
>
> Devices also expose a 48-bit Provisional ID which is made of
> sub-fields. Two of them are particularly interesting: the manufacturer
> ID and the part ID, which are comparable to the vendor and product ID in
> the USB world.
>
> These three information (DCR, ManufacturerID and PartID) can be used to
> match drivers instead of the compatible string or driver-name used for
> I2C devices

The matching would be fairly easy to accomodate: the i2c bus already
handles two distinct ways: of_device_id tables and matching by
name, so we could easily add another method here.

> So, as you can imagine, dealing with an I3C bus is really different
> from dealing with an I2C bus, and I found the "expose an i2c_adapter
> object for each i3c_master" way simpler (and less invasive) than
> extending the I2C framework to support I3C devices.
>
> Of course, I can move all the code in drivers/i2c/, but that won't
> change the fact that I3C and I2C busses are completely different
> with little to share between them.
>
> To me, the I2C backward compatibility is just a nice feature that was
> added to help people smoothly transition from mixed I3C busses with
> both I2C and I3C devices connected to it (I2C devices being here
> when no (affordable) equivalent exist in the I3C world) to pure I3C
> busses with only I3C devices connected to it.
>
> This being said, I'd be happy if you prove me wrong and propose a
> solution that allows us to extend the I2C framework to support I3C
> without to much pain ;-).

I think the question is not whether it can be done or not, but whether
it is a good idea. Obviously we can create some frankenstein bus
design that combines arbitrary different device types by just containing
the superset of the required information, and sprinking the code
with if()/else() to call one or the other function.

If there is very little shared code between the i2c and i3c
implementations, then the added complexity of having a combined
subsystem is clearly a strong argument against it.

On the other hand, there is value in representing the physical
bus hierarchy in the software model, and if i2c and i3c devices can
be attached to the same host bus, a good abstraction should
show them under the same parent. This is true for both the
kernel representation (in sysfs and the data structures) as well
as the device tree binding (assuming we will need to represent
i3c devices at all). The two don't have to use the same model,
but it's easier if they do.

Another argument for a 

Re: [RFC PATCH v2 38/38] KVM: arm64: Respect the virtual CPTR_EL2.TCPAC setting

2017-08-01 Thread Christoffer Dall
On Tue, Aug 01, 2017 at 07:03:35AM -0400, Jintack Lim wrote:
> Hi Christoffer,
> 
> On Mon, Jul 31, 2017 at 8:59 AM, Christoffer Dall  wrote:
> > On Tue, Jul 18, 2017 at 11:59:04AM -0500, Jintack Lim wrote:
> >> Forward CPACR_EL1 traps to the virtual EL2 if virtual CPTR_EL2 is
> >> configured to trap CPACR_EL1 accesses from EL1.
> >>
> >> This is for recursive nested virtualization.
> >>
> >> Signed-off-by: Jintack Lim 
> >> ---
> >>  arch/arm64/kvm/sys_regs.c | 5 +
> >>  1 file changed, 5 insertions(+)
> >>
> >> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >> index 6f67666..ba2966d 100644
> >> --- a/arch/arm64/kvm/sys_regs.c
> >> +++ b/arch/arm64/kvm/sys_regs.c
> >> @@ -1091,6 +1091,11 @@ static bool access_cpacr(struct kvm_vcpu *vcpu,
> >>   if (el12_reg(p) && forward_nv_traps(vcpu))
> >>   return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> >>
> >> + /* Forward this trap to the virtual EL2 if CPTR_EL2.TCPAC is set*/
> >> + if (!el12_reg(p) && !vcpu_mode_el2(vcpu) &&
> >> + (vcpu_sys_reg(vcpu, CPTR_EL2) & CPTR_EL2_TCPAC))
> >> + return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> >> +
> >
> > I'm trying to understand what should happen if the VM is in EL1 and
> > accesses CPACR_EL12, but the guest hypervisor did not set
> > CPTR_EL2.TCPAC, why would we get here, and if there's a good reason why
> 
> I guess what you meant is HCR_EL2.NV bit?
> 

No, HCR_EL2.NV is set, then we obviously get here, due to traps on _EL12
registers.

But if that wasn't the case (that's the time you'd be avaluating this
if-statement), then you're checking as part of the if-statement if the
virtual CPTR_EL2.TCPAC is set.  My question is, if the virtual
CPTR_EL2.TCPAC is not set, why would the physical one be set, which must
be the case if we're running this code, right?

> > we god here, is the EL12 access not supposed to undef at EL1 as opposed

I obviously meant *got* here.

> > to actually work, like it seems your code does when it doesn't take the
> > branch?
> 
> IIUC, we need to have this logic
> 
> if (el12_reg() && virtual HCR_EL2.NV == 0)
>inject_undef();
> 
> This is a good point, and should be applied for all traps controlled by NV 
> bit.
> 

Yes, but can this ever happen?

Thanks,
-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Documentation: filesystems: update filesystem locking documentation

2017-08-01 Thread Jeff Layton
On Tue, 2017-08-01 at 07:09 -0400, Sean Anderson wrote:
> Documentation/filesystems/Locking no longer reflects current locking
> semantics. i_mutex is no longer used for locking, and has been superseded
> by i_rwsem. Additionally, ->iterate_shared() was not documented.
> 
> Signed-off-by: Sean Anderson 
> ---
> v2: changed 'yes's to 'exclusive's when describing i_rwsem usage
> 
>  Documentation/filesystems/Locking | 43 
> ++-
>  1 file changed, 24 insertions(+), 19 deletions(-)
> 
> diff --git a/Documentation/filesystems/Locking 
> b/Documentation/filesystems/Locking
> index fe25787ff6d4..c0cab97d2b1a 100644
> --- a/Documentation/filesystems/Locking
> +++ b/Documentation/filesystems/Locking
> @@ -69,31 +69,31 @@ prototypes:
>  
>  locking rules:
>   all may block
> - i_mutex(inode)
> -lookup:  yes
> -create:  yes
> -link:yes (both)
> -mknod:   yes
> -symlink: yes
> -mkdir:   yes
> -unlink:  yes (both)
> -rmdir:   yes (both)  (see below)
> -rename:  yes (all)   (see below)
> + i_rwsem(inode)
> +lookup:  shared
> +create:  exclusive
> +link:exclusive (both)
> +mknod:   exclusive
> +symlink: exclusive
> +mkdir:   exclusive
> +unlink:  exclusive (both)
> +rmdir:   exclusive (both)(see below)
> +rename:  exclusive (all) (see below)
>  readlink:no
>  get_link:no
> -setattr: yes
> +setattr: exclusive
>  permission:  no (may not block if called in rcu-walk mode)
>  get_acl: no
>  getattr: no
>  listxattr:   no
>  fiemap:  no
>  update_time: no
> -atomic_open: yes
> +atomic_open: exclusive
>  tmpfile: no
>  
>  
> - Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
> -victim.
> + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
> + exclusive on victim.
>   cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
>  
>  See Documentation/filesystems/directory-locking for more detailed discussion
> @@ -111,10 +111,10 @@ prototypes:
>  
>  locking rules:
>   all may block
> - i_mutex(inode)
> + i_rwsem(inode)
>  list:no
>  get: no
> -set: yes
> +set: exclusive
>  
>  --- super_operations ---
>  prototypes:
> @@ -217,14 +217,14 @@ prototypes:
>  locking rules:
>   All except set_page_dirty and freepage may block
>  
> - PageLocked(page)i_mutex
> + PageLocked(page)i_rwsem
>  writepage:   yes, unlocks (see below)
>  readpage:yes, unlocks
>  writepages:
>  set_page_dirty   no
>  readpages:
> -write_begin: locks the page  yes
> -write_end:   yes, unlocksyes
> +write_begin: locks the page  exclusive
> +write_end:   yes, unlocksexclusive
>  bmap:
>  invalidatepage:  yes
>  releasepage: yes
> @@ -439,6 +439,7 @@ prototypes:
>   ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
>   ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
>   int (*iterate) (struct file *, struct dir_context *);
> + int (*iterate_shared) (struct file *, struct dir_context *);
>   unsigned int (*poll) (struct file *, struct poll_table_struct *);
>   long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
>   long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
> @@ -480,6 +481,10 @@ mutex or just to use i_size_read() instead.
>  Note: this does not protect the file->f_pos against concurrent modifications
>  since this is something the userspace has to take care about.
>  
> +->iterate() is called with i_rwsem exclusive.
> +
> +->iterate_shared() is called with i_rwsem at least shared.
> +
>  ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
>  Most instances call fasync_helper(), which does that maintenance, so it's
>  not normally something one needs to worry about.  Return values > 0 will be

Reviewed-by: Jeff Layton 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Documentation: filesystems: update filesystem locking documentation

2017-08-01 Thread Sean Anderson
Documentation/filesystems/Locking no longer reflects current locking
semantics. i_mutex is no longer used for locking, and has been superseded
by i_rwsem. Additionally, ->iterate_shared() was not documented.

Signed-off-by: Sean Anderson 
---
v2: changed 'yes's to 'exclusive's when describing i_rwsem usage

 Documentation/filesystems/Locking | 43 ++-
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/Documentation/filesystems/Locking 
b/Documentation/filesystems/Locking
index fe25787ff6d4..c0cab97d2b1a 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -69,31 +69,31 @@ prototypes:
 
 locking rules:
all may block
-   i_mutex(inode)
-lookup:yes
-create:yes
-link:  yes (both)
-mknod: yes
-symlink:   yes
-mkdir: yes
-unlink:yes (both)
-rmdir: yes (both)  (see below)
-rename:yes (all)   (see below)
+   i_rwsem(inode)
+lookup:shared
+create:exclusive
+link:  exclusive (both)
+mknod: exclusive
+symlink:   exclusive
+mkdir: exclusive
+unlink:exclusive (both)
+rmdir: exclusive (both)(see below)
+rename:exclusive (all) (see below)
 readlink:  no
 get_link:  no
-setattr:   yes
+setattr:   exclusive
 permission:no (may not block if called in rcu-walk mode)
 get_acl:   no
 getattr:   no
 listxattr: no
 fiemap:no
 update_time:   no
-atomic_open:   yes
+atomic_open:   exclusive
 tmpfile:   no
 
 
-   Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
-victim.
+   Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
+   exclusive on victim.
cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
 
 See Documentation/filesystems/directory-locking for more detailed discussion
@@ -111,10 +111,10 @@ prototypes:
 
 locking rules:
all may block
-   i_mutex(inode)
+   i_rwsem(inode)
 list:  no
 get:   no
-set:   yes
+set:   exclusive
 
 --- super_operations ---
 prototypes:
@@ -217,14 +217,14 @@ prototypes:
 locking rules:
All except set_page_dirty and freepage may block
 
-   PageLocked(page)i_mutex
+   PageLocked(page)i_rwsem
 writepage: yes, unlocks (see below)
 readpage:  yes, unlocks
 writepages:
 set_page_dirty no
 readpages:
-write_begin:   locks the page  yes
-write_end: yes, unlocksyes
+write_begin:   locks the page  exclusive
+write_end: yes, unlocksexclusive
 bmap:
 invalidatepage:yes
 releasepage:   yes
@@ -439,6 +439,7 @@ prototypes:
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
+   int (*iterate_shared) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
@@ -480,6 +481,10 @@ mutex or just to use i_size_read() instead.
 Note: this does not protect the file->f_pos against concurrent modifications
 since this is something the userspace has to take care about.
 
+->iterate() is called with i_rwsem exclusive.
+
+->iterate_shared() is called with i_rwsem at least shared.
+
 ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
 Most instances call fasync_helper(), which does that maintenance, so it's
 not normally something one needs to worry about.  Return values > 0 will be
-- 
2.13.2



signature.asc
Description: OpenPGP digital signature


Re: [RFC PATCH v2 38/38] KVM: arm64: Respect the virtual CPTR_EL2.TCPAC setting

2017-08-01 Thread Jintack Lim
Hi Christoffer,

On Mon, Jul 31, 2017 at 8:59 AM, Christoffer Dall  wrote:
> On Tue, Jul 18, 2017 at 11:59:04AM -0500, Jintack Lim wrote:
>> Forward CPACR_EL1 traps to the virtual EL2 if virtual CPTR_EL2 is
>> configured to trap CPACR_EL1 accesses from EL1.
>>
>> This is for recursive nested virtualization.
>>
>> Signed-off-by: Jintack Lim 
>> ---
>>  arch/arm64/kvm/sys_regs.c | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 6f67666..ba2966d 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -1091,6 +1091,11 @@ static bool access_cpacr(struct kvm_vcpu *vcpu,
>>   if (el12_reg(p) && forward_nv_traps(vcpu))
>>   return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
>>
>> + /* Forward this trap to the virtual EL2 if CPTR_EL2.TCPAC is set*/
>> + if (!el12_reg(p) && !vcpu_mode_el2(vcpu) &&
>> + (vcpu_sys_reg(vcpu, CPTR_EL2) & CPTR_EL2_TCPAC))
>> + return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
>> +
>
> I'm trying to understand what should happen if the VM is in EL1 and
> accesses CPACR_EL12, but the guest hypervisor did not set
> CPTR_EL2.TCPAC, why would we get here, and if there's a good reason why

I guess what you meant is HCR_EL2.NV bit?

> we god here, is the EL12 access not supposed to undef at EL1 as opposed
> to actually work, like it seems your code does when it doesn't take the
> branch?

IIUC, we need to have this logic

if (el12_reg() && virtual HCR_EL2.NV == 0)
   inject_undef();

This is a good point, and should be applied for all traps controlled by NV bit.

>
>>   /*
>>* When the virtual HCR_EL2.E2H == 1, an access to CPACR_EL1
>>* in the virtual EL2 is to access CPTR_EL2.
>> --
>> 1.9.1
>>
>
> Thanks,
> -Christoffer
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH, RESEND 08/12] ima: added parser for RPM data type

2017-08-01 Thread Roberto Sassu

On 8/1/2017 12:27 PM, Christoph Hellwig wrote:

On Tue, Aug 01, 2017 at 12:20:36PM +0200, Roberto Sassu wrote:

This patch introduces a parser for RPM packages. It extracts the digests
from the RPMTAG_FILEDIGESTS header section and converts them to binary data
before adding them to the hash table.

The advantage of this data type is that verifiers can determine who
produced that data, as headers are signed by Linux distributions vendors.
RPM headers signatures can be provided as digest list metadata.


Err, parsing arbitrary file formats has no business in the kernel.


The benefit of this choice is that no actions are required for
Linux distribution vendors to support the solution I'm proposing,
because they already provide signed digest lists (RPM headers).

Since the proof of loading a digest list is the digest of the
digest list (included in the list metadata), if RPM headers are
converted to a different format, remote attestation verifiers
cannot check the signature.

If the concern is security, it would be possible to prevent unsigned
RPM headers from being parsed, if the PGP key type is upstreamed
(adding in CC keyri...@vger.kernel.org).

Roberto

--
HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Bo PENG, Qiuen PENG, Shengli WANG
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 00/38] Nested Virtualization on KVM/ARM

2017-08-01 Thread Jintack Lim
Hi Christoffer,

On Mon, Jul 31, 2017 at 9:00 AM, Christoffer Dall  wrote:
> Hi Jintack,
>
> On Tue, Jul 18, 2017 at 11:58:26AM -0500, Jintack Lim wrote:
>> Nested virtualization is the ability to run a virtual machine inside another
>> virtual machine. In other words, it’s about running a hypervisor (the guest
>> hypervisor) on top of another hypervisor (the host hypervisor).
>>
>> Supporting nested virtualization on ARM means that the hypervisor provides 
>> not
>> only EL0/EL1 execution environment to VMs as it usually does but also the
>> virtualization extensions including EL2 execution environment. Once the host
>> hypervisor provides those execution environments to the VMs, then the guest
>> hypervisor can run its own VMs (nested VMs) naturally.
>>
>> This series supports nested virtualization on arm64. ARM recently announced 
>> an
>> extension (ARMv8.3) which has support for nested virtualization[1]. This 
>> patch
>> set is based on the ARMv8.3 specification and tested on the FastModel with
>> ARMv8.3 extension.
>>
>> The whole patch set to support nested virtualization is huge over 70
>> patches, so I categorized them into four parts: CPU, memory, VGIC, and timer
>> virtualization. This patch series is the first part.
>>
>> CPU virtualization patch series provides basic nested virtualization 
>> framework
>> and instruction emulations including v8.1 VHE feature and v8.3 nested
>> virtualization feature for VMs.
>>
>> This patch series again can be divided into four parts. Patch 1 to 5 
>> introduces
>> nested virtualization by discovering hardware feature, adding a kernel
>> parameter and allowing the userspace to set the initial CPU mode to EL2.
>>
>> Patch 6 to 25 are to support the EL2 execution environment, the virtual EL2, 
>> to
>> a VM on v8.0 architecture. We de-privilege the guest hypervisor and emulate 
>> the
>> virtual EL2 mode in EL1 using the hardware features provided by ARMv8.3; The
>> host hypervisor manages virtual EL2 register state for the guest hypervisor
>> and shadow EL1 register state that reflects the virtual EL2 register state to
>> run the guest hypervisor in EL1.
>>
>> Patch 26 to 33 add support for the virtual EL2 with Virtualization Host
>> Extensions. These patches emulate newly defined registers and bits in v8.1 
>> and
>> allow the virtual EL2 to access EL2 register states via EL1 register accesses
>> as in the real EL2.
>>
>> Patch 34 to 38 are to support for the virtual EL2 with nested virtualization.
>> These enable recursive nested virtualization.
>>
>> This patch set is tested on the FastModel with the v8.3 extension for arm64 
>> and
>> a cubietruck for arm32. On the FastModel, the host and the guest kernels are
>> compiled with and without VHE, so there are four combinations. I was able to
>> boot SMP Linux in the nested VM on all four configurations and able to run
>> hackbench. I also checked that regular VMs could boot when the nested
>> virtualization kernel parameter was not set. On the cubietruck, I also 
>> verified
>> that regular VMs could boot as well.
>>
>> I'll share my experiment setup shortly.
>>
>> Even though this work has some limitations and TODOs, I'd appreciate early
>> feedback on this RFC. Specifically, I'm interested in:
>>
>> - Overall design to manage vcpu context for the virtual EL2
>> - Verifying correct EL2 register configurations such as HCR_EL2, CPTR_EL2
>>   (Patch 30 and 32)
>> - Patch organization and coding style
>>
>> This patch series is based on kvm/next d38338e.
>> The whole patch series including memory, VGIC, and timer patches is available
>> here:
>>
>> g...@github.com:columbia/nesting-pub.git rfc-v2
>>
>> Limitations:
>> - There are some cases that the target exception level of a VM is ambiguous 
>> when
>>   emulating eret instruction. I'm discussing this issue with Christoffer and
>>   Marc. Meanwhile, I added a temporary patch (not included in this
>>   series. f1beaba in the repo) and used 4.10.0 kernel when testing the guest
>>   hypervisor with VHE.
>> - Recursive nested virtualization is not tested yet.
>> - Other hypervisors (such as Xen) on KVM are not tested.
>>
>> TODO:
>> - Submit memory, VGIC, and timer patches
>> - Evaluate regular VM performance to see if there's a negative impact.
>> - Test other hypervisors such as Xen on KVM
>> - Test recursive nested virtualization
>>
>
> I think this overall looks pretty good, and I think you can drop the RFC
> tag from the next revision, assuming the remaining patch sets for
> memory, vgic, and timers don't require some major controversial rework
> of these patches.

Thank you for your thorough review. I'm happy that we can drop the RFC tag :).

Thanks,
Jintack

>
> Thanks,
> -Christoffer
>
>> v1-->v2:
>> - Added support for the virtual EL2 with VHE
>> - Rewrote commit messages and comments from the perspective of supporting
>>   execution environments to VMs, rather than from the perspective of the 
>> guest
>>   hypervisor running in them.

Re: [RFC 2/5] i3c: Add core I3C infrastructure

2017-08-01 Thread Boris Brezillon
Hello Greg,

On Mon, 31 Jul 2017 18:40:21 -0700
Greg Kroah-Hartman  wrote:

> On Mon, Jul 31, 2017 at 06:24:47PM +0200, Boris Brezillon wrote:
> > Add core infrastructure to support I3C in Linux and document it.
> > 
> > This infrastructure is not complete yet and will be extended over
> > time.
> > 
> > There are a few design choices that are worth mentioning because they
> > impact the way I3C device drivers can interact with their devices:
> > 
> > - all functions used to send I3C/I2C frames must be called in
> >   non-atomic context. Mainly done this way to ease implementation, but
> >   this is still open to discussion. Please let me know if you think it's
> >   worth considering an asynchronous model here
> > - the bus element is a separate object and is not implicitly described
> >   by the master (as done in I2C). The reason is that I want to be able
> >   to handle multiple master connected to the same bus and visible to
> >   Linux.
> >   In this situation, we should only have one instance of the device and
> >   not one per master, and sharing the bus object would be part of the
> >   solution to gracefully handle this case.
> >   I'm not sure we will ever need to deal with multiple masters
> >   controlling the same bus and exposed under Linux, but separating the
> >   bus and master concept is pretty easy, hence the decision to do it
> >   like that.
> >   The other benefit of separating the bus and master concepts is that
> >   master devices appear under the bus directory in sysfs.
> > - I2C backward compatibility has been designed to be transparent to I2C
> >   drivers and the I2C subsystem. The I3C master just registers an I2C
> >   adapter which creates a new I2C bus. I'd say that, from a
> >   representation PoV it's not ideal because what should appear as a
> >   single I3C bus exposing I3C and I2C devices here appears as 2
> >   different busses connected to each other through the parenting (the
> >   I3C master is the parent of the I2C and I3C busses).
> >   On the other hand, I don't see a better solution if we want something
> >   that is not invasive.
> > - the whole API is exposed through a single header file (i3c.h), but I'm
> >   seriously considering the option of splitting the I3C driver/user API
> >   and the I3C master one, mainly to hide I3C core internals and restrict
> >   what I3C users can do to a limited set of functionalities (send
> >   I3C/I2C frames to a specific device and that's all).
> > 
> > Missing features in this preliminary version:
> > - no support for IBI (In Band Interrupts). This is something I'm working
> >   on, and I'm still unsure how to represent it: an irqchip or a
> >   completely independent representation that would be I3C specific.
> >   Right now, I'm more inclined to go for the irqchip approach, since
> >   this is something people are used to deal with already.
> > - no Hot Join support, which is similar to hotplug
> > - no support for multi-master and the associated concepts (mastership
> >   handover, support for secondary masters, ...)
> > - I2C devices can only be described using DT because this is the only
> >   use case I have. However, the framework can easily be extended with
> >   ACPI and board info support
> > - I3C slave framework. This has been completely omitted, but shouldn't
> >   have a huge impact on the I3C framework because I3C slaves don't see
> >   the whole bus, it's only about handling master requests and generating
> >   IBIs. Some of the struct, constant and enum definitions could be
> >   shared, but most of the I3C slave framework logic will be different
> > 
> > Signed-off-by: Boris Brezillon 
> > ---
> >  Documentation/i3c/conf.py   |   10 +
> >  Documentation/i3c/device-driver-api.rst |7 +
> >  Documentation/i3c/index.rst |9 +
> >  Documentation/i3c/master-driver-api.rst |8 +
> >  Documentation/i3c/protocol.rst  |  199 +
> >  Documentation/index.rst |1 +
> >  drivers/Kconfig |2 +
> >  drivers/Makefile|2 +-
> >  drivers/i3c/Kconfig |   24 +
> >  drivers/i3c/Makefile|3 +
> >  drivers/i3c/core.c  |  532 ++
> >  drivers/i3c/device.c|  138 
> >  drivers/i3c/internals.h |   45 ++
> >  drivers/i3c/master.c| 1225 
> > +++
> >  drivers/i3c/master/Kconfig  |0
> >  drivers/i3c/master/Makefile |0
> >  include/linux/i3c/ccc.h |  389 ++
> >  include/linux/i3c/device.h  |  212 ++
> >  include/linux/i3c/master.h  |  453 
> >  include/linux/mod_devicetable.h |   15 +
> >  20 files changed, 3273 insertions(+), 1 deletion(-)  
> 
> Any chance you can break the documentation out 

Re: [PATCH, RESEND 08/12] ima: added parser for RPM data type

2017-08-01 Thread Christoph Hellwig
On Tue, Aug 01, 2017 at 12:20:36PM +0200, Roberto Sassu wrote:
> This patch introduces a parser for RPM packages. It extracts the digests
> from the RPMTAG_FILEDIGESTS header section and converts them to binary data
> before adding them to the hash table.
> 
> The advantage of this data type is that verifiers can determine who
> produced that data, as headers are signed by Linux distributions vendors.
> RPM headers signatures can be provided as digest list metadata.

Err, parsing arbitrary file formats has no business in the kernel.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH, RESEND 08/12] ima: added parser for RPM data type

2017-08-01 Thread Roberto Sassu
This patch introduces a parser for RPM packages. It extracts the digests
from the RPMTAG_FILEDIGESTS header section and converts them to binary data
before adding them to the hash table.

The advantage of this data type is that verifiers can determine who
produced that data, as headers are signed by Linux distributions vendors.
RPM headers signatures can be provided as digest list metadata.

Signed-off-by: Roberto Sassu 
---
 security/integrity/ima/ima_digest_list.c | 86 +++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/security/integrity/ima/ima_digest_list.c 
b/security/integrity/ima/ima_digest_list.c
index c1ef79a..0b5916d 100644
--- a/security/integrity/ima/ima_digest_list.c
+++ b/security/integrity/ima/ima_digest_list.c
@@ -19,11 +19,13 @@
 #include "ima.h"
 #include "ima_template_lib.h"
 
+#define RPMTAG_FILEDIGESTS 1035
+
 enum digest_metadata_fields {DATA_ALGO, DATA_DIGEST, DATA_SIGNATURE,
 DATA_FILE_PATH, DATA_REF_ID, DATA_TYPE,
 DATA__LAST};
 
-enum digest_data_types {DATA_TYPE_COMPACT_LIST};
+enum digest_data_types {DATA_TYPE_COMPACT_LIST, DATA_TYPE_RPM};
 
 enum compact_list_entry_ids {COMPACT_LIST_ID_DIGEST};
 
@@ -33,6 +35,20 @@ struct compact_list_hdr {
u32 datalen;
 } __packed;
 
+struct rpm_hdr {
+   u32 magic;
+   u32 reserved;
+   u32 tags;
+   u32 datasize;
+} __packed;
+
+struct rpm_entryinfo {
+   int32_t tag;
+   u32 type;
+   int32_t offset;
+   u32 count;
+} __packed;
+
 static int ima_parse_compact_list(loff_t size, void *buf)
 {
void *bufp = buf, *bufendp = buf + size;
@@ -80,6 +96,71 @@ static int ima_parse_compact_list(loff_t size, void *buf)
return 0;
 }
 
+static int ima_parse_rpm(loff_t size, void *buf)
+{
+   void *bufp = buf, *bufendp = buf + size;
+   struct rpm_hdr *hdr = bufp;
+   u32 tags = be32_to_cpu(hdr->tags);
+   struct rpm_entryinfo *entry;
+   void *datap = bufp + sizeof(*hdr) + tags * sizeof(struct rpm_entryinfo);
+   int digest_len = hash_digest_size[ima_hash_algo];
+   u8 digest[digest_len];
+   int ret, i, j;
+
+   const unsigned char rpm_header_magic[8] = {
+   0x8e, 0xad, 0xe8, 0x01, 0x00, 0x00, 0x00, 0x00
+   };
+
+   if (size < sizeof(*hdr)) {
+   pr_err("Missing RPM header\n");
+   return -EINVAL;
+   }
+
+   if (memcmp(bufp, rpm_header_magic, sizeof(rpm_header_magic))) {
+   pr_err("Invalid RPM header\n");
+   return -EINVAL;
+   }
+
+   bufp += sizeof(*hdr);
+
+   for (i = 0; i < tags && (bufp + sizeof(*entry)) <= bufendp;
+i++, bufp += sizeof(*entry)) {
+   entry = bufp;
+
+   if (be32_to_cpu(entry->tag) != RPMTAG_FILEDIGESTS)
+   continue;
+
+   datap += be32_to_cpu(entry->offset);
+
+   for (j = 0; j < be32_to_cpu(entry->count) &&
+datap < bufendp; j++) {
+   if (strlen(datap) == 0) {
+   datap++;
+   continue;
+   }
+
+   if (datap + digest_len * 2 + 1 > bufendp) {
+   pr_err("RPM header read at invalid offset\n");
+   return -EINVAL;
+   }
+
+   ret = hex2bin(digest, datap, digest_len);
+   if (ret < 0)
+   return -EINVAL;
+
+   ret = ima_add_digest_data_entry(digest);
+   if (ret < 0 && ret != -EEXIST)
+   return ret;
+
+   datap += digest_len * 2 + 1;
+   }
+
+   break;
+   }
+
+   return 0;
+}
+
 static int ima_parse_digest_list_data(struct ima_field_data *data)
 {
void *digest_list;
@@ -107,6 +188,9 @@ static int ima_parse_digest_list_data(struct ima_field_data 
*data)
case DATA_TYPE_COMPACT_LIST:
ret = ima_parse_compact_list(digest_list_size, digest_list);
break;
+   case DATA_TYPE_RPM:
+   ret = ima_parse_rpm(digest_list_size, digest_list);
+   break;
default:
pr_err("Parser for data type %d not implemented\n", data_type);
ret = -EINVAL;
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH, RESEND 06/12] ima: added parser of digest lists metadata

2017-08-01 Thread Roberto Sassu
Userspace applications will be able to load digest lists by supplying
their metadata.

Digest list metadata are:

- DATA_ALGO: algorithm of the digests to be uploaded
- DATA_DIGEST: digest of the file containing the digest list
- DATA_SIGNATURE: signature of the file containing the digest list
- DATA_FILE_PATH: pathname
- DATA_REF_ID: reference ID of the digest list
- DATA_TYPE: type of digest list

The new function ima_parse_digest_list_metadata() parses the metadata
and load each file individually. Then, it parses the data according
to the data type specified.

Since digest lists are measured, their digest is added to the hash table
so that IMA does not create a measurement entry for them (which would
affect the performance). The only measurement entry created will be
for the metadata.

Signed-off-by: Roberto Sassu 
---
 include/linux/fs.h   |   1 +
 security/integrity/ima/Kconfig   |  11 
 security/integrity/ima/Makefile  |   1 +
 security/integrity/ima/ima.h |   8 +++
 security/integrity/ima/ima_digest_list.c | 105 +++
 5 files changed, 126 insertions(+)
 create mode 100644 security/integrity/ima/ima_digest_list.c

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e1fd5d..2eb6e7c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2751,6 +2751,7 @@ extern int do_pipe_flags(int *, int);
id(KEXEC_IMAGE, kexec-image)\
id(KEXEC_INITRAMFS, kexec-initramfs)\
id(POLICY, security-policy) \
+   id(DIGEST_LIST, security-digest-list)   \
id(MAX_ID, )
 
 #define __fid_enumify(ENUM, dummy) READING_ ## ENUM,
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 35ef693..8965dcc 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -227,3 +227,14 @@ config IMA_APPRAISE_SIGNED_INIT
default n
help
   This option requires user-space init to be signed.
+
+config IMA_DIGEST_LIST
+   bool "Measure files depending on uploaded digest lists"
+   depends on IMA
+   default n
+   help
+  This option allows users to load digest lists. If a measured
+  file has the same digest of one from loaded lists, IMA will
+  not create a new measurement entry. A measurement entry will
+  be created only when digest lists are loaded (this entry
+  contains the digest of digest lists metadata).
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index 29f198b..00dbe3a 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -9,4 +9,5 @@ ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o 
ima_crypto.o ima_api.o \
 ima_policy.o ima_template.o ima_template_lib.o
 ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
 ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o
+ima-$(CONFIG_IMA_DIGEST_LIST) += ima_digest_list.o
 obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index a0c6808..9ecb7cc 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -157,6 +157,14 @@ int ima_restore_measurement_entry(struct 
ima_template_entry *entry);
 int ima_restore_measurement_list(loff_t bufsize, void *buf);
 struct ima_digest *ima_lookup_loaded_digest(u8 *digest);
 int ima_add_digest_data_entry(u8 *digest);
+#ifdef CONFIG_IMA_DIGEST_LIST
+ssize_t ima_parse_digest_list_metadata(loff_t size, void *buf);
+#else
+static inline ssize_t ima_parse_digest_list_metadata(loff_t size, void *buf)
+{
+   return -ENOTSUPP;
+}
+#endif
 int ima_measurements_show(struct seq_file *m, void *v);
 unsigned long ima_get_binary_runtime_size(void);
 int ima_init_template(void);
diff --git a/security/integrity/ima/ima_digest_list.c 
b/security/integrity/ima/ima_digest_list.c
new file mode 100644
index 000..3e1ff69b
--- /dev/null
+++ b/security/integrity/ima/ima_digest_list.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2017 Huawei Technologies Co. Ltd.
+ *
+ * Author: Roberto Sassu 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_digest_list.c
+ *  Functions to manage digest lists.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+
+#include "ima.h"
+#include "ima_template_lib.h"
+
+enum digest_metadata_fields {DATA_ALGO, DATA_DIGEST, DATA_SIGNATURE,
+DATA_FILE_PATH, DATA_REF_ID, DATA_TYPE,
+DATA__LAST};
+
+static int ima_parse_digest_list_data(struct ima_field_data *data)
+{
+   void *digest_list;
+   loff_t digest_list_size;
+   u16 data_algo = le16_to_cpu(*(u16 *)data[DATA_ALGO].data);
+   u16 data_type = 

[PATCH V3 2/8] drivers: boot_constraint: Add boot_constraints_disable kernel parameter

2017-08-01 Thread Viresh Kumar
Users must be given an option to discard any constraints set by
bootloaders. For example, consider that a constraint is set for the LCD
controller's supply and the LCD driver isn't loaded by the kernel. If
the user doesn't need to use the LCD device, then he shouldn't be forced
to honour the constraint.

We can also think about finer control of such constraints with help of
some sysfs files, but a kernel parameter is fine to begin with.

Tested-by: Rajendra Nayak 
Signed-off-by: Viresh Kumar 
---
 Documentation/admin-guide/kernel-parameters.txt |  3 +++
 drivers/base/boot_constraints/core.c| 17 +
 2 files changed, 20 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index d9c171ce4190..0706d1b6004d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -426,6 +426,9 @@
embedded devices based on command line input.
See Documentation/block/cmdline-partition.txt
 
+   boot_constraints_disable
+   Do not set any boot constraints for devices.
+
boot_delay= Milliseconds to delay each printk during boot.
Values larger than 10 seconds (1) are changed to
no delay (0).
diff --git a/drivers/base/boot_constraints/core.c 
b/drivers/base/boot_constraints/core.c
index 366a05d6d9ba..e0c33b2b216f 100644
--- a/drivers/base/boot_constraints/core.c
+++ b/drivers/base/boot_constraints/core.c
@@ -24,6 +24,17 @@
 static LIST_HEAD(constraint_devices);
 static DEFINE_MUTEX(constraint_devices_mutex);
 
+static bool boot_constraints_disabled;
+
+static int __init constraints_disable(char *str)
+{
+   boot_constraints_disabled = true;
+   pr_debug("disabled\n");
+
+   return 0;
+}
+early_param("boot_constraints_disable", constraints_disable);
+
 /* Boot constraints core */
 
 static struct constraint_dev *constraint_device_find(struct device *dev)
@@ -126,6 +137,9 @@ int dev_boot_constraint_add(struct device *dev,
struct constraint *constraint;
int ret;
 
+   if (boot_constraints_disabled)
+   return -ENODEV;
+
mutex_lock(_devices_mutex);
 
/* Find or add the cdev type first */
@@ -184,6 +198,9 @@ void dev_boot_constraints_remove(struct device *dev)
struct constraint_dev *cdev;
struct constraint *constraint, *temp;
 
+   if (boot_constraints_disabled)
+   return;
+
mutex_lock(_devices_mutex);
 
cdev = constraint_device_find(dev);
-- 
2.13.0.71.gd7076ec9c9cb

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V3 0/8] drivers: Boot Constraints core

2017-08-01 Thread Viresh Kumar
Hi Greg,

Here is V3 of the boot constraints core based on the feedbacks I have
received during V2. This tested on real hardware (Qcom dragonboard 410c)
with a display controller configured from bootloader to display a flash
screen. Obviously it doesn't work seamlessly without this series and
works just fine with it. Rajendra Nayak helped getting this tested on
Qcom hardware.

Problem statement:

Some devices are powered ON by the bootloader before the bootloader
handovers control to Linux. It maybe important for those devices to keep
working until the time a Linux device driver probes the device and
reconfigure its resources.

A typical example of that can be the LCD controller, which is used by
the bootloaders to show image(s) while the platform is booting into
Linux.  The LCD controller can be using some resources, like clk,
regulators, etc, that are shared between several devices. These shared
resources should be configured to satisfy need of all the users.  If
another device's (X) driver gets probed before the LCD controller driver
in this case, then it may end up reconfiguring these resources to ranges
satisfying the current users (only device X) and that can make the LCD
screen unstable.

Of course we can have more complex cases where the same resource is
getting used by two devices while the kernel boots and the order in
which devices get probed wouldn't matter as the other device will surely
break then.

There are also cases where the resources may not be shared, but the
kernel will disable them forcefully as no users may have appeared until
a certain point in kernel boot. This makes sure that the resources stay
enabled. A wide variety of constraints can be satisfied using the new
framework.

Proposed solution:

This patchset introduces the concept of boot-constraints, which are set
by platform specific drivers (for now at least) at early init (like
subsys_initcall) and the kernel will satisfy them until the time driver
for such a device is probed (successfully or unsuccessfully). Once the
driver is probed, the driver core removes the constraints set for the
device. This series implements clk, regulator and PM domain constraints
for now.

The last patch isn't up for merge yet, and is used to test the boot
constraint framework on Qcom 410c along with some of the display
controller patches from Rob's series [1] to make sure the controller's
registers are configured properly.

Rebased over: drivers/driver-core-next (some debugfs dependencies)
Pushed here: git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/linux.git 
device/boot-constraints

V2->V3:
- Removed DT support as we aren't sure about how to define the bindings
  yet.
- Added CLK and PM domain constraint types.
- A new directory is added for boot constraints, which will also contain
  platform specific drivers in future.
- Deferred devices are still supported, just that it wouldn't be called
  from generic code anymore but platform specific code.
- Tested on Qcom 410c dragonboard with display flash screen (Rajendra).
- Usual renaming/commit-log-updates/etc changes done.

V1->V2:
- Add support for setting constraints for devices created from DT.
- Allow handling deferred devices earlier then late_init.
- Remove 'default y' line from kconfig.
- Drop '=" after boot_constraints_disable kernel param.
- Dropped the dummy testing patch now.

--
viresh

[1] https://marc.info/?l=dri-devel=149979722606563=2

Rajendra Nayak (1):
  drivers: boot_constraint: Add Qualcomm display controller constraints

Viresh Kumar (7):
  drivers: Add boot constraints core
  drivers: boot_constraint: Add boot_constraints_disable kernel
parameter
  drivers: boot_constraint: Add support for supply constraints
  drivers: boot_constraint: Add support for clk constraints
  drivers: boot_constraint: Add support for PM constraints
  drivers: boot_constraint: Add debugfs support
  drivers: boot_constraint: Manage deferrable constraints

 Documentation/admin-guide/kernel-parameters.txt |   3 +
 drivers/base/Kconfig|  10 +
 drivers/base/Makefile   |   1 +
 drivers/base/base.h |   1 +
 drivers/base/boot_constraints/Makefile  |   3 +
 drivers/base/boot_constraints/clk.c |  74 ++
 drivers/base/boot_constraints/core.c| 300 
 drivers/base/boot_constraints/core.h|  50 
 drivers/base/boot_constraints/deferrable_dev.c  | 192 +++
 drivers/base/boot_constraints/pm.c  |  32 +++
 drivers/base/boot_constraints/qcom-display.c| 107 +
 drivers/base/boot_constraints/supply.c  | 108 +
 drivers/base/dd.c   |  32 ++-
 include/linux/boot_constraint.h |  68 ++
 14 files changed, 974 insertions(+), 7 deletions(-)
 create mode 100644 drivers/base/boot_constraints/Makefile
 create mode 100644 drivers/base/boot_constraints/clk.c
 create mode 100644 

Re: [RFC v6 21/62] powerpc: introduce execute-only pkey

2017-08-01 Thread Michael Ellerman
Thiago Jung Bauermann  writes:
> Ram Pai  writes:
...
>> +
>> +/* We got one, store it and use it from here on out */
>> +if (need_to_set_mm_pkey)
>> +mm->context.execute_only_pkey = execute_only_pkey;
>> +return execute_only_pkey;
>> +}
>
> If you follow the code flow in __execute_only_pkey, the AMR and UAMOR
> are read 3 times in total, and AMR is written twice. IAMR is read and
> written twice. Since they are SPRs and access to them is slow (or isn't
> it?),

SPRs read/writes are slow, but they're not *that* slow in comparison to
a system call (which I think is where this code is being called?).

So we should try to avoid too many SPR read/writes, but at the same time
we can accept more than the minimum if it makes the code much easier to
follow.

cheers
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html