date:20180625

Re: [PATCH 1/2] fs: fsnotify: account fsnotify metadata to kmemcg

2018-06-25 Thread Amir Goldstein

On Tue, Jun 26, 2018 at 2:06 AM, Shakeel Butt  wrote:
> A lot of memory can be consumed by the events generated for the huge or
> unlimited queues if there is either no or slow listener.  This can cause
> system level memory pressure or OOMs.  So, it's better to account the
> fsnotify kmem caches to the memcg of the listener.
>
> However the listener can be in a different memcg than the memcg of the
> producer and these allocations happen in the context of the event
> producer. This patch introduces remote memcg charging scope API which the
> producer can use to charge the allocations to the memcg of the listener.
>
> There are seven fsnotify kmem caches and among them allocations from
> dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and
> inotify_inode_mark_cachep happens in the context of syscall from the
> listener.  So, SLAB_ACCOUNT is enough for these caches.
>
> The objects from fsnotify_mark_connector_cachep are not accounted as they
> are small compared to the notification mark or events and it is unclear
> whom to account connector to since it is shared by all events attached to
> the inode.
>
> The allocations from the event caches happen in the context of the event
> producer.  For such caches we will need to remote charge the allocations
> to the listener's memcg.  Thus we save the memcg reference in the
> fsnotify_group structure of the listener.
>
> This patch has also moved the members of fsnotify_group to keep the size
> same, at least for 64 bit build, even with additional member by filling
> the holes.
>
> Signed-off-by: Shakeel Butt 
> Cc: Michal Hocko 
> Cc: Jan Kara 
> Cc: Amir Goldstein 
> Cc: Greg Thelen 
> Cc: Johannes Weiner 
> Cc: Vladimir Davydov 
> Cc: Roman Gushchin 
> ---
> Changelog since v6:
> - Removed Jan's ACK as the code has changed a lot
> - Squashed the separate remote charging API path into this one
> - Removed kmalloc* & kmem_cache_alloc* APIs and only kept the scope API
> - Changed fsnotify remote charging code to use scope API
>
> Changelog since v5:
> - None
>
> Changelog since v4:
> - Fixed the build for CONFIG_MEMCG=n
>
> Changelog since v3:
> - Rebased over Jan's patches.
> - Some cleanup based on Amir's comments.
>
> Changelog since v2:
> - None
>
> Changelog since v1:
> - no more charging fsnotify_mark_connector objects
> - Fixed the build for SLOB
>
>  fs/notify/dnotify/dnotify.c  |  5 ++--
>  fs/notify/fanotify/fanotify.c| 17 ++--
>  fs/notify/fanotify/fanotify_user.c   |  5 +++-
>  fs/notify/group.c|  4 +++
>  fs/notify/inotify/inotify_fsnotify.c | 15 +-
>  fs/notify/inotify/inotify_user.c |  5 +++-
>  include/linux/fsnotify_backend.h | 12 +---
>  include/linux/memcontrol.h   |  7 +
>  include/linux/sched.h|  3 ++
>  include/linux/sched/mm.h | 41 
>  kernel/fork.c|  3 ++
>  mm/memcontrol.c  | 38 +++---
>  12 files changed, 139 insertions(+), 16 deletions(-)
>
> diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
> index e2bea2ac5dfb..a6365e6bc047 100644
> --- a/fs/notify/dnotify/dnotify.c
> +++ b/fs/notify/dnotify/dnotify.c
> @@ -384,8 +384,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned 
> long arg)
>
>  static int __init dnotify_init(void)
>  {
> -   dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
> -   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
> +   dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
> + SLAB_PANIC|SLAB_ACCOUNT);
> +   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, 
> SLAB_PANIC|SLAB_ACCOUNT);
>
> dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
> if (IS_ERR(dnotify_group))
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index f90842efea13..d6dfcf0ec21f 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "fanotify.h"
>
> @@ -140,8 +141,9 @@ struct fanotify_event_info *fanotify_alloc_event(struct 
> fsnotify_group *group,
>  struct inode *inode, u32 
> mask,
>  const struct path *path)
>  {
> -   struct fanotify_event_info *event;
> +   struct fanotify_event_info *event = NULL;
> gfp_t gfp = GFP_KERNEL;
> +   struct mem_cgroup *old_memcg = NULL;
>
> /*
>  * For queues with unlimited length lost events are not expected and
> @@ -151,19 +153,25 @@ struct fanotify_event_info *fanotify_alloc_event(struct 
> fsnotify_group *group,
> if (group->max_events == UINT_MAX)
> gfp |= __GFP_NOFAIL;
>
> +   /* Whoever is interested in the event, pays for the allocation. */
> +   if (group->memcg) {
> +

Re: [PATCH 1/2] fs: fsnotify: account fsnotify metadata to kmemcg

2018-06-25 Thread Amir Goldstein

On Tue, Jun 26, 2018 at 2:06 AM, Shakeel Butt  wrote:
> A lot of memory can be consumed by the events generated for the huge or
> unlimited queues if there is either no or slow listener.  This can cause
> system level memory pressure or OOMs.  So, it's better to account the
> fsnotify kmem caches to the memcg of the listener.
>
> However the listener can be in a different memcg than the memcg of the
> producer and these allocations happen in the context of the event
> producer. This patch introduces remote memcg charging scope API which the
> producer can use to charge the allocations to the memcg of the listener.
>
> There are seven fsnotify kmem caches and among them allocations from
> dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and
> inotify_inode_mark_cachep happens in the context of syscall from the
> listener.  So, SLAB_ACCOUNT is enough for these caches.
>
> The objects from fsnotify_mark_connector_cachep are not accounted as they
> are small compared to the notification mark or events and it is unclear
> whom to account connector to since it is shared by all events attached to
> the inode.
>
> The allocations from the event caches happen in the context of the event
> producer.  For such caches we will need to remote charge the allocations
> to the listener's memcg.  Thus we save the memcg reference in the
> fsnotify_group structure of the listener.
>
> This patch has also moved the members of fsnotify_group to keep the size
> same, at least for 64 bit build, even with additional member by filling
> the holes.
>
> Signed-off-by: Shakeel Butt 
> Cc: Michal Hocko 
> Cc: Jan Kara 
> Cc: Amir Goldstein 
> Cc: Greg Thelen 
> Cc: Johannes Weiner 
> Cc: Vladimir Davydov 
> Cc: Roman Gushchin 
> ---
> Changelog since v6:
> - Removed Jan's ACK as the code has changed a lot
> - Squashed the separate remote charging API path into this one
> - Removed kmalloc* & kmem_cache_alloc* APIs and only kept the scope API
> - Changed fsnotify remote charging code to use scope API
>
> Changelog since v5:
> - None
>
> Changelog since v4:
> - Fixed the build for CONFIG_MEMCG=n
>
> Changelog since v3:
> - Rebased over Jan's patches.
> - Some cleanup based on Amir's comments.
>
> Changelog since v2:
> - None
>
> Changelog since v1:
> - no more charging fsnotify_mark_connector objects
> - Fixed the build for SLOB
>
>  fs/notify/dnotify/dnotify.c  |  5 ++--
>  fs/notify/fanotify/fanotify.c| 17 ++--
>  fs/notify/fanotify/fanotify_user.c   |  5 +++-
>  fs/notify/group.c|  4 +++
>  fs/notify/inotify/inotify_fsnotify.c | 15 +-
>  fs/notify/inotify/inotify_user.c |  5 +++-
>  include/linux/fsnotify_backend.h | 12 +---
>  include/linux/memcontrol.h   |  7 +
>  include/linux/sched.h|  3 ++
>  include/linux/sched/mm.h | 41 
>  kernel/fork.c|  3 ++
>  mm/memcontrol.c  | 38 +++---
>  12 files changed, 139 insertions(+), 16 deletions(-)
>
> diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
> index e2bea2ac5dfb..a6365e6bc047 100644
> --- a/fs/notify/dnotify/dnotify.c
> +++ b/fs/notify/dnotify/dnotify.c
> @@ -384,8 +384,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned 
> long arg)
>
>  static int __init dnotify_init(void)
>  {
> -   dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
> -   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
> +   dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
> + SLAB_PANIC|SLAB_ACCOUNT);
> +   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, 
> SLAB_PANIC|SLAB_ACCOUNT);
>
> dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
> if (IS_ERR(dnotify_group))
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index f90842efea13..d6dfcf0ec21f 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "fanotify.h"
>
> @@ -140,8 +141,9 @@ struct fanotify_event_info *fanotify_alloc_event(struct 
> fsnotify_group *group,
>  struct inode *inode, u32 
> mask,
>  const struct path *path)
>  {
> -   struct fanotify_event_info *event;
> +   struct fanotify_event_info *event = NULL;
> gfp_t gfp = GFP_KERNEL;
> +   struct mem_cgroup *old_memcg = NULL;
>
> /*
>  * For queues with unlimited length lost events are not expected and
> @@ -151,19 +153,25 @@ struct fanotify_event_info *fanotify_alloc_event(struct 
> fsnotify_group *group,
> if (group->max_events == UINT_MAX)
> gfp |= __GFP_NOFAIL;
>
> +   /* Whoever is interested in the event, pays for the allocation. */
> +   if (group->memcg) {
> +

Re: moving affs + RDB partition support to staging?

2018-06-25 Thread jdow

As long as it preserves compatibility it should be OK, I suppose. Personally I'd 
make any partitioning tool front end gently force the block size towards 8k as 
the disk size gets larger. The file systems may also run into 2TB issues that 
are not obvious. An unused blocks list will have to go beyond a uint32_t size, 
for example. But a block list (OFS for sure, don't remember for the newer AFS) 
uses a tad under 1% of the disk all by itself. A block bitmap is not quite so 
bad. {^_-}


Just be sure you are aware of all the ramifications when you make a change. I 
remember thinking about this for awhile and then determining I REALLY did not 
want to think about it as my brain was getting tied into a gordian knot.


{^_^}

On 20180625 19:23, Michael Schmitz wrote:

Joanne,

Martin's boot log (including your patch) says:

Jun 19 21:19:09 merkaba kernel: [ 7891.843284]  sdb: RDSK (512) sdb1
(LNX^@)(res 2 spb 1) sdb2 (JXF^D)(res 2 spb 1) sdb3 (DOS^C)(res 2 spb
4)
Jun 19 21:19:09 merkaba kernel: [ 7891.844055] sd 7:0:0:0: [sdb]
Attached SCSI disk

so it's indeed a case of self inflicted damage (RDSK (512) means 512
byte blocks) and can be worked around by using a different block size.

Your memory serves right indeed - blocksize is in 512 bytes units.
I'll still submit a patch to Jens anyway as this may bite others yet.

Cheers,

   Michael


On Sun, Jun 24, 2018 at 11:40 PM, jdow  wrote:

BTW - anybody who uses 512 byte blocks with an Amiga file system is a famn
dool.

If memory serves the RDBs think in blocks rather than bytes so it should
work up to 2 gigablocks whatever your block size is. 512 blocks is
2199023255552 bytes. But that wastes just a WHOLE LOT of disk in block maps.
Go up to 4096 or 8192. The latter is 35 TB.

{^_^}
On 20180624 02:06, Martin Steigerwald wrote:


Hi.

Michael Schmitz - 27.04.18, 04:11:


test results at https://bugzilla.kernel.org/show_bug.cgi?id=43511
indicate the RDB parser bug is fixed by the patch given there, so if
Martin now submits the patch, all should be well?



Ok, better be honest than having anyone waiting for it:

I do not care enough about this to motivate myself to prepare
a patch from Joanne Dow's fix.

I am not even using my Amiga boxes anymore, not even the Sam440ep which
I still have in my apartment.

So RDB support in Linux remains broken for disks larger than 2 TB, unless
someone else does.

Thanks.


--
To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: moving affs + RDB partition support to staging?

2018-06-25 Thread jdow

As long as it preserves compatibility it should be OK, I suppose. Personally I'd 
make any partitioning tool front end gently force the block size towards 8k as 
the disk size gets larger. The file systems may also run into 2TB issues that 
are not obvious. An unused blocks list will have to go beyond a uint32_t size, 
for example. But a block list (OFS for sure, don't remember for the newer AFS) 
uses a tad under 1% of the disk all by itself. A block bitmap is not quite so 
bad. {^_-}


Just be sure you are aware of all the ramifications when you make a change. I 
remember thinking about this for awhile and then determining I REALLY did not 
want to think about it as my brain was getting tied into a gordian knot.


{^_^}

On 20180625 19:23, Michael Schmitz wrote:

Joanne,

Martin's boot log (including your patch) says:

Jun 19 21:19:09 merkaba kernel: [ 7891.843284]  sdb: RDSK (512) sdb1
(LNX^@)(res 2 spb 1) sdb2 (JXF^D)(res 2 spb 1) sdb3 (DOS^C)(res 2 spb
4)
Jun 19 21:19:09 merkaba kernel: [ 7891.844055] sd 7:0:0:0: [sdb]
Attached SCSI disk

so it's indeed a case of self inflicted damage (RDSK (512) means 512
byte blocks) and can be worked around by using a different block size.

Your memory serves right indeed - blocksize is in 512 bytes units.
I'll still submit a patch to Jens anyway as this may bite others yet.

Cheers,

   Michael


On Sun, Jun 24, 2018 at 11:40 PM, jdow  wrote:

BTW - anybody who uses 512 byte blocks with an Amiga file system is a famn
dool.

If memory serves the RDBs think in blocks rather than bytes so it should
work up to 2 gigablocks whatever your block size is. 512 blocks is
2199023255552 bytes. But that wastes just a WHOLE LOT of disk in block maps.
Go up to 4096 or 8192. The latter is 35 TB.

{^_^}
On 20180624 02:06, Martin Steigerwald wrote:


Hi.

Michael Schmitz - 27.04.18, 04:11:


test results at https://bugzilla.kernel.org/show_bug.cgi?id=43511
indicate the RDB parser bug is fixed by the patch given there, so if
Martin now submits the patch, all should be well?



Ok, better be honest than having anyone waiting for it:

I do not care enough about this to motivate myself to prepare
a patch from Joanne Dow's fix.

I am not even using my Amiga boxes anymore, not even the Sam440ep which
I still have in my apartment.

So RDB support in Linux remains broken for disks larger than 2 TB, unless
someone else does.

Thanks.


--
To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v8 4/7] Bluetooth: hci_qca: Add wrapper functions for setting UART speed

2018-06-25 Thread Balakrishna Godavarthi

Hi Matthias,

On 2018-06-26 05:35, Matthias Kaehlcke wrote:

On Mon, Jun 25, 2018 at 04:43:54PM -0700, Matthias Kaehlcke wrote:

This is a nice improvement, a few remaining questions inline.

On Mon, Jun 25, 2018 at 07:10:10PM +0530, Balakrishna Godavarthi 
wrote:

> In function qca_setup, we set initial and operating speeds for Qualcomm
> Bluetooth SoC's. This block of code is common across different
> Qualcomm Bluetooth SoC's. Instead of duplicating the code, created
> a wrapper function to set the speeds. So that future coming SoC's
> can use these wrapper functions to set speeds.
>
> Signed-off-by: Balakrishna Godavarthi 
> ---
> Changes in v8:
> * common function to set INIT and operating speeds.
> * moved hardware flow control to qca_set_speed().
>
> Changes in v7:
> * initial patch
> * created wrapper functions for init and operating speeds.
> ---
>  drivers/bluetooth/hci_qca.c | 89 +++--
>  1 file changed, 65 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
> index fe62420ef838..38b7dbe6c897 100644
> --- a/drivers/bluetooth/hci_qca.c
> +++ b/drivers/bluetooth/hci_qca.c
> @@ -119,6 +119,11 @@ struct qca_data {
>u64 votes_off;
>  };
>
> +enum qca_speed_type {
> +  QCA_INIT_SPEED = 1,
> +  QCA_OPER_SPEED
> +};
> +
>  struct qca_serdev {
>struct hci_uart  serdev_hu;
>struct gpio_desc *bt_en;
> @@ -923,6 +928,60 @@ static inline void host_set_baudrate(struct hci_uart 
*hu, unsigned int speed)
>hci_uart_set_baudrate(hu, speed);
>  }
>
> +static unsigned int qca_get_speed(struct hci_uart *hu,
> +enum qca_speed_type speed_type)
> +{
> +  unsigned int speed = 0;
> +
> +  if (speed_type == QCA_INIT_SPEED) {
> +  if (hu->init_speed)
> +  speed = hu->init_speed;
> +  else if (hu->proto->init_speed)
> +  speed = hu->proto->init_speed;
> +  } else {
> +  if (hu->oper_speed)
> +  speed = hu->oper_speed;
> +  else if (hu->proto->oper_speed)
> +  speed = hu->proto->oper_speed;
> +  }
> +
> +  return speed;
> +}
> +
> +static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
> +{
> +  unsigned int speed, qca_baudrate;
> +  int ret;
> +
> +  if (speed_type == QCA_INIT_SPEED) {
> +  speed = qca_get_speed(hu, QCA_INIT_SPEED);
> +  if (speed)
> +  host_set_baudrate(hu, speed);
> +  else
> +  bt_dev_err(hu->hdev, "Init speed should be non zero");

The check for 'speed == 0' is done in multiple places. From this
code I deduce that it is expected that both INIT and OPER speed are
set to non-zero values. What happens if either of them is zero? Is the
driver still operational?

> +  return 0;
> +  }
> +
> +  speed = qca_get_speed(hu, QCA_OPER_SPEED);
> +  if (!speed) {
> +  bt_dev_err(hu->hdev, "operating speed should be non zero");
> +  return 0;
> +  }
> +
> +  qca_baudrate = qca_get_baudrate_value(speed);
> +  bt_dev_info(hu->hdev, "Set UART speed to %d", speed);
> +  ret = qca_set_baudrate(hu->hdev, qca_baudrate);
> +  if (ret) {
> +  bt_dev_err(hu->hdev, "Failed to change the baudrate (%d)", ret);
> +  return ret;
> +  }
> +
> +  host_set_baudrate(hu, speed);
> +
> +  return ret;
> +}

In the discussion on "[v7,8/8] Bluetooth: hci_qca: Add support for
Qualcomm Bluetooth chip wcn3990" you mentioned the possibility to move
the hci_uart_set_flow_control() calls into _set_speed(). This seemed
interesting but finally it isn't done in this series. Did you
encounter that it is not feasible/desirable for some reason?

I got that half wrong. "[v8,7/7] Bluetooth: hci_qca: Add support for
Qualcomm Bluetooth chip wcn3990" adds the flow control calls to
_set_speed() however there are still _set_flow_control() calls in
qca_setup(), which confused/s me.

Could you provide a brief summary on the situations (relevant for this
driver) in which flow controls needs to be enabled/disabled?

you will not find enable or disable of hardware flow control in this 
patch.

there is no hardware flow control in ROME chip.
you will find hardware flow control in wcn3990 i.e. patch [v8 7/7]

In wcn3990, we disable hardware flow control when we send mandatory
commands to the BT chip.

That is, while sending the power-on pulse (the 0xFC byte) for wcn3990 to boot up
completely, and while sending the change-baudrate request to the BT chip.
Before sending these commands we disable the chip's flow control, and we
enable flow control again once these commands have been sent.

so in our current code after integrating wcn3990, we disable flow 
control two times.

1. Before sending power on pulse i.e. qca_send_vendor_cmd(hdev, 
QCA_WCN3990_POWERON_PULSE); in qca_setup.

   so we find disable or enable hardware flow control in qca_setup()
2. Before sending change BT CHIP baudrate request i.e.  
qca_set_baudrate(hu->hdev, qca_baudrate);

Re: [PATCH v8 4/7] Bluetooth: hci_qca: Add wrapper functions for setting UART speed

2018-06-25 Thread Balakrishna Godavarthi

Hi Matthias,

On 2018-06-26 05:35, Matthias Kaehlcke wrote:

On Mon, Jun 25, 2018 at 04:43:54PM -0700, Matthias Kaehlcke wrote:

This is a nice improvement, a few remaining questions inline.

On Mon, Jun 25, 2018 at 07:10:10PM +0530, Balakrishna Godavarthi 
wrote:

> In function qca_setup, we set initial and operating speeds for Qualcomm
> Bluetooth SoC's. This block of code is common across different
> Qualcomm Bluetooth SoC's. Instead of duplicating the code, created
> a wrapper function to set the speeds. So that future coming SoC's
> can use these wrapper functions to set speeds.
>
> Signed-off-by: Balakrishna Godavarthi 
> ---
> Changes in v8:
> * common function to set INIT and operating speeds.
> * moved hardware flow control to qca_set_speed().
>
> Changes in v7:
> * initial patch
> * created wrapper functions for init and operating speeds.
> ---
>  drivers/bluetooth/hci_qca.c | 89 +++--
>  1 file changed, 65 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
> index fe62420ef838..38b7dbe6c897 100644
> --- a/drivers/bluetooth/hci_qca.c
> +++ b/drivers/bluetooth/hci_qca.c
> @@ -119,6 +119,11 @@ struct qca_data {
>u64 votes_off;
>  };
>
> +enum qca_speed_type {
> +  QCA_INIT_SPEED = 1,
> +  QCA_OPER_SPEED
> +};
> +
>  struct qca_serdev {
>struct hci_uart  serdev_hu;
>struct gpio_desc *bt_en;
> @@ -923,6 +928,60 @@ static inline void host_set_baudrate(struct hci_uart 
*hu, unsigned int speed)
>hci_uart_set_baudrate(hu, speed);
>  }
>
> +static unsigned int qca_get_speed(struct hci_uart *hu,
> +enum qca_speed_type speed_type)
> +{
> +  unsigned int speed = 0;
> +
> +  if (speed_type == QCA_INIT_SPEED) {
> +  if (hu->init_speed)
> +  speed = hu->init_speed;
> +  else if (hu->proto->init_speed)
> +  speed = hu->proto->init_speed;
> +  } else {
> +  if (hu->oper_speed)
> +  speed = hu->oper_speed;
> +  else if (hu->proto->oper_speed)
> +  speed = hu->proto->oper_speed;
> +  }
> +
> +  return speed;
> +}
> +
> +static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
> +{
> +  unsigned int speed, qca_baudrate;
> +  int ret;
> +
> +  if (speed_type == QCA_INIT_SPEED) {
> +  speed = qca_get_speed(hu, QCA_INIT_SPEED);
> +  if (speed)
> +  host_set_baudrate(hu, speed);
> +  else
> +  bt_dev_err(hu->hdev, "Init speed should be non zero");

The check for 'speed == 0' is done in multiple places. From this
code I deduce that it is expected that both INIT and OPER speed are
set to non-zero values. What happens if either of them is zero? Is the
driver still operational?

> +  return 0;
> +  }
> +
> +  speed = qca_get_speed(hu, QCA_OPER_SPEED);
> +  if (!speed) {
> +  bt_dev_err(hu->hdev, "operating speed should be non zero");
> +  return 0;
> +  }
> +
> +  qca_baudrate = qca_get_baudrate_value(speed);
> +  bt_dev_info(hu->hdev, "Set UART speed to %d", speed);
> +  ret = qca_set_baudrate(hu->hdev, qca_baudrate);
> +  if (ret) {
> +  bt_dev_err(hu->hdev, "Failed to change the baudrate (%d)", ret);
> +  return ret;
> +  }
> +
> +  host_set_baudrate(hu, speed);
> +
> +  return ret;
> +}

In the discussion on "[v7,8/8] Bluetooth: hci_qca: Add support for
Qualcomm Bluetooth chip wcn3990" you mentioned the possibility to move
the hci_uart_set_flow_control() calls into _set_speed(). This seemed
interesting but finally it isn't done in this series. Did you
encounter that it is not feasible/desirable for some reason?

I got that half wrong. "[v8,7/7] Bluetooth: hci_qca: Add support for
Qualcomm Bluetooth chip wcn3990" adds the flow control calls to
_set_speed() however there are still _set_flow_control() calls in
qca_setup(), which confused/s me.

Could you provide a brief summary on the situations (relevant for this
driver) in which flow controls needs to be enabled/disabled?

you will not find enable or disable of hardware flow control in this 
patch.

there is no hardware flow control in ROME chip.
you will find hardware flow control in wcn3990 i.e. patch [v8 7/7]

In wcn3990, we disable hardware flow control when we send mandatory
commands to the BT chip.

That is, while sending the power-on pulse (the 0xFC byte) for wcn3990 to boot up
completely, and while sending the change-baudrate request to the BT chip.
Before sending these commands we disable the chip's flow control, and we
enable flow control again once these commands have been sent.

so in our current code after integrating wcn3990, we disable flow 
control two times.

1. Before sending power on pulse i.e. qca_send_vendor_cmd(hdev, 
QCA_WCN3990_POWERON_PULSE); in qca_setup.

   so we find disable or enable hardware flow control in qca_setup()
2. Before sending change BT CHIP baudrate request i.e.  
qca_set_baudrate(hu->hdev, qca_baudrate);

Re: [PATCH 1/2] leds: core: Introduce generic pattern interface

2018-06-25 Thread Baolin Wang

Hi Pavel,

On 25 June 2018 at 20:18, Pavel Machek  wrote:
> On Mon 2018-06-25 13:03:19, Baolin Wang wrote:
>> From: Bjorn Andersson 
>>
>> Some LED controllers have support for autonomously controlling
>> brightness over time, according to some preprogrammed pattern or
>> function.
>>
>> This adds a new optional operator that LED class drivers can implement
>> if they support such functionality as well as a new device attribute to
>> configure the pattern for a given LED.
>
> Thanks for doing this!
>
>> index 5f67f7a..fe90a12 100644
>> --- a/Documentation/ABI/testing/sysfs-class-led
>> +++ b/Documentation/ABI/testing/sysfs-class-led
>> @@ -61,3 +61,19 @@ Description:
>>   gpio and backlight triggers. In case of the backlight trigger,
>>   it is useful when driving a LED which is intended to indicate
>>   a device in a standby like state.
>> +
>> +What: /sys/class/leds//pattern
>> +Date: June 2018
>> +KernelVersion: 4.18
>> +Description:
>> + Specify a pattern for the LED, for LED hardware that support
>> + altering the brightness as a function of time.
>> +
>> + The pattern is given by a series of tuples, of brightness and
>> + duration (ms). The LED is expected to traverse the series and
>> + each brightness value for the specified duration.
>> +
>> + As LED hardware might have different capabilities and precision
>> + the requested pattern might be slightly adjusted by the driver
>> + and the resulting pattern of such operation should be returned
>> + when this file is read.
>
> I'd add "Duration of 0 means brightness should immediately change to
> new value."

Sure.

>
>> + struct led_classdev *led_cdev = dev_get_drvdata(dev);
>> + struct led_pattern *pattern = NULL;
>> + unsigned long val;
>> + char *sbegin;
>> + char *elem;
>> + char *s;
>> + int ret, len = 0;
>> + bool odd = true;
>> +
>> + s = sbegin = kstrndup(buf, size, GFP_KERNEL);
>> + if (!s)
>> + return -ENOMEM;
>> +
>> + /* Trim trailing newline */
>> + s[strcspn(s, "\n")] = '\0';
>
> Is substring function best to use here? Will it do the right thing
> when \n is not present?

We always have a '\n' to present the end of the string passed from
userspace. Or anything I missed here?

>
>> + /* If the remaining string is empty, clear the pattern */
>> + if (!s[0]) {
>> + ret = led_cdev->pattern_clear(led_cdev);
>> + goto out;
>> + }
>> +
>> + pattern = kcalloc(size, sizeof(*pattern), GFP_KERNEL);
>> + if (!pattern) {
>> + ret = -ENOMEM;
>> + goto out;
>> + }
>> +
>> + /* Parse out the brightness & delta_t tuples and check for repeat */
>> + while ((elem = strsep(&s, " ")) != NULL) {
>> + ret = kstrtoul(elem, 10, &val);
>> + if (ret)
>> + goto out;
>> +
>> + if (odd) {
>> + pattern[len].brightness = val;
>> + } else {
>> + /* Ensure we don't have any delta_t == 0 */
>> + if (!val) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>
> I believe we should support delta_t of 0 for "change immediately".

OK. Thanks for your comments.

-- 
Baolin Wang
Best Regards

Re: INFO: rcu detected stall in vprintk_emit

2018-06-25 Thread Sergey Senozhatsky

On (06/26/18 07:03), Dmitry Vyukov wrote:
> > I don't think this is a printk() issue per se, so I think Option B is
> > the only option. You should not get stuck in an infinite loop if we run
> > short on memory. Perhaps we could have an Option C which would exit
> > this loop gracefully with some kind of error. But I haven't looked at
> > the surrounding code to be sure if that is possible.
> 
> I suspect this is not even OOM.

Could be. But at the same time it stalls RCU, so OOM at some point
becomes realistic.

-ss

Re: [PATCH 1/2] leds: core: Introduce generic pattern interface

2018-06-25 Thread Baolin Wang

Hi Pavel,

On 25 June 2018 at 20:18, Pavel Machek  wrote:
> On Mon 2018-06-25 13:03:19, Baolin Wang wrote:
>> From: Bjorn Andersson 
>>
>> Some LED controllers have support for autonomously controlling
>> brightness over time, according to some preprogrammed pattern or
>> function.
>>
>> This adds a new optional operator that LED class drivers can implement
>> if they support such functionality as well as a new device attribute to
>> configure the pattern for a given LED.
>
> Thanks for doing this!
>
>> index 5f67f7a..fe90a12 100644
>> --- a/Documentation/ABI/testing/sysfs-class-led
>> +++ b/Documentation/ABI/testing/sysfs-class-led
>> @@ -61,3 +61,19 @@ Description:
>>   gpio and backlight triggers. In case of the backlight trigger,
>>   it is useful when driving a LED which is intended to indicate
>>   a device in a standby like state.
>> +
>> +What: /sys/class/leds//pattern
>> +Date: June 2018
>> +KernelVersion: 4.18
>> +Description:
>> + Specify a pattern for the LED, for LED hardware that support
>> + altering the brightness as a function of time.
>> +
>> + The pattern is given by a series of tuples, of brightness and
>> + duration (ms). The LED is expected to traverse the series and
>> + each brightness value for the specified duration.
>> +
>> + As LED hardware might have different capabilities and precision
>> + the requested pattern might be slightly adjusted by the driver
>> + and the resulting pattern of such operation should be returned
>> + when this file is read.
>
> I'd add "Duration of 0 means brightness should immediately change to
> new value."

Sure.

>
>> + struct led_classdev *led_cdev = dev_get_drvdata(dev);
>> + struct led_pattern *pattern = NULL;
>> + unsigned long val;
>> + char *sbegin;
>> + char *elem;
>> + char *s;
>> + int ret, len = 0;
>> + bool odd = true;
>> +
>> + s = sbegin = kstrndup(buf, size, GFP_KERNEL);
>> + if (!s)
>> + return -ENOMEM;
>> +
>> + /* Trim trailing newline */
>> + s[strcspn(s, "\n")] = '\0';
>
> Is substring function best to use here? Will it do the right thing
> when \n is not present?

We always have a '\n' to present the end of the string passed from
userspace. Or anything I missed here?

>
>> + /* If the remaining string is empty, clear the pattern */
>> + if (!s[0]) {
>> + ret = led_cdev->pattern_clear(led_cdev);
>> + goto out;
>> + }
>> +
>> + pattern = kcalloc(size, sizeof(*pattern), GFP_KERNEL);
>> + if (!pattern) {
>> + ret = -ENOMEM;
>> + goto out;
>> + }
>> +
>> + /* Parse out the brightness & delta_t tuples and check for repeat */
>> + while ((elem = strsep(&s, " ")) != NULL) {
>> + ret = kstrtoul(elem, 10, &val);
>> + if (ret)
>> + goto out;
>> +
>> + if (odd) {
>> + pattern[len].brightness = val;
>> + } else {
>> + /* Ensure we don't have any delta_t == 0 */
>> + if (!val) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>
> I believe we should support delta_t of 0 for "change immediately".

OK. Thanks for your comments.

-- 
Baolin Wang
Best Regards

Re: INFO: rcu detected stall in vprintk_emit

2018-06-25 Thread Sergey Senozhatsky

On (06/26/18 07:03), Dmitry Vyukov wrote:
> > I don't think this is a printk() issue per se, so I think Option B is
> > the only option. You should not get stuck in an infinite loop if we run
> > short on memory. Perhaps we could have an Option C which would exit
> > this loop gracefully with some kind of error. But I haven't looked at
> > the surrounding code to be sure if that is possible.
> 
> I suspect this is not even OOM.

Could be. But at the same time it stalls RCU, so OOM at some point
becomes realistic.

-ss

RE: [Patch v2 14/15] CIFS: Add support for direct I/O write

2018-06-25 Thread Long Li

> Subject: Re: [Patch v2 14/15] CIFS: Add support for direct I/O write
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li 
> >
> > Implement the function for direct I/O write. It doesn't support AIO,
> > which will be implemented in a follow up patch.
> >
> > Signed-off-by: Long Li 
> > ---
> >   fs/cifs/cifsfs.h |   1 +
> >   fs/cifs/file.c   | 165
> +++
> >   2 files changed, 166 insertions(+)
> >
> > diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index
> > 7fba9aa..e9c5103 100644
> > --- a/fs/cifs/cifsfs.h
> > +++ b/fs/cifs/cifsfs.h
> > @@ -105,6 +105,7 @@ extern ssize_t cifs_user_readv(struct kiocb *iocb,
> struct iov_iter *to);
> >   extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
> >   extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
> >   extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter
> > *from);
> > +extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter
> > +*from);
> >   extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter 
> > *from);
> >   extern int cifs_lock(struct file *, int, struct file_lock *);
> >   extern int cifs_fsync(struct file *, loff_t, loff_t, int); diff
> > --git a/fs/cifs/file.c b/fs/cifs/file.c index e6e6f24..8c385b1 100644
> > --- a/fs/cifs/file.c
> > +++ b/fs/cifs/file.c
> > @@ -2461,6 +2461,35 @@ cifs_uncached_writedata_release(struct kref
> > *refcount)
> >
> >   static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
> >
> > +static void cifs_direct_writedata_release(struct kref *refcount) {
> > +   int i;
> > +   struct cifs_writedata *wdata = container_of(refcount,
> > +   struct cifs_writedata, refcount);
> > +
> > +   for (i = 0; i < wdata->nr_pages; i++)
> > +   put_page(wdata->pages[i]);
> > +
> > +   cifs_writedata_release(refcount);
> > +}
> > +
> > +static void cifs_direct_writev_complete(struct work_struct *work) {
> > +   struct cifs_writedata *wdata = container_of(work,
> > +   struct cifs_writedata, work);
> > +   struct inode *inode = d_inode(wdata->cfile->dentry);
> > +   struct cifsInodeInfo *cifsi = CIFS_I(inode);
> > +
> > +   spin_lock(&inode->i_lock);
> > +   cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
> > +   if (cifsi->server_eof > inode->i_size)
> > +   i_size_write(inode, cifsi->server_eof);
> > +   spin_unlock(&inode->i_lock);
> > +
> > +   complete(&wdata->done);
> > +   kref_put(&wdata->refcount, cifs_direct_writedata_release); }
> > +
> >   static void
> >   cifs_uncached_writev_complete(struct work_struct *work)
> >   {
> > @@ -2703,6 +2732,142 @@ static void collect_uncached_write_data(struct
> cifs_aio_ctx *ctx)
> > > complete(&ctx->done);
> >   }
> >
> > +ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
> > +{
> > +   struct file *file = iocb->ki_filp;
> > +   ssize_t total_written = 0;
> > +   struct cifsFileInfo *cfile;
> > +   struct cifs_tcon *tcon;
> > +   struct cifs_sb_info *cifs_sb;
> > +   struct TCP_Server_Info *server;
> > +   pid_t pid;
> > +   unsigned long nr_pages;
> > +   loff_t offset = iocb->ki_pos;
> > +   size_t len = iov_iter_count(from);
> > +   int rc;
> > +   struct cifs_writedata *wdata;
> > +
> > +   /*
> > +* iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
> > +* In this case, fall back to non-direct write function.
> > +*/
> > +   if (from->type & ITER_KVEC) {
> > +   cifs_dbg(FYI, "use non-direct cifs_user_writev for kvec
> I/O\n");
> > +   return cifs_user_writev(iocb, from);
> > +   }
> > +
> > +   rc = generic_write_checks(iocb, from);
> > +   if (rc <= 0)
> > +   return rc;
> > +
> > +   cifs_sb = CIFS_FILE_SB(file);
> > +   cfile = file->private_data;
> > +   tcon = tlink_tcon(cfile->tlink);
> > +   server = tcon->ses->server;
> > +
> > +   if (!server->ops->async_writev)
> > +   return -ENOSYS;
> > +
> > +   if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> > +   pid = cfile->pid;
> > +   else
> > +   pid = current->tgid;
> > +
> > +   do {
> > +   unsigned int wsize, credits;
> > +   struct page **pagevec;
> > +   size_t start;
> > +   ssize_t cur_len;
> > +
> > +   rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
> > +  &wsize, &credits);
> > +   if (rc)
> > +   break;
> > +
> > +   cur_len = iov_iter_get_pages_alloc(
> > +   from, &pagevec, wsize, &start);
> > +   if (cur_len < 0) {
> > +   cifs_dbg(VFS,
> > +   "direct_writev couldn't get user pages "
> > +   "(rc=%zd) iter type %d iov_offset %lu count"
> > +   " %lu\n",
> > +   cur_len, from->type,
> > +   from->iov_offset, from->count);
> > +

RE: [Patch v2 14/15] CIFS: Add support for direct I/O write

2018-06-25 Thread Long Li

> Subject: Re: [Patch v2 14/15] CIFS: Add support for direct I/O write
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li 
> >
> > Implement the function for direct I/O write. It doesn't support AIO,
> > which will be implemented in a follow up patch.
> >
> > Signed-off-by: Long Li 
> > ---
> >   fs/cifs/cifsfs.h |   1 +
> >   fs/cifs/file.c   | 165
> +++
> >   2 files changed, 166 insertions(+)
> >
> > diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index
> > 7fba9aa..e9c5103 100644
> > --- a/fs/cifs/cifsfs.h
> > +++ b/fs/cifs/cifsfs.h
> > @@ -105,6 +105,7 @@ extern ssize_t cifs_user_readv(struct kiocb *iocb,
> struct iov_iter *to);
> >   extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
> >   extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
> >   extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter
> > *from);
> > +extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter
> > +*from);
> >   extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter 
> > *from);
> >   extern int cifs_lock(struct file *, int, struct file_lock *);
> >   extern int cifs_fsync(struct file *, loff_t, loff_t, int); diff
> > --git a/fs/cifs/file.c b/fs/cifs/file.c index e6e6f24..8c385b1 100644
> > --- a/fs/cifs/file.c
> > +++ b/fs/cifs/file.c
> > @@ -2461,6 +2461,35 @@ cifs_uncached_writedata_release(struct kref
> > *refcount)
> >
> >   static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
> >
> > +static void cifs_direct_writedata_release(struct kref *refcount) {
> > +   int i;
> > +   struct cifs_writedata *wdata = container_of(refcount,
> > +   struct cifs_writedata, refcount);
> > +
> > +   for (i = 0; i < wdata->nr_pages; i++)
> > +   put_page(wdata->pages[i]);
> > +
> > +   cifs_writedata_release(refcount);
> > +}
> > +
> > +static void cifs_direct_writev_complete(struct work_struct *work) {
> > +   struct cifs_writedata *wdata = container_of(work,
> > +   struct cifs_writedata, work);
> > +   struct inode *inode = d_inode(wdata->cfile->dentry);
> > +   struct cifsInodeInfo *cifsi = CIFS_I(inode);
> > +
> > +   spin_lock(&inode->i_lock);
> > +   cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
> > +   if (cifsi->server_eof > inode->i_size)
> > +   i_size_write(inode, cifsi->server_eof);
> > +   spin_unlock(&inode->i_lock);
> > +
> > +   complete(&wdata->done);
> > +   kref_put(&wdata->refcount, cifs_direct_writedata_release); }
> > +
> >   static void
> >   cifs_uncached_writev_complete(struct work_struct *work)
> >   {
> > @@ -2703,6 +2732,142 @@ static void collect_uncached_write_data(struct
> cifs_aio_ctx *ctx)
> > > complete(&ctx->done);
> >   }
> >
> > +ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
> > +{
> > +   struct file *file = iocb->ki_filp;
> > +   ssize_t total_written = 0;
> > +   struct cifsFileInfo *cfile;
> > +   struct cifs_tcon *tcon;
> > +   struct cifs_sb_info *cifs_sb;
> > +   struct TCP_Server_Info *server;
> > +   pid_t pid;
> > +   unsigned long nr_pages;
> > +   loff_t offset = iocb->ki_pos;
> > +   size_t len = iov_iter_count(from);
> > +   int rc;
> > +   struct cifs_writedata *wdata;
> > +
> > +   /*
> > +* iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
> > +* In this case, fall back to non-direct write function.
> > +*/
> > +   if (from->type & ITER_KVEC) {
> > +   cifs_dbg(FYI, "use non-direct cifs_user_writev for kvec
> I/O\n");
> > +   return cifs_user_writev(iocb, from);
> > +   }
> > +
> > +   rc = generic_write_checks(iocb, from);
> > +   if (rc <= 0)
> > +   return rc;
> > +
> > +   cifs_sb = CIFS_FILE_SB(file);
> > +   cfile = file->private_data;
> > +   tcon = tlink_tcon(cfile->tlink);
> > +   server = tcon->ses->server;
> > +
> > +   if (!server->ops->async_writev)
> > +   return -ENOSYS;
> > +
> > +   if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> > +   pid = cfile->pid;
> > +   else
> > +   pid = current->tgid;
> > +
> > +   do {
> > +   unsigned int wsize, credits;
> > +   struct page **pagevec;
> > +   size_t start;
> > +   ssize_t cur_len;
> > +
> > +   rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
> > +  &wsize, &credits);
> > +   if (rc)
> > +   break;
> > +
> > +   cur_len = iov_iter_get_pages_alloc(
> > +   from, &pagevec, wsize, &start);
> > +   if (cur_len < 0) {
> > +   cifs_dbg(VFS,
> > +   "direct_writev couldn't get user pages "
> > +   "(rc=%zd) iter type %d iov_offset %lu count"
> > +   " %lu\n",
> > +   cur_len, from->type,
> > +   from->iov_offset, from->count);
> > +

RE: [Patch v2 13/15] CIFS: Add support for direct I/O read

2018-06-25 Thread Long Li

> Subject: Re: [Patch v2 13/15] CIFS: Add support for direct I/O read
> 
> 
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li 
> >
> > Implement the function for direct I/O read. It doesn't support AIO,
> > which will be implemented in a follow up patch.
> >
> > Signed-off-by: Long Li 
> > ---
> >   fs/cifs/cifsfs.h |   1 +
> >   fs/cifs/file.c   | 149
> +++
> >   2 files changed, 150 insertions(+)
> >
> > diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index
> > 5f02318..7fba9aa 100644
> > --- a/fs/cifs/cifsfs.h
> > +++ b/fs/cifs/cifsfs.h
> > @@ -102,6 +102,7 @@ extern int cifs_open(struct inode *inode, struct file
> *file);
> >   extern int cifs_close(struct inode *inode, struct file *file);
> >   extern int cifs_closedir(struct inode *inode, struct file *file);
> >   extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter
> > *to);
> > +extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter
> > +*to);
> >   extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
> >   extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter 
> > *from);
> >   extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct
> > iov_iter *from); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index
> > 87eece6..e6e6f24 100644
> > --- a/fs/cifs/file.c
> > +++ b/fs/cifs/file.c
> > @@ -2955,6 +2955,18 @@ cifs_read_allocate_pages(struct cifs_readdata
> *rdata, unsigned int nr_pages)
> > return rc;
> >   }
> >
> > +static void cifs_direct_readdata_release(struct kref *refcount) {
> > +   struct cifs_readdata *rdata = container_of(refcount,
> > +   struct cifs_readdata, refcount);
> > +   unsigned int i;
> > +
> > +   for (i = 0; i < rdata->nr_pages; i++)
> > +   put_page(rdata->pages[i]);
> > +
> > +   cifs_readdata_release(refcount);
> > +}
> > +
> >   static void
> >   cifs_uncached_readdata_release(struct kref *refcount)
> >   {
> > @@ -3267,6 +3279,143 @@ collect_uncached_read_data(struct
> cifs_aio_ctx *ctx)
> > > complete(&ctx->done);
> >   }
> >
> > +static void cifs_direct_readv_complete(struct work_struct *work) {
> > +   struct cifs_readdata *rdata =
> > +   container_of(work, struct cifs_readdata, work);
> > +
> > +   complete(&rdata->done);
> > +   kref_put(&rdata->refcount, cifs_direct_readdata_release); }
> > +
> > +ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) {
> > +   size_t len, cur_len, start;
> > +   unsigned int npages, rsize, credits;
> > +   struct file *file;
> > +   struct cifs_sb_info *cifs_sb;
> > +   struct cifsFileInfo *cfile;
> > +   struct cifs_tcon *tcon;
> > +   struct page **pagevec;
> > +   ssize_t rc, total_read = 0;
> > +   struct TCP_Server_Info *server;
> > +   loff_t offset = iocb->ki_pos;
> > +   pid_t pid;
> > +   struct cifs_readdata *rdata;
> > +
> > +   /*
> > +* iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
> > +* fall back to data copy read path
> > +*/
> > +   if (to->type & ITER_KVEC) {
> > +   cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec
> I/O\n");
> > +   return cifs_user_readv(iocb, to);
> > +   }
> > +
> > +   len = iov_iter_count(to);
> > +   if (!len)
> > +   return 0;
> > +
> > +   file = iocb->ki_filp;
> > +   cifs_sb = CIFS_FILE_SB(file);
> > +   cfile = file->private_data;
> > +   tcon = tlink_tcon(cfile->tlink);
> > +   server = tcon->ses->server;
> > +
> > +   if (!server->ops->async_readv)
> > +   return -ENOSYS;
> > +
> > +   if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> > +   pid = cfile->pid;
> > +   else
> > +   pid = current->tgid;
> > +
> > +   if ((file->f_flags & O_ACCMODE) == O_WRONLY)
> > +   cifs_dbg(FYI, "attempting read on write only file instance\n");
> 
> Confusing. Maybe "attempting read on write-only filehandle"?
> 
> > +
> > +   do {
> > +   rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
> > +   &rsize, &credits);
> > +   if (rc)
> > +   break;
> > +
> > +   cur_len = min_t(const size_t, len, rsize);
> > +
> > +   rc = iov_iter_get_pages_alloc(to, &pagevec, cur_len, &start);
> > +   if (rc < 0) {
> > +   cifs_dbg(VFS,
> > +   "couldn't get user pages (rc=%zd) iter
> type %d"
> > +   " iov_offset %lu count %lu\n",
> > +   rc, to->type, to->iov_offset, to->count);
> > +   dump_stack();
> > +   break;
> > +   }
> > +
> > +   rdata = cifs_readdata_direct_alloc(
> > +   pagevec, cifs_direct_readv_complete);
> > +   if (!rdata) {
> > +   add_credits_and_wake_if(server, credits, 0);
> > +   rc = -ENOMEM;
> > +   break;
> > +   }
> > +
> > +   npages = (rc + start + PAGE_SIZE-1) /

RE: [Patch v2 13/15] CIFS: Add support for direct I/O read

2018-06-25 Thread Long Li

> Subject: Re: [Patch v2 13/15] CIFS: Add support for direct I/O read
> 
> 
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li 
> >
> > Implement the function for direct I/O read. It doesn't support AIO,
> > which will be implemented in a follow up patch.
> >
> > Signed-off-by: Long Li 
> > ---
> >   fs/cifs/cifsfs.h |   1 +
> >   fs/cifs/file.c   | 149
> +++
> >   2 files changed, 150 insertions(+)
> >
> > diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index
> > 5f02318..7fba9aa 100644
> > --- a/fs/cifs/cifsfs.h
> > +++ b/fs/cifs/cifsfs.h
> > @@ -102,6 +102,7 @@ extern int cifs_open(struct inode *inode, struct file
> *file);
> >   extern int cifs_close(struct inode *inode, struct file *file);
> >   extern int cifs_closedir(struct inode *inode, struct file *file);
> >   extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter
> > *to);
> > +extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter
> > +*to);
> >   extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
> >   extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter 
> > *from);
> >   extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct
> > iov_iter *from); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index
> > 87eece6..e6e6f24 100644
> > --- a/fs/cifs/file.c
> > +++ b/fs/cifs/file.c
> > @@ -2955,6 +2955,18 @@ cifs_read_allocate_pages(struct cifs_readdata
> *rdata, unsigned int nr_pages)
> > return rc;
> >   }
> >
> > +static void cifs_direct_readdata_release(struct kref *refcount) {
> > +   struct cifs_readdata *rdata = container_of(refcount,
> > +   struct cifs_readdata, refcount);
> > +   unsigned int i;
> > +
> > +   for (i = 0; i < rdata->nr_pages; i++)
> > +   put_page(rdata->pages[i]);
> > +
> > +   cifs_readdata_release(refcount);
> > +}
> > +
> >   static void
> >   cifs_uncached_readdata_release(struct kref *refcount)
> >   {
> > @@ -3267,6 +3279,143 @@ collect_uncached_read_data(struct
> cifs_aio_ctx *ctx)
> > > complete(&ctx->done);
> >   }
> >
> > +static void cifs_direct_readv_complete(struct work_struct *work) {
> > +   struct cifs_readdata *rdata =
> > +   container_of(work, struct cifs_readdata, work);
> > +
> > +   complete(&rdata->done);
> > +   kref_put(&rdata->refcount, cifs_direct_readdata_release); }
> > +
> > +ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) {
> > +   size_t len, cur_len, start;
> > +   unsigned int npages, rsize, credits;
> > +   struct file *file;
> > +   struct cifs_sb_info *cifs_sb;
> > +   struct cifsFileInfo *cfile;
> > +   struct cifs_tcon *tcon;
> > +   struct page **pagevec;
> > +   ssize_t rc, total_read = 0;
> > +   struct TCP_Server_Info *server;
> > +   loff_t offset = iocb->ki_pos;
> > +   pid_t pid;
> > +   struct cifs_readdata *rdata;
> > +
> > +   /*
> > +* iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
> > +* fall back to data copy read path
> > +*/
> > +   if (to->type & ITER_KVEC) {
> > +   cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec
> I/O\n");
> > +   return cifs_user_readv(iocb, to);
> > +   }
> > +
> > +   len = iov_iter_count(to);
> > +   if (!len)
> > +   return 0;
> > +
> > +   file = iocb->ki_filp;
> > +   cifs_sb = CIFS_FILE_SB(file);
> > +   cfile = file->private_data;
> > +   tcon = tlink_tcon(cfile->tlink);
> > +   server = tcon->ses->server;
> > +
> > +   if (!server->ops->async_readv)
> > +   return -ENOSYS;
> > +
> > +   if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> > +   pid = cfile->pid;
> > +   else
> > +   pid = current->tgid;
> > +
> > +   if ((file->f_flags & O_ACCMODE) == O_WRONLY)
> > +   cifs_dbg(FYI, "attempting read on write only file instance\n");
> 
> Confusing. Maybe "attempting read on write-only filehandle"?
> 
> > +
> > +   do {
> > +   rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
> > +   &rsize, &credits);
> > +   if (rc)
> > +   break;
> > +
> > +   cur_len = min_t(const size_t, len, rsize);
> > +
> > +   rc = iov_iter_get_pages_alloc(to, &pagevec, cur_len, &start);
> > +   if (rc < 0) {
> > +   cifs_dbg(VFS,
> > +   "couldn't get user pages (rc=%zd) iter
> type %d"
> > +   " iov_offset %lu count %lu\n",
> > +   rc, to->type, to->iov_offset, to->count);
> > +   dump_stack();
> > +   break;
> > +   }
> > +
> > +   rdata = cifs_readdata_direct_alloc(
> > +   pagevec, cifs_direct_readv_complete);
> > +   if (!rdata) {
> > +   add_credits_and_wake_if(server, credits, 0);
> > +   rc = -ENOMEM;
> > +   break;
> > +   }
> > +
> > +   npages = (rc + start + PAGE_SIZE-1) /

linux-next: Tree for Jun 26

2018-06-25 Thread Stephen Rothwell

Hi all,

Changes since 20180625:

The cifs tree lost its build failure.

The drm tree still had its build failure for which I disabled some
sample code.

The nvdimm tree gained a conflict against the tip tree.

Non-merge commits (relative to Linus' tree): 2240
 2288 files changed, 72718 insertions(+), 39312 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a
multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), I do an x86_64 modules_install followed by
builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit),
ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc
and sparc64 defconfig. And finally, a simple boot test of the powerpc
pseries_le_defconfig kernel in qemu (with and without kvm enabled).

Below is a summary of the state of the merge.

I am currently merging 281 trees (counting Linus' and 65 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (6f0d349d922b Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging fixes/master (147a89bc71e7 Merge tag 'kconfig-v4.17' of 
git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild)
Merging kbuild-current/fixes (4c1b0de3c413 kconfig: loop boundary condition fix)
Merging arc-current/for-curr (6ddb19a32e79 ARC: Improve cmpxchg syscall 
implementation)
Merging arm-current/fixes (92d44a42af81 ARM: fix kill( ,SIGFPE) breakage)
Merging arm64-fixes/for-next/fixes (71c8fc0c96ab arm64: mm: Ensure writes to 
swapper are ordered wrt subsequent cache maintenance)
Merging m68k-current/for-linus (b12c8a70643f m68k: Set default dma mask for 
platform devices)
Merging powerpc-fixes/fixes (fadd03c61592 powerpc/mm/hash/4k: Free hugetlb page 
table caches correctly.)
Merging sparc/master (1aaccb5fa0ea Merge tag 'rtc-4.18' of 
git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux)
Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2)
Merging net/master (6f0d349d922b Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging bpf/master (68d676a08962 nfp: bpf: don't stop offload if replace failed)
Merging ipsec/master (86126b77dcd5 xfrm: free skb if nlsk pointer is NULL)
Merging netfilter/master (ad9852af9758 netfilter: nf_ct_helper: Fix possible 
panic after nf_conntrack_helper_unregister)
Merging ipvs/master (312564269535 net: netsec: reduce DMA mask to 40 bits)
Merging wireless-drivers/master (92963318a255 mt7601u: remove warning when 
avg_rssi is zero)
Merging mac80211/master (bf2b61a6838f cfg80211: fix rcu in 
cfg80211_unregister_wdev)
Merging rdma-fixes/for-rc (375dc53d032f IB/rxe: Fix missing completion for 
mem_reg work requests)
Merging sound-current/for-linus (c9a4c63888db ALSA: seq: Fix UBSAN warning at 
SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT ioctl)
Merging sound-asoc-fixes/for-linus (edb45c1a4a15 Merge branch 'asoc-4.18' into 
asoc-linus)
Merging regmap-fixes/for-linus (ce397d215ccd Linux 4.18-rc1)
Merging regulator-fixes/for-linus (50635ef0b99b Merge branch 'regulator-4.18' 
into regulator-linus)
Merging spi-fixes/for-linus (dfa94eebba38 Merge branch 'spi-4.18' into 
spi-linus)
Merging pci-current/for-linus (c4ba05f2d89f PCI: shpchp: Manage SHPC 
unconditionally on non-ACPI systems)
Merging driver-core.current/driver-core-linus (7daf201d7fe8 Linux 4.18-rc2)
Merging tty.current/tty-linus (ce397d215ccd Linux 4.18-rc1)
Merging usb.current/usb-linus (d5a4f93511b7 usb: typec: tcpm: fix logbuffer 
index is wrong if _tcpm_log is re-entered)
Merging usb-gadget-fixes/fixes (1d8e5c002758 dwc2: gadget: Fix ISOC IN DDMA PID 
bitfield value calculation)
Merging usb-serial-fixes/usb-linus (24160628a34a USB: serial: cp210x: add 
CESINEL device ids)
Merging usb-chipidea-fixes/ci-for-usb-stable (964728f9f407 USB: chipidea:

linux-next: Tree for Jun 26

2018-06-25 Thread Stephen Rothwell

Hi all,

Changes since 20180625:

The cifs tree lost its build failure.

The drm tree still had its build failure for which I disabled some
sample code.

The nvdimm tree gained a conflict against the tip tree.

Non-merge commits (relative to Linus' tree): 2240
 2288 files changed, 72718 insertions(+), 39312 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a
multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), I do an x86_64 modules_install followed by
builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit),
ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc
and sparc64 defconfig. And finally, a simple boot test of the powerpc
pseries_le_defconfig kernel in qemu (with and without kvm enabled).

Below is a summary of the state of the merge.

I am currently merging 281 trees (counting Linus' and 65 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (6f0d349d922b Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging fixes/master (147a89bc71e7 Merge tag 'kconfig-v4.17' of 
git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild)
Merging kbuild-current/fixes (4c1b0de3c413 kconfig: loop boundary condition fix)
Merging arc-current/for-curr (6ddb19a32e79 ARC: Improve cmpxchg syscall 
implementation)
Merging arm-current/fixes (92d44a42af81 ARM: fix kill( ,SIGFPE) breakage)
Merging arm64-fixes/for-next/fixes (71c8fc0c96ab arm64: mm: Ensure writes to 
swapper are ordered wrt subsequent cache maintenance)
Merging m68k-current/for-linus (b12c8a70643f m68k: Set default dma mask for 
platform devices)
Merging powerpc-fixes/fixes (fadd03c61592 powerpc/mm/hash/4k: Free hugetlb page 
table caches correctly.)
Merging sparc/master (1aaccb5fa0ea Merge tag 'rtc-4.18' of 
git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux)
Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2)
Merging net/master (6f0d349d922b Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging bpf/master (68d676a08962 nfp: bpf: don't stop offload if replace failed)
Merging ipsec/master (86126b77dcd5 xfrm: free skb if nlsk pointer is NULL)
Merging netfilter/master (ad9852af9758 netfilter: nf_ct_helper: Fix possible 
panic after nf_conntrack_helper_unregister)
Merging ipvs/master (312564269535 net: netsec: reduce DMA mask to 40 bits)
Merging wireless-drivers/master (92963318a255 mt7601u: remove warning when 
avg_rssi is zero)
Merging mac80211/master (bf2b61a6838f cfg80211: fix rcu in 
cfg80211_unregister_wdev)
Merging rdma-fixes/for-rc (375dc53d032f IB/rxe: Fix missing completion for 
mem_reg work requests)
Merging sound-current/for-linus (c9a4c63888db ALSA: seq: Fix UBSAN warning at 
SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT ioctl)
Merging sound-asoc-fixes/for-linus (edb45c1a4a15 Merge branch 'asoc-4.18' into 
asoc-linus)
Merging regmap-fixes/for-linus (ce397d215ccd Linux 4.18-rc1)
Merging regulator-fixes/for-linus (50635ef0b99b Merge branch 'regulator-4.18' 
into regulator-linus)
Merging spi-fixes/for-linus (dfa94eebba38 Merge branch 'spi-4.18' into 
spi-linus)
Merging pci-current/for-linus (c4ba05f2d89f PCI: shpchp: Manage SHPC 
unconditionally on non-ACPI systems)
Merging driver-core.current/driver-core-linus (7daf201d7fe8 Linux 4.18-rc2)
Merging tty.current/tty-linus (ce397d215ccd Linux 4.18-rc1)
Merging usb.current/usb-linus (d5a4f93511b7 usb: typec: tcpm: fix logbuffer 
index is wrong if _tcpm_log is re-entered)
Merging usb-gadget-fixes/fixes (1d8e5c002758 dwc2: gadget: Fix ISOC IN DDMA PID 
bitfield value calculation)
Merging usb-serial-fixes/usb-linus (24160628a34a USB: serial: cp210x: add 
CESINEL device ids)
Merging usb-chipidea-fixes/ci-for-usb-stable (964728f9f407 USB: chipidea:

RE: [Patch v2 11/15] CIFS: Pass page offset for calculating signature

2018-06-25 Thread Long Li

> Subject: Re: [Patch v2 11/15] CIFS: Pass page offset for calculating signature
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li 
> >
> > When calculating signature for the packet, it needs to read into the
> > correct page offset for the data.
> >
> > Signed-off-by: Long Li 
> > ---
> >   fs/cifs/cifsencrypt.c | 9 +
> >   1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index
> > a6ef088..e88303c 100644
> > --- a/fs/cifs/cifsencrypt.c
> > +++ b/fs/cifs/cifsencrypt.c
> > @@ -68,11 +68,12 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
> >
> > /* now hash over the rq_pages array */
> > for (i = 0; i < rqst->rq_npages; i++) {
> > -   void *kaddr = kmap(rqst->rq_pages[i]);
> > -   size_t len = rqst->rq_pagesz;
> > +   void *kaddr;
> > +   unsigned int len, offset;
> >
> > -   if (i == rqst->rq_npages - 1)
> > -   len = rqst->rq_tailsz;
> > > +   rqst_page_get_length(rqst, i, &len, &offset);
> > +
> > +   kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
> 
> I suppose it's more robust to map a page at a time, but it's pretty expensive.
> Is this the only way to iterate over a potentially very large block of data? 
> For
> example, a 1MB segment means 256 kmap/kunmaps.

I will look into not mapping those pages while doing I/O.

This code path is for RDMA send/receive, and it's rarely used for transferring 
large amounts of data.

> 
> Tom.
> 
> >
> > crypto_shash_update(shash, kaddr, len);
> >
> >

RE: [Patch v2 11/15] CIFS: Pass page offset for calculating signature

2018-06-25 Thread Long Li

> Subject: Re: [Patch v2 11/15] CIFS: Pass page offset for calculating signature
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li 
> >
> > When calculating signature for the packet, it needs to read into the
> > correct page offset for the data.
> >
> > Signed-off-by: Long Li 
> > ---
> >   fs/cifs/cifsencrypt.c | 9 +
> >   1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index
> > a6ef088..e88303c 100644
> > --- a/fs/cifs/cifsencrypt.c
> > +++ b/fs/cifs/cifsencrypt.c
> > @@ -68,11 +68,12 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
> >
> > /* now hash over the rq_pages array */
> > for (i = 0; i < rqst->rq_npages; i++) {
> > -   void *kaddr = kmap(rqst->rq_pages[i]);
> > -   size_t len = rqst->rq_pagesz;
> > +   void *kaddr;
> > +   unsigned int len, offset;
> >
> > -   if (i == rqst->rq_npages - 1)
> > -   len = rqst->rq_tailsz;
> > > +   rqst_page_get_length(rqst, i, &len, &offset);
> > +
> > +   kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
> 
> I suppose it's more robust to map a page at a time, but it's pretty expensive.
> Is this the only way to iterate over a potentially very large block of data? 
> For
> example, a 1MB segment means 256 kmap/kunmaps.

I will look into not mapping those pages while doing I/O.

This code path is for RDMA send/receive, and it's rarely used for transferring 
large amounts of data.

> 
> Tom.
> 
> >
> > crypto_shash_update(shash, kaddr, len);
> >
> >

Re: [PATCH v34 2/4] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-06-25 Thread Michael S. Tsirkin

On Tue, Jun 26, 2018 at 11:46:35AM +0800, Wei Wang wrote:
> On 06/26/2018 09:37 AM, Michael S. Tsirkin wrote:
> > On Mon, Jun 25, 2018 at 08:05:10PM +0800, Wei Wang wrote:
> > 
> > > @@ -326,17 +353,6 @@ static void stats_handle_request(struct 
> > > virtio_balloon *vb)
> > >   virtqueue_kick(vq);
> > >   }
> > > -static void virtballoon_changed(struct virtio_device *vdev)
> > > -{
> > > - struct virtio_balloon *vb = vdev->priv;
> > > - unsigned long flags;
> > > -
> > > > - spin_lock_irqsave(&vb->stop_update_lock, flags);
> > > > - if (!vb->stop_update)
> > > > - queue_work(system_freezable_wq, &vb->update_balloon_size_work);
> > > > - spin_unlock_irqrestore(&vb->stop_update_lock, flags);
> > > -}
> > > -
> > >   static inline s64 towards_target(struct virtio_balloon *vb)
> > >   {
> > >   s64 target;
> > > @@ -353,6 +369,35 @@ static inline s64 towards_target(struct 
> > > virtio_balloon *vb)
> > >   return target - vb->num_pages;
> > >   }
> > > +static void virtballoon_changed(struct virtio_device *vdev)
> > > +{
> > > + struct virtio_balloon *vb = vdev->priv;
> > > + unsigned long flags;
> > > + s64 diff = towards_target(vb);
> > > +
> > > + if (diff) {
> > > + spin_lock_irqsave(>stop_update_lock, flags);
> > > + if (!vb->stop_update)
> > > + queue_work(system_freezable_wq,
> > > +>update_balloon_size_work);
> > > + spin_unlock_irqrestore(>stop_update_lock, flags);
> > > + }
> > > +
> > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> > > + virtio_cread(vdev, struct virtio_balloon_config,
> > > +  free_page_report_cmd_id, >cmd_id_received);
> > > + if (vb->cmd_id_received !=
> > > + VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID &&
> > > + vb->cmd_id_received != vb->cmd_id_active) {
> > > + spin_lock_irqsave(>stop_update_lock, flags);
> > > + if (!vb->stop_update)
> > > + queue_work(vb->balloon_wq,
> > > +>report_free_page_work);
> > > + spin_unlock_irqrestore(>stop_update_lock, flags);
> > > + }
> > > + }
> > > +}
> > > +
> > >   static void update_balloon_size(struct virtio_balloon *vb)
> > >   {
> > >   u32 actual = vb->num_pages;
> > > @@ -425,44 +470,253 @@ static void update_balloon_size_func(struct 
> > > work_struct *work)
> > >   queue_work(system_freezable_wq, work);
> > >   }
> > > +static void free_page_vq_cb(struct virtqueue *vq)
> > > +{
> > > + unsigned int len;
> > > + void *buf;
> > > + struct virtio_balloon *vb = vq->vdev->priv;
> > > +
> > > + while (1) {
> > > + buf = virtqueue_get_buf(vq, );
> > > +
> > > + if (!buf || buf == >cmd_start || buf == >cmd_stop)
> > > + break;
> > If there's any buffer after this one we might never get another
> > callback.
> 
> I think every used buffer can get the callback, because host takes from the
> arrays one by one, and puts back each with a vq notify.

It's probably racy even in this case. Besides, host is free to do it in
any way that's legal in spec.

> 
> 
> > > + free_pages((unsigned long)buf, ARRAY_ALLOC_ORDER);
> > > + }
> > > +}
> > > +
> > >   static int init_vqs(struct virtio_balloon *vb)
> > >   {
> > > - struct virtqueue *vqs[3];
> > > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request 
> > > };
> > > - static const char * const names[] = { "inflate", "deflate", "stats" };
> > > - int err, nvqs;
> > > + struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
> > > + vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
> > > + const char *names[VIRTIO_BALLOON_VQ_MAX];
> > > + struct scatterlist sg;
> > > + int ret;
> > >   /*
> > > -  * We expect two virtqueues: inflate and deflate, and
> > > -  * optionally stat.
> > > +  * Inflateq and deflateq are used unconditionally. The names[]
> > > +  * will be NULL if the related feature is not enabled, which will
> > > +  * cause no allocation for the corresponding virtqueue in find_vqs.
> > >*/
> > > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> > > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
> > > - if (err)
> > > - return err;
> > > + callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
> > > + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
> > > + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
> > > + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
> > > + names[VIRTIO_BALLOON_VQ_STATS] = NULL;
> > > + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> > > - vb->inflate_vq = vqs[0];
> > > - vb->deflate_vq = vqs[1];
> > >   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > > - struct scatterlist sg;
> > > - unsigned int num_stats;
> > > - vb->stats_vq = vqs[2];
> > > + names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> > >

Re: [PATCH v34 2/4] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-06-25 Thread Michael S. Tsirkin

On Tue, Jun 26, 2018 at 11:46:35AM +0800, Wei Wang wrote:
> On 06/26/2018 09:37 AM, Michael S. Tsirkin wrote:
> > On Mon, Jun 25, 2018 at 08:05:10PM +0800, Wei Wang wrote:
> > 
> > > @@ -326,17 +353,6 @@ static void stats_handle_request(struct 
> > > virtio_balloon *vb)
> > >   virtqueue_kick(vq);
> > >   }
> > > -static void virtballoon_changed(struct virtio_device *vdev)
> > > -{
> > > - struct virtio_balloon *vb = vdev->priv;
> > > - unsigned long flags;
> > > -
> > > - spin_lock_irqsave(>stop_update_lock, flags);
> > > - if (!vb->stop_update)
> > > - queue_work(system_freezable_wq, >update_balloon_size_work);
> > > - spin_unlock_irqrestore(>stop_update_lock, flags);
> > > -}
> > > -
> > >   static inline s64 towards_target(struct virtio_balloon *vb)
> > >   {
> > >   s64 target;
> > > @@ -353,6 +369,35 @@ static inline s64 towards_target(struct 
> > > virtio_balloon *vb)
> > >   return target - vb->num_pages;
> > >   }
> > > +static void virtballoon_changed(struct virtio_device *vdev)
> > > +{
> > > + struct virtio_balloon *vb = vdev->priv;
> > > + unsigned long flags;
> > > + s64 diff = towards_target(vb);
> > > +
> > > + if (diff) {
> > > + spin_lock_irqsave(>stop_update_lock, flags);
> > > + if (!vb->stop_update)
> > > + queue_work(system_freezable_wq,
> > > +>update_balloon_size_work);
> > > + spin_unlock_irqrestore(>stop_update_lock, flags);
> > > + }
> > > +
> > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> > > + virtio_cread(vdev, struct virtio_balloon_config,
> > > +  free_page_report_cmd_id, >cmd_id_received);
> > > + if (vb->cmd_id_received !=
> > > + VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID &&
> > > + vb->cmd_id_received != vb->cmd_id_active) {
> > > + spin_lock_irqsave(>stop_update_lock, flags);
> > > + if (!vb->stop_update)
> > > + queue_work(vb->balloon_wq,
> > > +>report_free_page_work);
> > > + spin_unlock_irqrestore(>stop_update_lock, flags);
> > > + }
> > > + }
> > > +}
> > > +
> > >   static void update_balloon_size(struct virtio_balloon *vb)
> > >   {
> > >   u32 actual = vb->num_pages;
> > > @@ -425,44 +470,253 @@ static void update_balloon_size_func(struct 
> > > work_struct *work)
> > >   queue_work(system_freezable_wq, work);
> > >   }
> > > +static void free_page_vq_cb(struct virtqueue *vq)
> > > +{
> > > + unsigned int len;
> > > + void *buf;
> > > + struct virtio_balloon *vb = vq->vdev->priv;
> > > +
> > > + while (1) {
> > > + buf = virtqueue_get_buf(vq, );
> > > +
> > > + if (!buf || buf == >cmd_start || buf == >cmd_stop)
> > > + break;
> > If there's any buffer after this one we might never get another
> > callback.
> 
> I think every used buffer can get the callback, because host takes from the
> arrays one by one, and puts back each with a vq notify.

It's probably racy even in this case. Besides, host is free to do it in
any way that's legal in spec.

> 
> 
> > > + free_pages((unsigned long)buf, ARRAY_ALLOC_ORDER);
> > > + }
> > > +}
> > > +
> > >   static int init_vqs(struct virtio_balloon *vb)
> > >   {
> > > - struct virtqueue *vqs[3];
> > > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request 
> > > };
> > > - static const char * const names[] = { "inflate", "deflate", "stats" };
> > > - int err, nvqs;
> > > + struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
> > > + vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
> > > + const char *names[VIRTIO_BALLOON_VQ_MAX];
> > > + struct scatterlist sg;
> > > + int ret;
> > >   /*
> > > -  * We expect two virtqueues: inflate and deflate, and
> > > -  * optionally stat.
> > > +  * Inflateq and deflateq are used unconditionally. The names[]
> > > +  * will be NULL if the related feature is not enabled, which will
> > > +  * cause no allocation for the corresponding virtqueue in find_vqs.
> > >*/
> > > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> > > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
> > > - if (err)
> > > - return err;
> > > + callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
> > > + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
> > > + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
> > > + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
> > > + names[VIRTIO_BALLOON_VQ_STATS] = NULL;
> > > + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> > > - vb->inflate_vq = vqs[0];
> > > - vb->deflate_vq = vqs[1];
> > >   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > > - struct scatterlist sg;
> > > - unsigned int num_stats;
> > > - vb->stats_vq = vqs[2];
> > > + names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> > >

Re: [PATCH v3 3/8] staging: rtl8192u: User memset to initialize memory, instead of loop.

2018-06-25 Thread Greg Kroah-Hartman

On Mon, Jun 25, 2018 at 08:50:26PM +0100, John Whitmore wrote:
> On Mon, Jun 25, 2018 at 02:05:04PM +0100, Justin Skists wrote:
> > 
> > > On 25 June 2018 at 13:36 John Whitmore  wrote:
> > > 
> > > 
> > > On Mon, Jun 25, 2018 at 12:06:30PM +0300, Andy Shevchenko wrote:
> > > > On Sun, Jun 24, 2018 at 6:34 PM, John Whitmore 
> > > >  wrote:
> > > > > Replaced memory initialising loop with memset, as suggested by Andy 
> > > > > Shevchenko
> > > > >
> > > > 
> > > > Suggested-by ?
> > > >
> > > 
> > > Em, not sure how to respond, it certainly wasn't my idea. I was just 
> > > making
> > > coding style changes, badly. ;)
> > 
> > Suggested-by is a tag for patches, to give credit. For example:
> > 
> > https://elixir.bootlin.com/linux/v4.18-rc1/source/Documentation/process/submitting-patches.rst
> > 
> > See section "13) Using Reported-by:, Tested-by:, Reviewed-by:, 
> > Suggested-by: and Fixes:"
> > 
> > Hope that helps,
> > Justin.
> 
> Oops... that helps thank you. I have to re-read that document, it obvioiusly
> didn't all sink in :(

I've dropped this patch, can you fix it up and resend?

thanks,

greg k-h

Re: [PATCH v3 3/8] staging: rtl8192u: User memset to initialize memory, instead of loop.

2018-06-25 Thread Greg Kroah-Hartman

On Mon, Jun 25, 2018 at 08:50:26PM +0100, John Whitmore wrote:
> On Mon, Jun 25, 2018 at 02:05:04PM +0100, Justin Skists wrote:
> > 
> > > On 25 June 2018 at 13:36 John Whitmore  wrote:
> > > 
> > > 
> > > On Mon, Jun 25, 2018 at 12:06:30PM +0300, Andy Shevchenko wrote:
> > > > On Sun, Jun 24, 2018 at 6:34 PM, John Whitmore 
> > > >  wrote:
> > > > > Replaced memory initialising loop with memset, as suggested by Andy 
> > > > > Shevchenko
> > > > >
> > > > 
> > > > Suggested-by ?
> > > >
> > > 
> > > Em, not sure how to respond, it certainly wasn't my idea. I was just 
> > > making
> > > coding style changes, badly. ;)
> > 
> > Suggested-by is a tag for patches, to give credit. For example:
> > 
> > https://elixir.bootlin.com/linux/v4.18-rc1/source/Documentation/process/submitting-patches.rst
> > 
> > See section "13) Using Reported-by:, Tested-by:, Reviewed-by:, 
> > Suggested-by: and Fixes:"
> > 
> > Hope that helps,
> > Justin.
> 
> Oops... that helps thank you. I have to re-read that document, it obvioiusly
> didn't all sink in :(

I've dropped this patch, can you fix it up and resend?

thanks,

greg k-h

Re: [PATCH v3 5/8] staging: rtl8192u: Use %s and func instead of hardcoded string - Style

2018-06-25 Thread Greg KH

On Sun, Jun 24, 2018 at 04:34:51PM +0100, John Whitmore wrote:
> Changed a number of hard coded function names to use %s and __func__
> 
> Mailing list response suggest that there is a better method for debugging
> using netdev_dbg(). I can't argue with that, but for the moment this change
> will clear the checkpatch.pl Warning.
> 
> Signed-off-by: John Whitmore 
> ---
>  drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)

Did not apply :(

Re: [PATCH v3 5/8] staging: rtl8192u: Use %s and func instead of hardcoded string - Style

2018-06-25 Thread Greg KH

On Sun, Jun 24, 2018 at 04:34:51PM +0100, John Whitmore wrote:
> Changed a number of hard coded function names to use %s and __func__
> 
> Mailing list response suggest that there is a better method for debugging
> using netdev_dbg(). I can't argue with that, but for the moment this change
> will clear the checkpatch.pl Warning.
> 
> Signed-off-by: John Whitmore 
> ---
>  drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)

Did not apply :(

Re: F_OFD_GETLK implemented wrong with CIFS protocol version 2.0+

2018-06-25 Thread Steve French

We are taking a look at this - Ronnie had some ideas.  Probably simply
not implemented - hopefully not too hard to fix.
On Mon, Jun 25, 2018 at 6:58 PM Laura Abbott  wrote:
>
> Hi,
>
> A while back, someone reported a failure on Fedora when trying to boot
> a QEMU image off of a CIFS share. The issue was reduced down to a
> test case (https://bugzilla.redhat.com/show_bug.cgi?id=1484130#c8)
>
> # cat test-ofd-lock.c
> #define _GNU_SOURCE
> #include 
> #include 
> #include 
> #include 
>
> int main(int argc, char **argv)
> {
>  int ret;
>  int fd;
>  struct flock fl = {
>  .l_whence = SEEK_SET,
>  .l_start  = 0,
>  .l_len= 0,
>  .l_type   = F_RDLCK,
>  };
>  if (argc < 2) {
>  fprintf(stderr, "Usage: %s \n", argv[0]);
>  return 1;
>  }
>  fd = open(argv[1], O_RDWR);
>  if (fd < 0) {
>  perror("open");
>  return errno;
>  }
>  ret = fcntl(fd, F_OFD_SETLK, );
>  if (ret) {
>  perror("setlk");
>  return errno;
>  }
>  fl.l_type = F_WRLCK;
>  ret = fcntl(fd, F_OFD_GETLK, );
>  if (ret) {
>  perror("getlk");
>  return errno;
>  }
>  if (fl.l_type != F_UNLCK) {
>  fprintf(stderr, "get lock test failed\n");
>  return 1;
>  }
>  return 0;
> }
> [root@localhost ~]# make test-ofd-lock
> cc test-ofd-lock.c   -o test-ofd-lock
> [root@localhost ~]# touch /tmp/test && ./test-ofd-lock /tmp/test
> [root@localhost ~]# echo $?
> 0
> [root@localhost ~]# touch /mnt/test && ./test-ofd-lock /mnt/test
> get lock test failed
> [root@localhost ~]# mount | grep /mnt
> //192.168.31.1/tddownload on /mnt type cifs (rw,relatime,vers=3.0,
> cache=strict,username=admin,domain=,uid=0,
> noforceuid,gid=0,noforcegid,addr=192.168.31.1,file_mode=0755,
> dir_mode=0755,nounix,serverino,mapposix,rsize=1048576,
> wsize=1048576,echo_interval=60,actimeo=1,user=admin)
>
>
> As explained by one of the QEMU developers
> (https://bugzilla.redhat.com/show_bug.cgi?id=1484130#c37)
>
> '''
> It is a kernel bug. The code snippet in comment 8 shows clearly that the 
> kernel
> is doing the wrong thing, which cannot be fixed/worked around by QEMU.
>
> In man 2 fcntl:
>
> F_OFD_GETLK (struct flock *)
>On input to this call, lock describes an open file description 
> lock
> we would like to place on the file.  If the lock could  be  placed,  fcntl()  
> does  not
>actually  place  it,  but  returns F_UNLCK in the l_type field 
> of lock
> and leaves the other fields of the structure unchanged.  If one or more 
> incompatible
>locks would prevent this lock being placed, then details about 
> one of
> these locks are returned via lock, as described above for F_GETLK.
>
> which is not the case with the new CIFS behaviour.
> ''
>
> You can read the full context at 
> https://bugzilla.redhat.com/show_bug.cgi?id=1484130
>
> Any suggestions?
>
> Thanks,
> Laura
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Thanks,

Steve

Re: F_OFD_GETLK implemented wrong with CIFS protocol version 2.0+

2018-06-25 Thread Steve French

We are taking a look at this - Ronnie had some ideas.  Probably simply
not implemented - hopefully not too hard to fix.
On Mon, Jun 25, 2018 at 6:58 PM Laura Abbott  wrote:
>
> Hi,
>
> A while back, someone reported a failure on Fedora when trying to boot
> a QEMU image off of a CIFS share. The issue was reduced down to a
> test case (https://bugzilla.redhat.com/show_bug.cgi?id=1484130#c8)
>
> # cat test-ofd-lock.c
> #define _GNU_SOURCE
> #include 
> #include 
> #include 
> #include 
>
> int main(int argc, char **argv)
> {
>  int ret;
>  int fd;
>  struct flock fl = {
>  .l_whence = SEEK_SET,
>  .l_start  = 0,
>  .l_len= 0,
>  .l_type   = F_RDLCK,
>  };
>  if (argc < 2) {
>  fprintf(stderr, "Usage: %s \n", argv[0]);
>  return 1;
>  }
>  fd = open(argv[1], O_RDWR);
>  if (fd < 0) {
>  perror("open");
>  return errno;
>  }
>  ret = fcntl(fd, F_OFD_SETLK, );
>  if (ret) {
>  perror("setlk");
>  return errno;
>  }
>  fl.l_type = F_WRLCK;
>  ret = fcntl(fd, F_OFD_GETLK, );
>  if (ret) {
>  perror("getlk");
>  return errno;
>  }
>  if (fl.l_type != F_UNLCK) {
>  fprintf(stderr, "get lock test failed\n");
>  return 1;
>  }
>  return 0;
> }
> [root@localhost ~]# make test-ofd-lock
> cc test-ofd-lock.c   -o test-ofd-lock
> [root@localhost ~]# touch /tmp/test && ./test-ofd-lock /tmp/test
> [root@localhost ~]# echo $?
> 0
> [root@localhost ~]# touch /mnt/test && ./test-ofd-lock /mnt/test
> get lock test failed
> [root@localhost ~]# mount | grep /mnt
> //192.168.31.1/tddownload on /mnt type cifs (rw,relatime,vers=3.0,
> cache=strict,username=admin,domain=,uid=0,
> noforceuid,gid=0,noforcegid,addr=192.168.31.1,file_mode=0755,
> dir_mode=0755,nounix,serverino,mapposix,rsize=1048576,
> wsize=1048576,echo_interval=60,actimeo=1,user=admin)
>
>
> As explained by one of the QEMU developers
> (https://bugzilla.redhat.com/show_bug.cgi?id=1484130#c37)
>
> '''
> It is a kernel bug. The code snippet in comment 8 shows clearly that the 
> kernel
> is doing the wrong thing, which cannot be fixed/worked around by QEMU.
>
> In man 2 fcntl:
>
> F_OFD_GETLK (struct flock *)
>On input to this call, lock describes an open file description 
> lock
> we would like to place on the file.  If the lock could  be  placed,  fcntl()  
> does  not
>actually  place  it,  but  returns F_UNLCK in the l_type field 
> of lock
> and leaves the other fields of the structure unchanged.  If one or more 
> incompatible
>locks would prevent this lock being placed, then details about 
> one of
> these locks are returned via lock, as described above for F_GETLK.
>
> which is not the case with the new CIFS behaviour.
> ''
>
> You can read the full context at 
> https://bugzilla.redhat.com/show_bug.cgi?id=1484130
>
> Any suggestions?
>
> Thanks,
> Laura
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Thanks,

Steve

[PATCH 1/1] kvm: selftests: add cr4_cpuid_sync_test

2018-06-25 Thread Wei Huang

KVM is supposed to update some guest VM's CPUID bits (e.g. OSXSAVE) when
CR4 is changed. A bug was found in KVM recently and it was fixed by
Commit c4d2188206ba ("KVM: x86: Update cpuid properly when CR4.OSXAVE or
CR4.PKE is changed"). This patch adds a test to verify the synchronization
between guest VM's CR4 and CPUID bits.

Signed-off-by: Wei Huang 
---
 tools/testing/selftests/kvm/Makefile  |   1 +
 tools/testing/selftests/kvm/cr4_cpuid_sync_test.c | 129 ++
 2 files changed, 130 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/cr4_cpuid_sync_test.c

diff --git a/tools/testing/selftests/kvm/Makefile 
b/tools/testing/selftests/kvm/Makefile
index d9d0031..65bda4f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -9,6 +9,7 @@ LIBKVM_x86_64 = lib/x86.c lib/vmx.c
 TEST_GEN_PROGS_x86_64 = set_sregs_test
 TEST_GEN_PROGS_x86_64 += sync_regs_test
 TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c 
b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
new file mode 100644
index 000..dbbaf3c
--- /dev/null
+++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define X86_FEATURE_XSAVE  (1<<26)
+#define X86_FEATURE_OSXSAVE(1<<27)
+#define VCPU_ID1
+
+enum {
+   GUEST_UPDATE_CR4 = 0x1000,
+   GUEST_FAILED,
+   GUEST_DONE,
+};
+
+static void exit_to_hv(uint16_t port)
+{
+   __asm__ __volatile__("in %[port], %%al"
+:
+: [port]"d"(port)
+: "rax");
+}
+
+static inline bool cr4_cpuid_is_sync(void)
+{
+   int func, subfunc;
+   uint32_t eax, ebx, ecx, edx;
+   uint64_t cr4;
+
+   func = 0x1;
+   subfunc = 0x0;
+   __asm__ __volatile__("cpuid"
+: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+: "a"(func), "c"(subfunc));
+
+   cr4 = get_cr4();
+
+   return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+}
+
+static void guest_code(void)
+{
+   uint64_t cr4;
+
+   /* turn on CR4.OSXSAVE */
+   cr4 = get_cr4();
+   cr4 |= X86_CR4_OSXSAVE;
+   set_cr4(cr4);
+
+   /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+   if (!cr4_cpuid_is_sync())
+   exit_to_hv(GUEST_FAILED);
+
+   /* notify hypervisor to change CR4 */
+   exit_to_hv(GUEST_UPDATE_CR4);
+
+   /* check again */
+   if (!cr4_cpuid_is_sync())
+   exit_to_hv(GUEST_FAILED);
+
+   exit_to_hv(GUEST_DONE);
+}
+
+int main(int argc, char *argv[])
+{
+   struct kvm_run *run;
+   struct kvm_vm *vm;
+   struct kvm_sregs sregs;
+   struct kvm_cpuid_entry2 *entry;
+   int rc;
+
+   entry = kvm_get_supported_cpuid_entry(1);
+   if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+   printf("XSAVE feature not supported, skipping test\n");
+   return 0;
+   }
+
+   /* Tell stdout not to buffer its content */
+   setbuf(stdout, NULL);
+
+   /* Create VM */
+   vm = vm_create_default_vmx(VCPU_ID, guest_code);
+   vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+   run = vcpu_state(vm, VCPU_ID);
+
+   while (1) {
+   rc = _vcpu_run(vm, VCPU_ID);
+
+   if (run->exit_reason == KVM_EXIT_IO) {
+   switch (run->io.port) {
+   case GUEST_UPDATE_CR4:
+   /* emulate hypervisor clearing CR4.OSXSAVE */
+   vcpu_sregs_get(vm, VCPU_ID, );
+   sregs.cr4 &= ~X86_CR4_OSXSAVE;
+   vcpu_sregs_set(vm, VCPU_ID, );
+   break;
+   case GUEST_FAILED:
+   TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) 
unsynchronized with CPUID bit.");
+   break;
+   case GUEST_DONE:
+   goto done;
+   default:
+   TEST_ASSERT(false, "Unknown port 0x%x.",
+   run->io.port);
+   }
+   }
+   }
+
+   kvm_vm_free(vm);
+
+done:
+   return 0;
+}
-- 
1.8.3.1

[PATCH 1/1] kvm: selftests: add cr4_cpuid_sync_test

2018-06-25 Thread Wei Huang

KVM is supposed to update some guest VM's CPUID bits (e.g. OSXSAVE) when
CR4 is changed. A bug was found in KVM recently and it was fixed by
Commit c4d2188206ba ("KVM: x86: Update cpuid properly when CR4.OSXAVE or
CR4.PKE is changed"). This patch adds a test to verify the synchronization
between guest VM's CR4 and CPUID bits.

Signed-off-by: Wei Huang 
---
 tools/testing/selftests/kvm/Makefile  |   1 +
 tools/testing/selftests/kvm/cr4_cpuid_sync_test.c | 129 ++
 2 files changed, 130 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/cr4_cpuid_sync_test.c

diff --git a/tools/testing/selftests/kvm/Makefile 
b/tools/testing/selftests/kvm/Makefile
index d9d0031..65bda4f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -9,6 +9,7 @@ LIBKVM_x86_64 = lib/x86.c lib/vmx.c
 TEST_GEN_PROGS_x86_64 = set_sregs_test
 TEST_GEN_PROGS_x86_64 += sync_regs_test
 TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c 
b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
new file mode 100644
index 000..dbbaf3c
--- /dev/null
+++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define X86_FEATURE_XSAVE  (1<<26)
+#define X86_FEATURE_OSXSAVE(1<<27)
+#define VCPU_ID1
+
+enum {
+   GUEST_UPDATE_CR4 = 0x1000,
+   GUEST_FAILED,
+   GUEST_DONE,
+};
+
+static void exit_to_hv(uint16_t port)
+{
+   __asm__ __volatile__("in %[port], %%al"
+:
+: [port]"d"(port)
+: "rax");
+}
+
+static inline bool cr4_cpuid_is_sync(void)
+{
+   int func, subfunc;
+   uint32_t eax, ebx, ecx, edx;
+   uint64_t cr4;
+
+   func = 0x1;
+   subfunc = 0x0;
+   __asm__ __volatile__("cpuid"
+: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+: "a"(func), "c"(subfunc));
+
+   cr4 = get_cr4();
+
+   return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+}
+
+static void guest_code(void)
+{
+   uint64_t cr4;
+
+   /* turn on CR4.OSXSAVE */
+   cr4 = get_cr4();
+   cr4 |= X86_CR4_OSXSAVE;
+   set_cr4(cr4);
+
+   /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+   if (!cr4_cpuid_is_sync())
+   exit_to_hv(GUEST_FAILED);
+
+   /* notify hypervisor to change CR4 */
+   exit_to_hv(GUEST_UPDATE_CR4);
+
+   /* check again */
+   if (!cr4_cpuid_is_sync())
+   exit_to_hv(GUEST_FAILED);
+
+   exit_to_hv(GUEST_DONE);
+}
+
+int main(int argc, char *argv[])
+{
+   struct kvm_run *run;
+   struct kvm_vm *vm;
+   struct kvm_sregs sregs;
+   struct kvm_cpuid_entry2 *entry;
+   int rc;
+
+   entry = kvm_get_supported_cpuid_entry(1);
+   if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+   printf("XSAVE feature not supported, skipping test\n");
+   return 0;
+   }
+
+   /* Tell stdout not to buffer its content */
+   setbuf(stdout, NULL);
+
+   /* Create VM */
+   vm = vm_create_default_vmx(VCPU_ID, guest_code);
+   vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+   run = vcpu_state(vm, VCPU_ID);
+
+   while (1) {
+   rc = _vcpu_run(vm, VCPU_ID);
+
+   if (run->exit_reason == KVM_EXIT_IO) {
+   switch (run->io.port) {
+   case GUEST_UPDATE_CR4:
+   /* emulate hypervisor clearing CR4.OSXSAVE */
+   vcpu_sregs_get(vm, VCPU_ID, );
+   sregs.cr4 &= ~X86_CR4_OSXSAVE;
+   vcpu_sregs_set(vm, VCPU_ID, );
+   break;
+   case GUEST_FAILED:
+   TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) 
unsynchronized with CPUID bit.");
+   break;
+   case GUEST_DONE:
+   goto done;
+   default:
+   TEST_ASSERT(false, "Unknown port 0x%x.",
+   run->io.port);
+   }
+   }
+   }
+
+   kvm_vm_free(vm);
+
+done:
+   return 0;
+}
-- 
1.8.3.1

[BUG? v4.16.12] OOM kill of qemu causes whacky pty shenanigans

2018-06-25 Thread Dave Chinner

Hi folks,

I've come across a strange problem recently when doing some MPI
based CPU+IO load test simulations. I've been running them
on the same machine I use to host all my filesystem test VMs
(16p/32t, 64GB RAM) which is currently running 4.16.12.  Both the
MPI job context and the qemu VM contexts are run from inside
separate, unrelated ssh + screen sessions.

When I size the MPI job large enough (18 million cells in the mesh
for the CFD simulation requires about 45GB of RAM) it runs the host
out of memory and the OOM killer picks the largest VM (32GB RAM) to
kill as it doesn't track the MPI job as a single memory hog. The OOM
kill message is nothing unusual:


[1042458.854842] snappyHexMesh invoked oom-killer: 
gfp_mask=0x14200ca(GFP_HIGHUSER_MOVABLE), nodemask=(null), order=0, 
oom_score_adj=0
[1042458.868065] snappyHexMesh cpuset=session-1.scope mems_allowed=0-1
[1042458.875088] CPU: 12 PID: 39442 Comm: snappyHexMesh Not tainted 
4.16.0-2-amd64 #1 Debian 4.16.12-1
[1042458.885182] Hardware name: Dell Inc. PowerEdge R820/0YWR73, BIOS 1.5.0 
03/08/2013
[1042458.893727] Call Trace:
[1042458.896658]  dump_stack+0x5c/0x85
[1042458.900552]  dump_header+0x6b/0x289
[1042458.904632]  ? apparmor_capable+0xa4/0xe0
[1042458.909301]  ? cap_inode_getsecurity+0x220/0x220
[1042458.914644]  oom_kill_process+0x228/0x470
[1042458.919311]  out_of_memory+0x2ab/0x4b0
[1042458.923686]  __alloc_pages_slowpath+0x9f2/0xd80
[1042458.928936]  __alloc_pages_nodemask+0x236/0x250
[1042458.934184]  filemap_fault+0x1f9/0x630
[1042458.938562]  ? page_add_file_rmap+0x109/0x200
[1042458.943618]  ? alloc_set_pte+0x452/0x500
[1042458.948190]  ? _cond_resched+0x15/0x40
[1042458.952618]  __xfs_filemap_fault+0x72/0x200 [xfs]
[1042458.958062]  __do_fault+0x1f/0xb0
[1042458.961953]  __handle_mm_fault+0xca6/0x1220
[1042458.966815]  handle_mm_fault+0xdc/0x210
[1042458.971290]  __do_page_fault+0x256/0x4e0
[1042458.975860]  ? page_fault+0x2f/0x50
[1042458.979942]  page_fault+0x45/0x50
[1042458.983834] RIP: 425e80c0:0x7ffe648cb690
[1042458.988401] RSP: 0018:0006aee0 EFLAGS: 55f1425e80b0
[1042458.988416] Mem-Info:
[1042458.997564] active_anon:14864071 inactive_anon:1334793 isolated_anon:0
  active_file:2 inactive_file:49 isolated_file:0
  unevictable:0 dirty:5 writeback:0 unstable:0
  slab_reclaimable:28879 slab_unreclaimable:51679
  mapped:82 shmem:117 pagetables:41302 bounce:0
  free:55214 free_pcp:503 free_cma:0
[1042459.035688] Node 0 active_anon:29395060kB inactive_anon:2795292kB 
active_file:48kB inactive_file:104kB unevictable:0kB isolated(anon):0kB 
isolated(file):0kB mapped:160kB dirty:0kB writeback:0kB shmem:324kB shmem_thp: 
0kB shmem_pmdmapped: 0kB anon_thp: 19171328kB writeback_tmp:0kB unstable:0kB 
all_unreclaimable? yes
[1042459.067024] Node 1 active_anon:30061224kB inactive_anon:2543880kB 
active_file:0kB inactive_file:152kB unevictable:0kB isolated(anon):0kB 
isolated(file):0kB mapped:164kB dirty:20kB writeback:0kB shmem:144kB shmem_thp: 
0kB shmem_pmdmapped: 0kB anon_thp: 13232128kB writeback_tmp:0kB unstable:0kB 
all_unreclaimable? yes
[1042459.098359] Node 0 DMA free:15880kB min:20kB low:32kB high:44kB 
active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB 
unevictable:0kB writepending:0kB present:15980kB managed:15896kB mlocked:0kB 
kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB 
free_cma:0kB
[1042459.127285] lowmem_reserve[]: 0 3174 32097 32097 32097
[1042459.133225] Node 0 DMA32 free:119936kB min:kB low:7692kB high:10940kB 
active_anon:2734924kB inactive_anon:416452kB active_file:0kB inactive_file:0kB 
unevictable:0kB writepending:0kB present:3345344kB managed:3279776kB 
mlocked:0kB kernel_stack:0kB pagetables:4kB bounce:0kB free_pcp:0kB 
local_pcp:0kB free_cma:0kB
[1042459.164576] lowmem_reserve[]: 0 0 28922 28922 28922
[1042459.170225] Node 0 Normal free:40080kB min:40504kB low:70120kB 
high:99736kB active_anon:26660136kB inactive_anon:2378840kB active_file:48kB 
inactive_file:104kB unevictable:0kB writepending:0kB present:30146560kB 
managed:29616592kB mlocked:0kB kernel_stack:6808kB pagetables:82392kB 
bounce:0kB free_pcp:1680kB local_pcp:124kB free_cma:0kB
[1042459.203605] lowmem_reserve[]: 0 0 0 0 0
[1042459.208094] Node 1 Normal free:44960kB min:45136kB low:78136kB 
high:36kB active_anon:30061224kB inactive_anon:2544016kB active_file:0kB 
inactive_file:224kB unevictable:0kB writepending:4kB present:33554432kB 
managed:33005280kB mlocked:0kB kernel_stack:4520kB pagetables:82812kB 
bounce:0kB free_pcp:388kB local_pcp:28kB free_cma:0kB
[1042459.241274] lowmem_reserve[]: 0 0 0 0 0
[1042459.245756] Node 0 DMA: 0*4kB 1*8kB (U) 0*16kB 0*32kB 2*64kB (U) 1*128kB 
(U) 1*256kB (U) 0*512kB 1*1024kB (U) 1*2048kB (M) 3*4096kB (M) = 15880kB
[1042459.260531] Node 0 DMA32: 26*4kB (UM) 33*8kB (UM) 25*16kB (UM) 110*32kB 
(UME) 79*64kB (UME) 48*128kB (UME) 26*256kB (UME) 13*512kB (UME) 9*1024kB

[BUG? v4.16.12] OOM kill of qemu causes whacky pty shenanigans

2018-06-25 Thread Dave Chinner

Hi folks,

I've come across a strange problem recently when doing some MPI
based CPU+IO load test simulations. I've been running them
on the same machine I use to host all my filesystem test VMs
(16p/32t, 64GB RAM) which is currently running 4.16.12.  Both the
MPI job context and the qemu VM contexts are run from inside
separate, unrelated ssh + screen sessions.

When I size the MPI job large enough (18 million cells in the mesh
for the CFD simulation requires about 45GB of RAM) it runs the host
out of memory and the OOM killer picks the largest VM (32GB RAM) to
kill as it doesn't track the MPI job as a single memory hog. The OOM
kill message is nothing unusual:


[1042458.854842] snappyHexMesh invoked oom-killer: 
gfp_mask=0x14200ca(GFP_HIGHUSER_MOVABLE), nodemask=(null), order=0, 
oom_score_adj=0
[1042458.868065] snappyHexMesh cpuset=session-1.scope mems_allowed=0-1
[1042458.875088] CPU: 12 PID: 39442 Comm: snappyHexMesh Not tainted 
4.16.0-2-amd64 #1 Debian 4.16.12-1
[1042458.885182] Hardware name: Dell Inc. PowerEdge R820/0YWR73, BIOS 1.5.0 
03/08/2013
[1042458.893727] Call Trace:
[1042458.896658]  dump_stack+0x5c/0x85
[1042458.900552]  dump_header+0x6b/0x289
[1042458.904632]  ? apparmor_capable+0xa4/0xe0
[1042458.909301]  ? cap_inode_getsecurity+0x220/0x220
[1042458.914644]  oom_kill_process+0x228/0x470
[1042458.919311]  out_of_memory+0x2ab/0x4b0
[1042458.923686]  __alloc_pages_slowpath+0x9f2/0xd80
[1042458.928936]  __alloc_pages_nodemask+0x236/0x250
[1042458.934184]  filemap_fault+0x1f9/0x630
[1042458.938562]  ? page_add_file_rmap+0x109/0x200
[1042458.943618]  ? alloc_set_pte+0x452/0x500
[1042458.948190]  ? _cond_resched+0x15/0x40
[1042458.952618]  __xfs_filemap_fault+0x72/0x200 [xfs]
[1042458.958062]  __do_fault+0x1f/0xb0
[1042458.961953]  __handle_mm_fault+0xca6/0x1220
[1042458.966815]  handle_mm_fault+0xdc/0x210
[1042458.971290]  __do_page_fault+0x256/0x4e0
[1042458.975860]  ? page_fault+0x2f/0x50
[1042458.979942]  page_fault+0x45/0x50
[1042458.983834] RIP: 425e80c0:0x7ffe648cb690
[1042458.988401] RSP: 0018:0006aee0 EFLAGS: 55f1425e80b0
[1042458.988416] Mem-Info:
[1042458.997564] active_anon:14864071 inactive_anon:1334793 isolated_anon:0
  active_file:2 inactive_file:49 isolated_file:0
  unevictable:0 dirty:5 writeback:0 unstable:0
  slab_reclaimable:28879 slab_unreclaimable:51679
  mapped:82 shmem:117 pagetables:41302 bounce:0
  free:55214 free_pcp:503 free_cma:0
[1042459.035688] Node 0 active_anon:29395060kB inactive_anon:2795292kB 
active_file:48kB inactive_file:104kB unevictable:0kB isolated(anon):0kB 
isolated(file):0kB mapped:160kB dirty:0kB writeback:0kB shmem:324kB shmem_thp: 
0kB shmem_pmdmapped: 0kB anon_thp: 19171328kB writeback_tmp:0kB unstable:0kB 
all_unreclaimable? yes
[1042459.067024] Node 1 active_anon:30061224kB inactive_anon:2543880kB 
active_file:0kB inactive_file:152kB unevictable:0kB isolated(anon):0kB 
isolated(file):0kB mapped:164kB dirty:20kB writeback:0kB shmem:144kB shmem_thp: 
0kB shmem_pmdmapped: 0kB anon_thp: 13232128kB writeback_tmp:0kB unstable:0kB 
all_unreclaimable? yes
[1042459.098359] Node 0 DMA free:15880kB min:20kB low:32kB high:44kB 
active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB 
unevictable:0kB writepending:0kB present:15980kB managed:15896kB mlocked:0kB 
kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB 
free_cma:0kB
[1042459.127285] lowmem_reserve[]: 0 3174 32097 32097 32097
[1042459.133225] Node 0 DMA32 free:119936kB min:4444kB low:7692kB high:10940kB 
active_anon:2734924kB inactive_anon:416452kB active_file:0kB inactive_file:0kB 
unevictable:0kB writepending:0kB present:3345344kB managed:3279776kB 
mlocked:0kB kernel_stack:0kB pagetables:4kB bounce:0kB free_pcp:0kB 
local_pcp:0kB free_cma:0kB
[1042459.164576] lowmem_reserve[]: 0 0 28922 28922 28922
[1042459.170225] Node 0 Normal free:40080kB min:40504kB low:70120kB 
high:99736kB active_anon:26660136kB inactive_anon:2378840kB active_file:48kB 
inactive_file:104kB unevictable:0kB writepending:0kB present:30146560kB 
managed:29616592kB mlocked:0kB kernel_stack:6808kB pagetables:82392kB 
bounce:0kB free_pcp:1680kB local_pcp:124kB free_cma:0kB
[1042459.203605] lowmem_reserve[]: 0 0 0 0 0
[1042459.208094] Node 1 Normal free:44960kB min:45136kB low:78136kB 
high:111136kB active_anon:30061224kB inactive_anon:2544016kB active_file:0kB 
inactive_file:224kB unevictable:0kB writepending:4kB present:33554432kB 
managed:33005280kB mlocked:0kB kernel_stack:4520kB pagetables:82812kB 
bounce:0kB free_pcp:388kB local_pcp:28kB free_cma:0kB
[1042459.241274] lowmem_reserve[]: 0 0 0 0 0
[1042459.245756] Node 0 DMA: 0*4kB 1*8kB (U) 0*16kB 0*32kB 2*64kB (U) 1*128kB 
(U) 1*256kB (U) 0*512kB 1*1024kB (U) 1*2048kB (M) 3*4096kB (M) = 15880kB
[1042459.260531] Node 0 DMA32: 26*4kB (UM) 33*8kB (UM) 25*16kB (UM) 110*32kB 
(UME) 79*64kB (UME) 48*128kB (UME) 26*256kB (UME) 13*512kB (UME) 9*1024kB

Re: [PATCH v4] dcdbas: Add support for WSMT ACPI table

2018-06-25 Thread Stuart Hayes




On 6/14/2018 12:26 PM, Andy Shevchenko wrote:
> On Thu, Jun 14, 2018 at 6:45 PM, Stuart Hayes  
> wrote:
>>
>> If the WSMT ACPI table is present and indicates that a fixed communication
>> buffer should be used, use the firmware-specified buffer instead of
>> allocating a buffer in memory for communications between the dcdbas driver
>> and firmware.
> 
> Thanks for an update.
> 
> I answered to previous thread. The questions / comments are still
> applicable here.
> 

I answered your questions in the previous thread (a while back).

Please let me know if there are any more concerns with this.  Thanks!

Re: [PATCH v4] dcdbas: Add support for WSMT ACPI table

2018-06-25 Thread Stuart Hayes




On 6/14/2018 12:26 PM, Andy Shevchenko wrote:
> On Thu, Jun 14, 2018 at 6:45 PM, Stuart Hayes  
> wrote:
>>
>> If the WSMT ACPI table is present and indicates that a fixed communication
>> buffer should be used, use the firmware-specified buffer instead of
>> allocating a buffer in memory for communications between the dcdbas driver
>> and firmware.
> 
> Thanks for an update.
> 
> I answered to previous thread. The questions / comments are still
> applicable here.
> 

I answered your questions in the previous thread (a while back).

Please let me know if there are any more concerns with this.  Thanks!

[PATCH 04/12] powerpc: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit

Acked-by: Michael Ellerman 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/powerpc/include/asm/hw_breakpoint.h |  6 -
 arch/powerpc/kernel/hw_breakpoint.c  | 41 
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 4d0b1bf..38ae180 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -52,6 +52,7 @@ struct arch_hw_breakpoint {
 #include 
 #include 
 
+struct perf_event_attr;
 struct perf_event;
 struct pmu;
 struct perf_sample_data;
@@ -61,7 +62,10 @@ struct perf_sample_data;
 extern int hw_breakpoint_slots(int type);
 extern int arch_bp_generic_fields(int type, int *gen_bp_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 899bcec..fec8a67 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -139,30 +139,31 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
int ret = -EINVAL, length_max;
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
if (!bp)
return ret;
 
-   info->type = HW_BRK_TYPE_TRANSLATE;
-   if (bp->attr.bp_type & HW_BREAKPOINT_R)
-   info->type |= HW_BRK_TYPE_READ;
-   if (bp->attr.bp_type & HW_BREAKPOINT_W)
-   info->type |= HW_BRK_TYPE_WRITE;
-   if (info->type == HW_BRK_TYPE_TRANSLATE)
+   hw->type = HW_BRK_TYPE_TRANSLATE;
+   if (attr->bp_type & HW_BREAKPOINT_R)
+   hw->type |= HW_BRK_TYPE_READ;
+   if (attr->bp_type & HW_BREAKPOINT_W)
+   hw->type |= HW_BRK_TYPE_WRITE;
+   if (hw->type == HW_BRK_TYPE_TRANSLATE)
/* must set alteast read or write */
return ret;
-   if (!(bp->attr.exclude_user))
-   info->type |= HW_BRK_TYPE_USER;
-   if (!(bp->attr.exclude_kernel))
-   info->type |= HW_BRK_TYPE_KERNEL;
-   if (!(bp->attr.exclude_hv))
-   info->type |= HW_BRK_TYPE_HYP;
-   info->address = bp->attr.bp_addr;
-   info->len = bp->attr.bp_len;
+   if (!attr->exclude_user)
+   hw->type |= HW_BRK_TYPE_USER;
+   if (!attr->exclude_kernel)
+   hw->type |= HW_BRK_TYPE_KERNEL;
+   if (!attr->exclude_hv)
+   hw->type |= HW_BRK_TYPE_HYP;
+   hw->address = attr->bp_addr;
+   hw->len = attr->bp_len;
 
/*
 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
@@ -176,12 +177,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
if (cpu_has_feature(CPU_FTR_DAWR)) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
-   if ((bp->attr.bp_addr >> 9) !=
-   ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 9))
+   if ((attr->bp_addr >> 9) !=
+   ((attr->bp_addr + attr->bp_len - 1) >> 9))
return -EINVAL;
}
-   if (info->len >
-   (length_max - (info->address & HW_BREAKPOINT_ALIGN)))
+   if (hw->len >
+   (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
return -EINVAL;
return 0;
 }
-- 
2.7.4

[PATCH 04/12] powerpc: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit

Acked-by: Michael Ellerman 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/powerpc/include/asm/hw_breakpoint.h |  6 -
 arch/powerpc/kernel/hw_breakpoint.c  | 41 
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 4d0b1bf..38ae180 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -52,6 +52,7 @@ struct arch_hw_breakpoint {
 #include 
 #include 
 
+struct perf_event_attr;
 struct perf_event;
 struct pmu;
 struct perf_sample_data;
@@ -61,7 +62,10 @@ struct perf_sample_data;
 extern int hw_breakpoint_slots(int type);
 extern int arch_bp_generic_fields(int type, int *gen_bp_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 899bcec..fec8a67 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -139,30 +139,31 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
int ret = -EINVAL, length_max;
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
if (!bp)
return ret;
 
-   info->type = HW_BRK_TYPE_TRANSLATE;
-   if (bp->attr.bp_type & HW_BREAKPOINT_R)
-   info->type |= HW_BRK_TYPE_READ;
-   if (bp->attr.bp_type & HW_BREAKPOINT_W)
-   info->type |= HW_BRK_TYPE_WRITE;
-   if (info->type == HW_BRK_TYPE_TRANSLATE)
+   hw->type = HW_BRK_TYPE_TRANSLATE;
+   if (attr->bp_type & HW_BREAKPOINT_R)
+   hw->type |= HW_BRK_TYPE_READ;
+   if (attr->bp_type & HW_BREAKPOINT_W)
+   hw->type |= HW_BRK_TYPE_WRITE;
+   if (hw->type == HW_BRK_TYPE_TRANSLATE)
/* must set alteast read or write */
return ret;
-   if (!(bp->attr.exclude_user))
-   info->type |= HW_BRK_TYPE_USER;
-   if (!(bp->attr.exclude_kernel))
-   info->type |= HW_BRK_TYPE_KERNEL;
-   if (!(bp->attr.exclude_hv))
-   info->type |= HW_BRK_TYPE_HYP;
-   info->address = bp->attr.bp_addr;
-   info->len = bp->attr.bp_len;
+   if (!attr->exclude_user)
+   hw->type |= HW_BRK_TYPE_USER;
+   if (!attr->exclude_kernel)
+   hw->type |= HW_BRK_TYPE_KERNEL;
+   if (!attr->exclude_hv)
+   hw->type |= HW_BRK_TYPE_HYP;
+   hw->address = attr->bp_addr;
+   hw->len = attr->bp_len;
 
/*
 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
@@ -176,12 +177,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
if (cpu_has_feature(CPU_FTR_DAWR)) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
-   if ((bp->attr.bp_addr >> 9) !=
-   ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 9))
+   if ((attr->bp_addr >> 9) !=
+   ((attr->bp_addr + attr->bp_len - 1) >> 9))
return -EINVAL;
}
-   if (info->len >
-   (length_max - (info->address & HW_BREAKPOINT_ALIGN)))
+   if (hw->len >
+   (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
return -EINVAL;
return 0;
 }
-- 
2.7.4

[PATCH 12/12] perf/breakpoint: Clean up and consolidate modify_user_hw_breakpoint_check()

2018-06-25 Thread Frederic Weisbecker

Remove the dance around old and new attributes. Just don't modify the
previous breakpoint at all until we have verified everything.

Reported-by: Linus Torvalds 
Original-patch-by: Andy Lutomirski 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 kernel/events/hw_breakpoint.c | 46 ---
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 7138770..b3814fc 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -461,37 +461,43 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
+static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
+   struct perf_event_attr *from)
+{
+   to->bp_addr = from->bp_addr;
+   to->bp_type = from->bp_type;
+   to->bp_len  = from->bp_len;
+   to->disabled = from->disabled;
+}
+
 int
 modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr 
*attr,
bool check)
 {
-   u64 old_addr = bp->attr.bp_addr;
-   u64 old_len  = bp->attr.bp_len;
-   int old_type = bp->attr.bp_type;
-   bool modify  = attr->bp_type != old_type;
struct arch_hw_breakpoint hw;
-   int err = 0;
-
-   bp->attr.bp_addr = attr->bp_addr;
-   bp->attr.bp_type = attr->bp_type;
-   bp->attr.bp_len  = attr->bp_len;
-
-   if (check && memcmp(&bp->attr, attr, sizeof(*attr)))
-   return -EINVAL;
+   int err;
 
err = hw_breakpoint_parse(bp, attr, &hw);
-   if (!err && modify)
-   err = modify_bp_slot(bp, old_type, bp->attr.bp_type);
-
-   if (err) {
-   bp->attr.bp_addr = old_addr;
-   bp->attr.bp_type = old_type;
-   bp->attr.bp_len  = old_len;
+   if (err)
return err;
+
+   if (check) {
+   struct perf_event_attr old_attr;
+
+   old_attr = bp->attr;
+   hw_breakpoint_copy_attr(&old_attr, attr);
+   if (memcmp(&old_attr, attr, sizeof(*attr)))
+   return -EINVAL;
+   }
+
+   if (bp->attr.bp_type != attr->bp_type) {
+   err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
+   if (err)
+   return err;
}
 
+   hw_breakpoint_copy_attr(&bp->attr, attr);
bp->hw.info = hw;
-   bp->attr.disabled = attr->disabled;
 
return 0;
 }
-- 
2.7.4

[PATCH 07/12] sh: Remove "struct arch_hw_breakpoint::name" unused field

2018-06-25 Thread Frederic Weisbecker

This field seems to be unused, perhaps a leftover from old code...

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/sh/include/asm/hw_breakpoint.h | 1 -
 arch/sh/kernel/hw_breakpoint.c  | 7 ---
 2 files changed, 8 deletions(-)

diff --git a/arch/sh/include/asm/hw_breakpoint.h 
b/arch/sh/include/asm/hw_breakpoint.h
index 8a88ed0..dae622d 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
 #include 
 
 struct arch_hw_breakpoint {
-   char*name; /* Contains name of the symbol to set bkpt */
unsigned long   address;
u16 len;
u16 type;
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index 38791fe..c453a0c 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -248,13 +248,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
}
 
/*
-* For kernel-addresses, either the address or symbol name can be
-* specified.
-*/
-   if (info->name)
-   info->address = (unsigned long)kallsyms_lookup_name(info->name);
-
-   /*
 * Check that the low-order bits of the address are appropriate
 * for the alignment implied by len.
 */
-- 
2.7.4

[PATCH 08/12] sh: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/sh/include/asm/hw_breakpoint.h |  6 +-
 arch/sh/kernel/hw_breakpoint.c  | 37 +++--
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/arch/sh/include/asm/hw_breakpoint.h 
b/arch/sh/include/asm/hw_breakpoint.h
index dae622d..867edcc 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -40,6 +40,7 @@ struct sh_ubc {
struct clk  *clk;   /* optional interface clock / MSTP bit */
 };
 
+struct perf_event_attr;
 struct perf_event;
 struct task_struct;
 struct pmu;
@@ -54,7 +55,10 @@ static inline int hw_breakpoint_slots(int type)
 
 /* arch/sh/kernel/hw_breakpoint.c */
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index c453a0c..d9ff3b4 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -173,40 +173,40 @@ int arch_bp_generic_fields(int sh_len, int sh_type,
return 0;
 }
 
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
-   info->address = bp->attr.bp_addr;
+   hw->address = attr->bp_addr;
 
/* Len */
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->len = SH_BREAKPOINT_LEN_1;
+   hw->len = SH_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->len = SH_BREAKPOINT_LEN_2;
+   hw->len = SH_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
-   info->len = SH_BREAKPOINT_LEN_4;
+   hw->len = SH_BREAKPOINT_LEN_4;
break;
case HW_BREAKPOINT_LEN_8:
-   info->len = SH_BREAKPOINT_LEN_8;
+   hw->len = SH_BREAKPOINT_LEN_8;
break;
default:
return -EINVAL;
}
 
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_R:
-   info->type = SH_BREAKPOINT_READ;
+   hw->type = SH_BREAKPOINT_READ;
break;
case HW_BREAKPOINT_W:
-   info->type = SH_BREAKPOINT_WRITE;
+   hw->type = SH_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-   info->type = SH_BREAKPOINT_RW;
+   hw->type = SH_BREAKPOINT_RW;
break;
default:
return -EINVAL;
@@ -218,19 +218,20 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
int ret;
 
-   ret = arch_build_bp_info(bp);
+   ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
 
ret = -EINVAL;
 
-   switch (info->len) {
+   switch (hw->len) {
case SH_BREAKPOINT_LEN_1:
align = 0;
break;
@@ -251,7 +252,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 * Check that the low-order bits of the address are appropriate
 * for the alignment implied by len.
 */
-   if (info->address & align)
+   if (hw->address & align)
return -EINVAL;
 
return 0;
-- 
2.7.4

[PATCH 11/12] perf/breakpoint: Pass new breakpoint type to modify_breakpoint_slot()

2018-06-25 Thread Frederic Weisbecker

We soon won't be able to rely on bp->attr anymore to get the new
type of the modifying breakpoint because the new attributes are going
to be copied only once we successfully modified the breakpoint slot.

This will fix the current misdesigned layout where the new attr are
copied to the modifying breakpoint before we actually know if the
modification will be validated.

In order to prepare for that, allow modify_breakpoint_slot() to take
the new breakpoint type.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 kernel/events/hw_breakpoint.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index e7bc8d0..7138770 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -345,13 +345,13 @@ void release_bp_slot(struct perf_event *bp)
mutex_unlock(&nr_bp_mutex);
 }
 
-static int __modify_bp_slot(struct perf_event *bp, u64 old_type)
+static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 {
int err;
 
__release_bp_slot(bp, old_type);
 
-   err = __reserve_bp_slot(bp, bp->attr.bp_type);
+   err = __reserve_bp_slot(bp, new_type);
if (err) {
/*
 * Reserve the old_type slot back in case
@@ -367,12 +367,12 @@ static int __modify_bp_slot(struct perf_event *bp, u64 
old_type)
return err;
 }
 
-static int modify_bp_slot(struct perf_event *bp, u64 old_type)
+static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 {
int ret;
 
mutex_lock(&nr_bp_mutex);
-   ret = __modify_bp_slot(bp, old_type);
+   ret = __modify_bp_slot(bp, old_type, new_type);
mutex_unlock(&nr_bp_mutex);
return ret;
 }
@@ -481,7 +481,7 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
 
err = hw_breakpoint_parse(bp, attr, &hw);
if (!err && modify)
-   err = modify_bp_slot(bp, old_type);
+   err = modify_bp_slot(bp, old_type, bp->attr.bp_type);
 
if (err) {
bp->attr.bp_addr = old_addr;
-- 
2.7.4

[PATCH 12/12] perf/breakpoint: Clean up and consolidate modify_user_hw_breakpoint_check()

2018-06-25 Thread Frederic Weisbecker

Remove the dance around old and new attributes. Just don't modify the
previous breakpoint at all until we have verified everything.

Reported-by: Linus Torvalds 
Original-patch-by: Andy Lutomirski 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 kernel/events/hw_breakpoint.c | 46 ---
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 7138770..b3814fc 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -461,37 +461,43 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
+static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
+   struct perf_event_attr *from)
+{
+   to->bp_addr = from->bp_addr;
+   to->bp_type = from->bp_type;
+   to->bp_len  = from->bp_len;
+   to->disabled = from->disabled;
+}
+
 int
 modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr 
*attr,
bool check)
 {
-   u64 old_addr = bp->attr.bp_addr;
-   u64 old_len  = bp->attr.bp_len;
-   int old_type = bp->attr.bp_type;
-   bool modify  = attr->bp_type != old_type;
struct arch_hw_breakpoint hw;
-   int err = 0;
-
-   bp->attr.bp_addr = attr->bp_addr;
-   bp->attr.bp_type = attr->bp_type;
-   bp->attr.bp_len  = attr->bp_len;
-
-   if (check && memcmp(&bp->attr, attr, sizeof(*attr)))
-   return -EINVAL;
+   int err;
 
err = hw_breakpoint_parse(bp, attr, &hw);
-   if (!err && modify)
-   err = modify_bp_slot(bp, old_type, bp->attr.bp_type);
-
-   if (err) {
-   bp->attr.bp_addr = old_addr;
-   bp->attr.bp_type = old_type;
-   bp->attr.bp_len  = old_len;
+   if (err)
return err;
+
+   if (check) {
+   struct perf_event_attr old_attr;
+
+   old_attr = bp->attr;
+   hw_breakpoint_copy_attr(&old_attr, attr);
+   if (memcmp(&old_attr, attr, sizeof(*attr)))
+   return -EINVAL;
+   }
+
+   if (bp->attr.bp_type != attr->bp_type) {
+   err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
+   if (err)
+   return err;
}
 
+   hw_breakpoint_copy_attr(&bp->attr, attr);
bp->hw.info = hw;
-   bp->attr.disabled = attr->disabled;
 
return 0;
 }
-- 
2.7.4

[PATCH 07/12] sh: Remove "struct arch_hw_breakpoint::name" unused field

2018-06-25 Thread Frederic Weisbecker

This field seems to be unused, perhaps a leftover from old code...

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/sh/include/asm/hw_breakpoint.h | 1 -
 arch/sh/kernel/hw_breakpoint.c  | 7 ---
 2 files changed, 8 deletions(-)

diff --git a/arch/sh/include/asm/hw_breakpoint.h 
b/arch/sh/include/asm/hw_breakpoint.h
index 8a88ed0..dae622d 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
 #include 
 
 struct arch_hw_breakpoint {
-   char*name; /* Contains name of the symbol to set bkpt */
unsigned long   address;
u16 len;
u16 type;
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index 38791fe..c453a0c 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -248,13 +248,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
}
 
/*
-* For kernel-addresses, either the address or symbol name can be
-* specified.
-*/
-   if (info->name)
-   info->address = (unsigned long)kallsyms_lookup_name(info->name);
-
-   /*
 * Check that the low-order bits of the address are appropriate
 * for the alignment implied by len.
 */
-- 
2.7.4

[PATCH 08/12] sh: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/sh/include/asm/hw_breakpoint.h |  6 +-
 arch/sh/kernel/hw_breakpoint.c  | 37 +++--
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/arch/sh/include/asm/hw_breakpoint.h 
b/arch/sh/include/asm/hw_breakpoint.h
index dae622d..867edcc 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -40,6 +40,7 @@ struct sh_ubc {
struct clk  *clk;   /* optional interface clock / MSTP bit */
 };
 
+struct perf_event_attr;
 struct perf_event;
 struct task_struct;
 struct pmu;
@@ -54,7 +55,10 @@ static inline int hw_breakpoint_slots(int type)
 
 /* arch/sh/kernel/hw_breakpoint.c */
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index c453a0c..d9ff3b4 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -173,40 +173,40 @@ int arch_bp_generic_fields(int sh_len, int sh_type,
return 0;
 }
 
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
-   info->address = bp->attr.bp_addr;
+   hw->address = attr->bp_addr;
 
/* Len */
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->len = SH_BREAKPOINT_LEN_1;
+   hw->len = SH_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->len = SH_BREAKPOINT_LEN_2;
+   hw->len = SH_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
-   info->len = SH_BREAKPOINT_LEN_4;
+   hw->len = SH_BREAKPOINT_LEN_4;
break;
case HW_BREAKPOINT_LEN_8:
-   info->len = SH_BREAKPOINT_LEN_8;
+   hw->len = SH_BREAKPOINT_LEN_8;
break;
default:
return -EINVAL;
}
 
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_R:
-   info->type = SH_BREAKPOINT_READ;
+   hw->type = SH_BREAKPOINT_READ;
break;
case HW_BREAKPOINT_W:
-   info->type = SH_BREAKPOINT_WRITE;
+   hw->type = SH_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-   info->type = SH_BREAKPOINT_RW;
+   hw->type = SH_BREAKPOINT_RW;
break;
default:
return -EINVAL;
@@ -218,19 +218,20 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
int ret;
 
-   ret = arch_build_bp_info(bp);
+   ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
 
ret = -EINVAL;
 
-   switch (info->len) {
+   switch (hw->len) {
case SH_BREAKPOINT_LEN_1:
align = 0;
break;
@@ -251,7 +252,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 * Check that the low-order bits of the address are appropriate
 * for the alignment implied by len.
 */
-   if (info->address & align)
+   if (hw->address & align)
return -EINVAL;
 
return 0;
-- 
2.7.4

[PATCH 11/12] perf/breakpoint: Pass new breakpoint type to modify_breakpoint_slot()

2018-06-25 Thread Frederic Weisbecker

We soon won't be able to rely on bp->attr anymore to get the new
type of the modifying breakpoint because the new attributes are going
to be copied only once we successfully modified the breakpoint slot.

This will fix the current misdesigned layout where the new attributes are
copied to the modifying breakpoint before we actually know if the
modification will be validated.

In order to prepare for that, allow modify_breakpoint_slot() to take
the new breakpoint type.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 kernel/events/hw_breakpoint.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index e7bc8d0..7138770 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -345,13 +345,13 @@ void release_bp_slot(struct perf_event *bp)
mutex_unlock(&nr_bp_mutex);
 }
 
-static int __modify_bp_slot(struct perf_event *bp, u64 old_type)
+static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 {
int err;
 
__release_bp_slot(bp, old_type);
 
-   err = __reserve_bp_slot(bp, bp->attr.bp_type);
+   err = __reserve_bp_slot(bp, new_type);
if (err) {
/*
 * Reserve the old_type slot back in case
@@ -367,12 +367,12 @@ static int __modify_bp_slot(struct perf_event *bp, u64 
old_type)
return err;
 }
 
-static int modify_bp_slot(struct perf_event *bp, u64 old_type)
+static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 {
int ret;
 
mutex_lock(&nr_bp_mutex);
-   ret = __modify_bp_slot(bp, old_type);
+   ret = __modify_bp_slot(bp, old_type, new_type);
mutex_unlock(&nr_bp_mutex);
return ret;
 }
@@ -481,7 +481,7 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
 
err = hw_breakpoint_parse(bp, attr, &hw);
if (!err && modify)
-   err = modify_bp_slot(bp, old_type);
+   err = modify_bp_slot(bp, old_type, bp->attr.bp_type);
 
if (err) {
bp->attr.bp_addr = old_addr;
-- 
2.7.4

[PATCH 10/12] perf/breakpoint: Remove default hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Now that all architectures have implemented it, we can remove the poor
weak version.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm/include/asm/hw_breakpoint.h |  1 -
 arch/arm64/include/asm/hw_breakpoint.h   |  1 -
 arch/powerpc/include/asm/hw_breakpoint.h |  1 -
 arch/sh/include/asm/hw_breakpoint.h  |  1 -
 arch/x86/include/asm/hw_breakpoint.h |  1 -
 arch/xtensa/include/asm/hw_breakpoint.h  |  1 -
 kernel/events/hw_breakpoint.c| 17 -
 7 files changed, 23 deletions(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h 
b/arch/arm/include/asm/hw_breakpoint.h
index 1e02925..ac54c06 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -122,7 +122,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
b/arch/arm64/include/asm/hw_breakpoint.h
index bf9c305..6a53e59 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -129,7 +129,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 38ae180..27d6e3c 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -65,7 +65,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
diff --git a/arch/sh/include/asm/hw_breakpoint.h 
b/arch/sh/include/asm/hw_breakpoint.h
index 867edcc..199d17b 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -58,7 +58,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/x86/include/asm/hw_breakpoint.h 
b/arch/x86/include/asm/hw_breakpoint.h
index 6c88e8e2..a1f0e90 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -57,7 +57,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/xtensa/include/asm/hw_breakpoint.h 
b/arch/xtensa/include/asm/hw_breakpoint.h
index f347c21..9f119c1 100644
--- a/arch/xtensa/include/asm/hw_breakpoint.h
+++ b/arch/xtensa/include/asm/hw_breakpoint.h
@@ -40,7 +40,6 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint 
*hw);
 int hw_breakpoint_arch_parse(struct perf_event *bp,
 const struct perf_event_attr *attr,

[PATCH 10/12] perf/breakpoint: Remove default hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Now that all architectures have implemented it, we can remove the poor
weak version.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm/include/asm/hw_breakpoint.h |  1 -
 arch/arm64/include/asm/hw_breakpoint.h   |  1 -
 arch/powerpc/include/asm/hw_breakpoint.h |  1 -
 arch/sh/include/asm/hw_breakpoint.h  |  1 -
 arch/x86/include/asm/hw_breakpoint.h |  1 -
 arch/xtensa/include/asm/hw_breakpoint.h  |  1 -
 kernel/events/hw_breakpoint.c| 17 -
 7 files changed, 23 deletions(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h 
b/arch/arm/include/asm/hw_breakpoint.h
index 1e02925..ac54c06 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -122,7 +122,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
b/arch/arm64/include/asm/hw_breakpoint.h
index bf9c305..6a53e59 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -129,7 +129,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 38ae180..27d6e3c 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -65,7 +65,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
diff --git a/arch/sh/include/asm/hw_breakpoint.h 
b/arch/sh/include/asm/hw_breakpoint.h
index 867edcc..199d17b 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -58,7 +58,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/x86/include/asm/hw_breakpoint.h 
b/arch/x86/include/asm/hw_breakpoint.h
index 6c88e8e2..a1f0e90 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -57,7 +57,6 @@ extern int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw);
 extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw);
-#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/xtensa/include/asm/hw_breakpoint.h 
b/arch/xtensa/include/asm/hw_breakpoint.h
index f347c21..9f119c1 100644
--- a/arch/xtensa/include/asm/hw_breakpoint.h
+++ b/arch/xtensa/include/asm/hw_breakpoint.h
@@ -40,7 +40,6 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint 
*hw);
 int hw_breakpoint_arch_parse(struct perf_event *bp,
 const struct perf_event_attr *attr,

[PATCH 06/12] arm64: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Acked-by: Will Deacon 
Acked-by: Mark Rutland 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm64/include/asm/hw_breakpoint.h |  6 ++-
 arch/arm64/kernel/hw_breakpoint.c  | 79 +-
 2 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
b/arch/arm64/include/asm/hw_breakpoint.h
index 9f4a3d4..bf9c305 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -119,13 +119,17 @@ static inline void decode_ctrl_reg(u32 reg,
 
 struct task_struct;
 struct notifier_block;
+struct perf_event_attr;
 struct perf_event;
 struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type, int *offset);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/arm64/kernel/hw_breakpoint.c 
b/arch/arm64/kernel/hw_breakpoint.c
index 6a90d12..8c96443 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -420,53 +420,53 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl 
ctrl,
 /*
  * Construct an arch_hw_breakpoint from a perf_event.
  */
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_X:
-   info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+   hw->ctrl.type = ARM_BREAKPOINT_EXECUTE;
break;
case HW_BREAKPOINT_R:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD;
break;
case HW_BREAKPOINT_W:
-   info->ctrl.type = ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_STORE;
break;
case HW_BREAKPOINT_RW:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
break;
default:
return -EINVAL;
}
 
/* Len */
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_3:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_3;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_3;
break;
case HW_BREAKPOINT_LEN_4:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
case HW_BREAKPOINT_LEN_5:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_5;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_5;
break;
case HW_BREAKPOINT_LEN_6:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_6;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_6;
break;
case HW_BREAKPOINT_LEN_7:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_7;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_7;
break;
case HW_BREAKPOINT_LEN_8:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_8;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_8;
break;
default:
return -EINVAL;
@@ -477,37 +477,37 @@ static int arch_build_bp_info(struct perf_event *bp)
 * AArch32 also requires breakpoints of length 2 for Thumb.
 *

[PATCH 09/12] xtensa: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/xtensa/include/asm/hw_breakpoint.h |  6 +-
 arch/xtensa/kernel/hw_breakpoint.c  | 33 -
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/arch/xtensa/include/asm/hw_breakpoint.h 
b/arch/xtensa/include/asm/hw_breakpoint.h
index 2525bf6..f347c21 100644
--- a/arch/xtensa/include/asm/hw_breakpoint.h
+++ b/arch/xtensa/include/asm/hw_breakpoint.h
@@ -30,13 +30,17 @@ struct arch_hw_breakpoint {
u16 type;
 };
 
+struct perf_event_attr;
 struct perf_event;
 struct pt_regs;
 struct task_struct;
 
 int hw_breakpoint_slots(int type);
 int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-int arch_validate_hwbkpt_settings(struct perf_event *bp);
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 
diff --git a/arch/xtensa/kernel/hw_breakpoint.c 
b/arch/xtensa/kernel/hw_breakpoint.c
index 6e34c38..c2e387c 100644
--- a/arch/xtensa/kernel/hw_breakpoint.c
+++ b/arch/xtensa/kernel/hw_breakpoint.c
@@ -47,50 +47,41 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint 
*hw)
 /*
  * Construct an arch_hw_breakpoint from a perf_event.
  */
-static int arch_build_bp_info(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_X:
-   info->type = XTENSA_BREAKPOINT_EXECUTE;
+   hw->type = XTENSA_BREAKPOINT_EXECUTE;
break;
case HW_BREAKPOINT_R:
-   info->type = XTENSA_BREAKPOINT_LOAD;
+   hw->type = XTENSA_BREAKPOINT_LOAD;
break;
case HW_BREAKPOINT_W:
-   info->type = XTENSA_BREAKPOINT_STORE;
+   hw->type = XTENSA_BREAKPOINT_STORE;
break;
case HW_BREAKPOINT_RW:
-   info->type = XTENSA_BREAKPOINT_LOAD | XTENSA_BREAKPOINT_STORE;
+   hw->type = XTENSA_BREAKPOINT_LOAD | XTENSA_BREAKPOINT_STORE;
break;
default:
return -EINVAL;
}
 
/* Len */
-   info->len = bp->attr.bp_len;
-   if (info->len < 1 || info->len > 64 || !is_power_of_2(info->len))
+   hw->len = attr->bp_len;
+   if (hw->len < 1 || hw->len > 64 || !is_power_of_2(hw->len))
return -EINVAL;
 
/* Address */
-   info->address = bp->attr.bp_addr;
-   if (info->address & (info->len - 1))
+   hw->address = attr->bp_addr;
+   if (hw->address & (hw->len - 1))
return -EINVAL;
 
return 0;
 }
 
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
-{
-   int ret;
-
-   /* Build the arch_hw_breakpoint. */
-   ret = arch_build_bp_info(bp);
-   return ret;
-}
-
 int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data)
 {
-- 
2.7.4

[PATCH 06/12] arm64: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Acked-by: Will Deacon 
Acked-by: Mark Rutland 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm64/include/asm/hw_breakpoint.h |  6 ++-
 arch/arm64/kernel/hw_breakpoint.c  | 79 +-
 2 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
b/arch/arm64/include/asm/hw_breakpoint.h
index 9f4a3d4..bf9c305 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -119,13 +119,17 @@ static inline void decode_ctrl_reg(u32 reg,
 
 struct task_struct;
 struct notifier_block;
+struct perf_event_attr;
 struct perf_event;
 struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type, int *offset);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/arm64/kernel/hw_breakpoint.c 
b/arch/arm64/kernel/hw_breakpoint.c
index 6a90d12..8c96443 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -420,53 +420,53 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl 
ctrl,
 /*
  * Construct an arch_hw_breakpoint from a perf_event.
  */
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_X:
-   info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+   hw->ctrl.type = ARM_BREAKPOINT_EXECUTE;
break;
case HW_BREAKPOINT_R:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD;
break;
case HW_BREAKPOINT_W:
-   info->ctrl.type = ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_STORE;
break;
case HW_BREAKPOINT_RW:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
break;
default:
return -EINVAL;
}
 
/* Len */
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_3:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_3;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_3;
break;
case HW_BREAKPOINT_LEN_4:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
case HW_BREAKPOINT_LEN_5:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_5;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_5;
break;
case HW_BREAKPOINT_LEN_6:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_6;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_6;
break;
case HW_BREAKPOINT_LEN_7:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_7;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_7;
break;
case HW_BREAKPOINT_LEN_8:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_8;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_8;
break;
default:
return -EINVAL;
@@ -477,37 +477,37 @@ static int arch_build_bp_info(struct perf_event *bp)
 * AArch32 also requires breakpoints of length 2 for Thumb.
 *

[PATCH 09/12] xtensa: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/xtensa/include/asm/hw_breakpoint.h |  6 +-
 arch/xtensa/kernel/hw_breakpoint.c  | 33 -
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/arch/xtensa/include/asm/hw_breakpoint.h 
b/arch/xtensa/include/asm/hw_breakpoint.h
index 2525bf6..f347c21 100644
--- a/arch/xtensa/include/asm/hw_breakpoint.h
+++ b/arch/xtensa/include/asm/hw_breakpoint.h
@@ -30,13 +30,17 @@ struct arch_hw_breakpoint {
u16 type;
 };
 
+struct perf_event_attr;
 struct perf_event;
 struct pt_regs;
 struct task_struct;
 
 int hw_breakpoint_slots(int type);
 int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-int arch_validate_hwbkpt_settings(struct perf_event *bp);
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 
diff --git a/arch/xtensa/kernel/hw_breakpoint.c 
b/arch/xtensa/kernel/hw_breakpoint.c
index 6e34c38..c2e387c 100644
--- a/arch/xtensa/kernel/hw_breakpoint.c
+++ b/arch/xtensa/kernel/hw_breakpoint.c
@@ -47,50 +47,41 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint 
*hw)
 /*
  * Construct an arch_hw_breakpoint from a perf_event.
  */
-static int arch_build_bp_info(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_X:
-   info->type = XTENSA_BREAKPOINT_EXECUTE;
+   hw->type = XTENSA_BREAKPOINT_EXECUTE;
break;
case HW_BREAKPOINT_R:
-   info->type = XTENSA_BREAKPOINT_LOAD;
+   hw->type = XTENSA_BREAKPOINT_LOAD;
break;
case HW_BREAKPOINT_W:
-   info->type = XTENSA_BREAKPOINT_STORE;
+   hw->type = XTENSA_BREAKPOINT_STORE;
break;
case HW_BREAKPOINT_RW:
-   info->type = XTENSA_BREAKPOINT_LOAD | XTENSA_BREAKPOINT_STORE;
+   hw->type = XTENSA_BREAKPOINT_LOAD | XTENSA_BREAKPOINT_STORE;
break;
default:
return -EINVAL;
}
 
/* Len */
-   info->len = bp->attr.bp_len;
-   if (info->len < 1 || info->len > 64 || !is_power_of_2(info->len))
+   hw->len = attr->bp_len;
+   if (hw->len < 1 || hw->len > 64 || !is_power_of_2(hw->len))
return -EINVAL;
 
/* Address */
-   info->address = bp->attr.bp_addr;
-   if (info->address & (info->len - 1))
+   hw->address = attr->bp_addr;
+   if (hw->address & (hw->len - 1))
return -EINVAL;
 
return 0;
 }
 
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
-{
-   int ret;
-
-   /* Build the arch_hw_breakpoint. */
-   ret = arch_build_bp_info(bp);
-   return ret;
-}
-
 int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data)
 {
-- 
2.7.4

[PATCH 01/12] perf/breakpoint: Split attribute parse and commit

2018-06-25 Thread Frederic Weisbecker

arch_validate_hwbkpt_settings() mixes up attribute check and commit into
a single code entity. Therefore the validation may return an error due to
incorrect attributes while still leaving halfway modified architecture
breakpoint data.

This is harmless when we deal with a new breakpoint but it becomes a
problem when we modify an existing breakpoint.

Split attribute parse and commit to fix that. The architecture is
passed a "struct arch_hw_breakpoint" to fill on top of the new attr
and the core takes care of copying the backend data once it's fully
validated. The architectures then need to implement the new API.

Reported-by: Linus Torvalds 
Original-patch-by: Andy Lutomirski 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 kernel/events/hw_breakpoint.c | 57 +++
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 6e28d28..314e2a9 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -400,16 +400,35 @@ int dbg_release_bp_slot(struct perf_event *bp)
return 0;
 }
 
-static int validate_hw_breakpoint(struct perf_event *bp)
+#ifndef hw_breakpoint_arch_parse
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
-   int ret;
+   int err;
 
-   ret = arch_validate_hwbkpt_settings(bp);
-   if (ret)
-   return ret;
+   err = arch_validate_hwbkpt_settings(bp);
+   if (err)
+   return err;
+
+   *hw = bp->hw.info;
+
+   return 0;
+}
+#endif
+
+static int hw_breakpoint_parse(struct perf_event *bp,
+  const struct perf_event_attr *attr,
+  struct arch_hw_breakpoint *hw)
+{
+   int err;
+
+   err = hw_breakpoint_arch_parse(bp, attr, hw);
+   if (err)
+   return err;
 
if (arch_check_bp_in_kernelspace(bp)) {
-   if (bp->attr.exclude_kernel)
+   if (attr->exclude_kernel)
return -EINVAL;
/*
 * Don't let unprivileged users set a breakpoint in the trap
@@ -424,19 +443,22 @@ static int validate_hw_breakpoint(struct perf_event *bp)
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
-   int ret;
+   struct arch_hw_breakpoint hw;
+   int err;
 
-   ret = reserve_bp_slot(bp);
-   if (ret)
-   return ret;
+   err = reserve_bp_slot(bp);
+   if (err)
+   return err;
 
-   ret = validate_hw_breakpoint(bp);
-
-   /* if arch_validate_hwbkpt_settings() fails then release bp slot */
-   if (ret)
+   err = hw_breakpoint_parse(bp, &bp->attr, &hw);
+   if (err) {
release_bp_slot(bp);
+   return err;
+   }
 
-   return ret;
+   bp->hw.info = hw;
+
+   return 0;
 }
 
 /**
@@ -464,6 +486,7 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
u64 old_len  = bp->attr.bp_len;
int old_type = bp->attr.bp_type;
bool modify  = attr->bp_type != old_type;
+   struct arch_hw_breakpoint hw;
int err = 0;
 
bp->attr.bp_addr = attr->bp_addr;
@@ -473,7 +496,7 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
if (check && memcmp(&bp->attr, attr, sizeof(*attr)))
return -EINVAL;
 
-   err = validate_hw_breakpoint(bp);
+   err = hw_breakpoint_parse(bp, attr, &hw);
if (!err && modify)
err = modify_bp_slot(bp, old_type);
 
@@ -484,7 +507,9 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
return err;
}
 
+   bp->hw.info = hw;
bp->attr.disabled = attr->disabled;
+
return 0;
 }
 
-- 
2.7.4

[PATCH 02/12] perf/breakpoint: Pass arch breakpoint struct to arch_check_bp_in_kernelspace()

2018-06-25 Thread Frederic Weisbecker

We can't pass the breakpoint directly to arch_check_bp_in_kernelspace()
anymore because its architecture internal data (struct arch_hw_breakpoint)
is not yet filled by the time we call the function, and most
implementations need this backend to be up to date. So arrange the
function to take the probing struct instead.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm/include/asm/hw_breakpoint.h |  2 +-
 arch/arm/kernel/hw_breakpoint.c  | 11 +++--
 arch/arm64/include/asm/hw_breakpoint.h   |  2 +-
 arch/arm64/kernel/hw_breakpoint.c|  9 ++--
 arch/powerpc/include/asm/hw_breakpoint.h |  2 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  6 +--
 arch/sh/include/asm/hw_breakpoint.h  |  2 +-
 arch/sh/kernel/hw_breakpoint.c   |  9 ++--
 arch/x86/include/asm/hw_breakpoint.h |  2 +-
 arch/x86/kernel/hw_breakpoint.c  | 71 +---
 arch/xtensa/include/asm/hw_breakpoint.h  |  2 +-
 arch/xtensa/kernel/hw_breakpoint.c   |  7 ++--
 kernel/events/hw_breakpoint.c|  2 +-
 13 files changed, 63 insertions(+), 64 deletions(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h 
b/arch/arm/include/asm/hw_breakpoint.h
index e46e4e7..d5a0f52 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -117,7 +117,7 @@ struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type);
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
 extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 629e251..385dcf4 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -456,14 +456,13 @@ static int get_hbp_len(u8 hbp_len)
 /*
  * Check whether bp virtual address is in kernel space.
  */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
 {
unsigned int len;
unsigned long va;
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-   va = info->address;
-   len = get_hbp_len(info->ctrl.len);
+   va = hw->address;
+   len = get_hbp_len(hw->ctrl.len);
 
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -576,7 +575,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 
/* Privilege */
info->ctrl.privilege = ARM_BREAKPOINT_USER;
-   if (arch_check_bp_in_kernelspace(bp))
+   if (arch_check_bp_in_kernelspace(info))
info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
 
/* Enabled? */
@@ -640,7 +639,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
return -EINVAL;
 
/* We don't allow mismatch breakpoints in kernel space. */
-   if (arch_check_bp_in_kernelspace(bp))
+   if (arch_check_bp_in_kernelspace(info))
return -EPERM;
 
/*
diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
b/arch/arm64/include/asm/hw_breakpoint.h
index 4177076..9f4a3d4 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -124,7 +124,7 @@ struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type, int *offset);
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
 extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
diff --git a/arch/arm64/kernel/hw_breakpoint.c 
b/arch/arm64/kernel/hw_breakpoint.c
index 413dbe5..6a90d12 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -343,14 +343,13 @@ static int get_hbp_len(u8 hbp_len)
 /*
  * Check whether bp virtual address is in kernel space.
  */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
 {
unsigned int len;
unsigned long va;
-   struct arch_hw_breakpoint *info =

[PATCH 01/12] perf/breakpoint: Split attribute parse and commit

2018-06-25 Thread Frederic Weisbecker

arch_validate_hwbkpt_settings() mixes up attribute check and commit into
a single code entity. Therefore the validation may return an error due to
incorrect attributes while still leaving halfway modified architecture
breakpoint data.

This is harmless when we deal with a new breakpoint but it becomes a
problem when we modify an existing breakpoint.

Split attribute parse and commit to fix that. The architecture is
passed a "struct arch_hw_breakpoint" to fill on top of the new attr
and the core takes care of copying the backend data once it's fully
validated. The architectures then need to implement the new API.

Reported-by: Linus Torvalds 
Original-patch-by: Andy Lutomirski 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 kernel/events/hw_breakpoint.c | 57 +++
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 6e28d28..314e2a9 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -400,16 +400,35 @@ int dbg_release_bp_slot(struct perf_event *bp)
return 0;
 }
 
-static int validate_hw_breakpoint(struct perf_event *bp)
+#ifndef hw_breakpoint_arch_parse
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+const struct perf_event_attr *attr,
+struct arch_hw_breakpoint *hw)
 {
-   int ret;
+   int err;
 
-   ret = arch_validate_hwbkpt_settings(bp);
-   if (ret)
-   return ret;
+   err = arch_validate_hwbkpt_settings(bp);
+   if (err)
+   return err;
+
+   *hw = bp->hw.info;
+
+   return 0;
+}
+#endif
+
+static int hw_breakpoint_parse(struct perf_event *bp,
+  const struct perf_event_attr *attr,
+  struct arch_hw_breakpoint *hw)
+{
+   int err;
+
+   err = hw_breakpoint_arch_parse(bp, attr, hw);
+   if (err)
+   return err;
 
if (arch_check_bp_in_kernelspace(bp)) {
-   if (bp->attr.exclude_kernel)
+   if (attr->exclude_kernel)
return -EINVAL;
/*
 * Don't let unprivileged users set a breakpoint in the trap
@@ -424,19 +443,22 @@ static int validate_hw_breakpoint(struct perf_event *bp)
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
-   int ret;
+   struct arch_hw_breakpoint hw;
+   int err;
 
-   ret = reserve_bp_slot(bp);
-   if (ret)
-   return ret;
+   err = reserve_bp_slot(bp);
+   if (err)
+   return err;
 
-   ret = validate_hw_breakpoint(bp);
-
-   /* if arch_validate_hwbkpt_settings() fails then release bp slot */
-   if (ret)
+   err = hw_breakpoint_parse(bp, &bp->attr, &hw);
+   if (err) {
release_bp_slot(bp);
+   return err;
+   }
 
-   return ret;
+   bp->hw.info = hw;
+
+   return 0;
 }
 
 /**
@@ -464,6 +486,7 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
u64 old_len  = bp->attr.bp_len;
int old_type = bp->attr.bp_type;
bool modify  = attr->bp_type != old_type;
+   struct arch_hw_breakpoint hw;
int err = 0;
 
bp->attr.bp_addr = attr->bp_addr;
@@ -473,7 +496,7 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
if (check && memcmp(&bp->attr, attr, sizeof(*attr)))
return -EINVAL;
 
-   err = validate_hw_breakpoint(bp);
+   err = hw_breakpoint_parse(bp, attr, &hw);
if (!err && modify)
err = modify_bp_slot(bp, old_type);
 
@@ -484,7 +507,9 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, 
struct perf_event_attr *a
return err;
}
 
+   bp->hw.info = hw;
bp->attr.disabled = attr->disabled;
+
return 0;
 }
 
-- 
2.7.4

[PATCH 02/12] perf/breakpoint: Pass arch breakpoint struct to arch_check_bp_in_kernelspace()

2018-06-25 Thread Frederic Weisbecker

We can't pass the breakpoint directly to arch_check_bp_in_kernelspace()
anymore because its architecture internal data (struct arch_hw_breakpoint)
is not yet filled by the time we call the function, and most
implementations need this backend to be up to date. So arrange the
function to take the probing struct instead.

Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm/include/asm/hw_breakpoint.h |  2 +-
 arch/arm/kernel/hw_breakpoint.c  | 11 +++--
 arch/arm64/include/asm/hw_breakpoint.h   |  2 +-
 arch/arm64/kernel/hw_breakpoint.c|  9 ++--
 arch/powerpc/include/asm/hw_breakpoint.h |  2 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  6 +--
 arch/sh/include/asm/hw_breakpoint.h  |  2 +-
 arch/sh/kernel/hw_breakpoint.c   |  9 ++--
 arch/x86/include/asm/hw_breakpoint.h |  2 +-
 arch/x86/kernel/hw_breakpoint.c  | 71 +---
 arch/xtensa/include/asm/hw_breakpoint.h  |  2 +-
 arch/xtensa/kernel/hw_breakpoint.c   |  7 ++--
 kernel/events/hw_breakpoint.c|  2 +-
 13 files changed, 63 insertions(+), 64 deletions(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h 
b/arch/arm/include/asm/hw_breakpoint.h
index e46e4e7..d5a0f52 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -117,7 +117,7 @@ struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type);
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
 extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 629e251..385dcf4 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -456,14 +456,13 @@ static int get_hbp_len(u8 hbp_len)
 /*
  * Check whether bp virtual address is in kernel space.
  */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
 {
unsigned int len;
unsigned long va;
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-   va = info->address;
-   len = get_hbp_len(info->ctrl.len);
+   va = hw->address;
+   len = get_hbp_len(hw->ctrl.len);
 
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -576,7 +575,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 
/* Privilege */
info->ctrl.privilege = ARM_BREAKPOINT_USER;
-   if (arch_check_bp_in_kernelspace(bp))
+   if (arch_check_bp_in_kernelspace(info))
info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
 
/* Enabled? */
@@ -640,7 +639,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
return -EINVAL;
 
/* We don't allow mismatch breakpoints in kernel space. */
-   if (arch_check_bp_in_kernelspace(bp))
+   if (arch_check_bp_in_kernelspace(info))
return -EPERM;
 
/*
diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
b/arch/arm64/include/asm/hw_breakpoint.h
index 4177076..9f4a3d4 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -124,7 +124,7 @@ struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type, int *offset);
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
 extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
diff --git a/arch/arm64/kernel/hw_breakpoint.c 
b/arch/arm64/kernel/hw_breakpoint.c
index 413dbe5..6a90d12 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -343,14 +343,13 @@ static int get_hbp_len(u8 hbp_len)
 /*
  * Check whether bp virtual address is in kernel space.
  */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
 {
unsigned int len;
unsigned long va;
-   struct arch_hw_breakpoint *info =

[PATCH 05/12] arm: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Acked-by: Mark Rutland 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm/include/asm/hw_breakpoint.h |  6 ++-
 arch/arm/kernel/hw_breakpoint.c  | 71 ++--
 2 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h 
b/arch/arm/include/asm/hw_breakpoint.h
index d5a0f52..1e02925 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -111,6 +111,7 @@ static inline void decode_ctrl_reg(u32 reg,
asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\
 } while (0)
 
+struct perf_event_attr;
 struct notifier_block;
 struct perf_event;
 struct pmu;
@@ -118,7 +119,10 @@ struct pmu;
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 385dcf4..1d5fbf1 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -517,42 +517,42 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl 
ctrl,
 /*
  * Construct an arch_hw_breakpoint from a perf_event.
  */
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_X:
-   info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+   hw->ctrl.type = ARM_BREAKPOINT_EXECUTE;
break;
case HW_BREAKPOINT_R:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD;
break;
case HW_BREAKPOINT_W:
-   info->ctrl.type = ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_STORE;
break;
case HW_BREAKPOINT_RW:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
break;
default:
return -EINVAL;
}
 
/* Len */
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
case HW_BREAKPOINT_LEN_8:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_8;
-   if ((info->ctrl.type != ARM_BREAKPOINT_EXECUTE)
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_8;
+   if ((hw->ctrl.type != ARM_BREAKPOINT_EXECUTE)
&& max_watchpoint_len >= 8)
break;
default:
@@ -565,24 +565,24 @@ static int arch_build_bp_info(struct perf_event *bp)
 * by the hardware and must be aligned to the appropriate number of
 * bytes.
 */
-   if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
-   info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
-   info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+   if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
+   hw->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+   hw->ctrl.len != ARM_BREAKPOINT_LEN_4)
return -EINVAL;
 
/* Address */
-   info->address = bp->attr.bp_addr;
+

[PATCH 05/12] arm: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Acked-by: Mark Rutland 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/arm/include/asm/hw_breakpoint.h |  6 ++-
 arch/arm/kernel/hw_breakpoint.c  | 71 ++--
 2 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h 
b/arch/arm/include/asm/hw_breakpoint.h
index d5a0f52..1e02925 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -111,6 +111,7 @@ static inline void decode_ctrl_reg(u32 reg,
asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\
 } while (0)
 
+struct perf_event_attr;
 struct notifier_block;
 struct perf_event;
 struct pmu;
@@ -118,7 +119,10 @@ struct pmu;
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
  int *gen_len, int *gen_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 385dcf4..1d5fbf1 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -517,42 +517,42 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl 
ctrl,
 /*
  * Construct an arch_hw_breakpoint from a perf_event.
  */
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_X:
-   info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+   hw->ctrl.type = ARM_BREAKPOINT_EXECUTE;
break;
case HW_BREAKPOINT_R:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD;
break;
case HW_BREAKPOINT_W:
-   info->ctrl.type = ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_STORE;
break;
case HW_BREAKPOINT_RW:
-   info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+   hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
break;
default:
return -EINVAL;
}
 
/* Len */
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
case HW_BREAKPOINT_LEN_8:
-   info->ctrl.len = ARM_BREAKPOINT_LEN_8;
-   if ((info->ctrl.type != ARM_BREAKPOINT_EXECUTE)
+   hw->ctrl.len = ARM_BREAKPOINT_LEN_8;
+   if ((hw->ctrl.type != ARM_BREAKPOINT_EXECUTE)
&& max_watchpoint_len >= 8)
break;
default:
@@ -565,24 +565,24 @@ static int arch_build_bp_info(struct perf_event *bp)
 * by the hardware and must be aligned to the appropriate number of
 * bytes.
 */
-   if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
-   info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
-   info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+   if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
+   hw->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+   hw->ctrl.len != ARM_BREAKPOINT_LEN_4)
return -EINVAL;
 
/* Address */
-   info->address = bp->attr.bp_addr;
+

[PATCH 03/12] x86: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Original-patch-by: Andy Lutomirski 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/x86/include/asm/hw_breakpoint.h |  6 +++-
 arch/x86/kernel/hw_breakpoint.c  | 60 ++--
 2 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/hw_breakpoint.h 
b/arch/x86/include/asm/hw_breakpoint.h
index 7892459..6c88e8e2 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -49,11 +49,15 @@ static inline int hw_breakpoint_slots(int type)
return HBP_NUM;
 }
 
+struct perf_event_attr;
 struct perf_event;
 struct pmu;
 
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index c433791..34a5c17 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -239,19 +239,20 @@ int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw)
return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
 }
 
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
-   info->address = bp->attr.bp_addr;
+   hw->address = attr->bp_addr;
+   hw->mask = 0;
 
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_W:
-   info->type = X86_BREAKPOINT_WRITE;
+   hw->type = X86_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-   info->type = X86_BREAKPOINT_RW;
+   hw->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
@@ -259,23 +260,23 @@ static int arch_build_bp_info(struct perf_event *bp)
 * acceptable for kprobes.  On non-kprobes kernels, we don't
 * allow kernel breakpoints at all.
 */
-   if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
+   if (attr->bp_addr >= TASK_SIZE_MAX) {
 #ifdef CONFIG_KPROBES
-   if (within_kprobe_blacklist(bp->attr.bp_addr))
+   if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
 #else
return -EINVAL;
 #endif
}
 
-   info->type = X86_BREAKPOINT_EXECUTE;
+   hw->type = X86_BREAKPOINT_EXECUTE;
/*
 * x86 inst breakpoints need to have a specific undefined len.
 * But we still need to check userspace is not trying to setup
 * an unsupported length, to get a range breakpoint for example.
 */
-   if (bp->attr.bp_len == sizeof(long)) {
-   info->len = X86_BREAKPOINT_LEN_X;
+   if (attr->bp_len == sizeof(long)) {
+   hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
default:
@@ -283,28 +284,26 @@ static int arch_build_bp_info(struct perf_event *bp)
}
 
/* Len */
-   info->mask = 0;
-
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->len = X86_BREAKPOINT_LEN_1;
+   hw->len = X86_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->len = X86_BREAKPOINT_LEN_2;
+   hw->len = X86_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
-   info->len = X86_BREAKPOINT_LEN_4;
+   hw->len = X86_BREAKPOINT_LEN_4;
break;
 #ifdef CONFIG_X86_64
case HW_BREAKPOINT_LEN_8:
-   info->len = X86_BREAKPOINT_LEN_8;
+

[PATCH 03/12] x86: Implement hw_breakpoint_arch_parse()

2018-06-25 Thread Frederic Weisbecker

Migrate to the new API in order to remove arch_validate_hwbkpt_settings()
that clumsily mixes up architecture validation and commit.

Original-patch-by: Andy Lutomirski 
Signed-off-by: Frederic Weisbecker 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Max Filippov 
Cc: Chris Zankel 
Cc: Catalin Marinas 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Peter Zijlstra 
Cc: Arnaldo Carvalho de Melo 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Joel Fernandes 
---
 arch/x86/include/asm/hw_breakpoint.h |  6 +++-
 arch/x86/kernel/hw_breakpoint.c  | 60 ++--
 2 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/hw_breakpoint.h 
b/arch/x86/include/asm/hw_breakpoint.h
index 7892459..6c88e8e2 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -49,11 +49,15 @@ static inline int hw_breakpoint_slots(int type)
return HBP_NUM;
 }
 
+struct perf_event_attr;
 struct perf_event;
 struct pmu;
 
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+   const struct perf_event_attr *attr,
+   struct arch_hw_breakpoint *hw);
+#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
   unsigned long val, void *data);
 
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index c433791..34a5c17 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -239,19 +239,20 @@ int arch_check_bp_in_kernelspace(struct 
arch_hw_breakpoint *hw)
return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
 }
 
-static int arch_build_bp_info(struct perf_event *bp)
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
 {
-   struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
-   info->address = bp->attr.bp_addr;
+   hw->address = attr->bp_addr;
+   hw->mask = 0;
 
/* Type */
-   switch (bp->attr.bp_type) {
+   switch (attr->bp_type) {
case HW_BREAKPOINT_W:
-   info->type = X86_BREAKPOINT_WRITE;
+   hw->type = X86_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-   info->type = X86_BREAKPOINT_RW;
+   hw->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
@@ -259,23 +260,23 @@ static int arch_build_bp_info(struct perf_event *bp)
 * acceptable for kprobes.  On non-kprobes kernels, we don't
 * allow kernel breakpoints at all.
 */
-   if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
+   if (attr->bp_addr >= TASK_SIZE_MAX) {
 #ifdef CONFIG_KPROBES
-   if (within_kprobe_blacklist(bp->attr.bp_addr))
+   if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
 #else
return -EINVAL;
 #endif
}
 
-   info->type = X86_BREAKPOINT_EXECUTE;
+   hw->type = X86_BREAKPOINT_EXECUTE;
/*
 * x86 inst breakpoints need to have a specific undefined len.
 * But we still need to check userspace is not trying to setup
 * an unsupported length, to get a range breakpoint for example.
 */
-   if (bp->attr.bp_len == sizeof(long)) {
-   info->len = X86_BREAKPOINT_LEN_X;
+   if (attr->bp_len == sizeof(long)) {
+   hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
default:
@@ -283,28 +284,26 @@ static int arch_build_bp_info(struct perf_event *bp)
}
 
/* Len */
-   info->mask = 0;
-
-   switch (bp->attr.bp_len) {
+   switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
-   info->len = X86_BREAKPOINT_LEN_1;
+   hw->len = X86_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
-   info->len = X86_BREAKPOINT_LEN_2;
+   hw->len = X86_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
-   info->len = X86_BREAKPOINT_LEN_4;
+   hw->len = X86_BREAKPOINT_LEN_4;
break;
 #ifdef CONFIG_X86_64
case HW_BREAKPOINT_LEN_8:
-   info->len = X86_BREAKPOINT_LEN_8;
+

[GIT PULL] breakpoint: Rework arch validation v4

2018-06-25 Thread Frederic Weisbecker

Ingo,

Please pull the perf/breakpoint-v4 branch that can be found at:

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
perf/breakpoint-v4

HEAD: ba25ee9c7b3ef1543c2a24a7ca6a621433803ee4

Only change since v3 is a rebase against latest tip:perf/core

---
When we modify a hardware breakpoint, the architecture code fills up
the architecture data as the validation of generic attributes progresses.
If something goes wrong in the middle, the architecture data changes
aren't rolled back and we are left with a halfway fiddled breakpoint.

This set fixes the various misdesigns that back this bad behaviour.

Thanks,
Frederic
---

Frederic Weisbecker (12):
  perf/breakpoint: Split attribute parse and commit
  perf/breakpoint: Pass arch breakpoint struct to 
arch_check_bp_in_kernelspace()
  x86: Implement hw_breakpoint_arch_parse()
  powerpc: Implement hw_breakpoint_arch_parse()
  arm: Implement hw_breakpoint_arch_parse()
  arm64: Implement hw_breakpoint_arch_parse()
  sh: Remove "struct arch_hw_breakpoint::name" unused field
  sh: Implement hw_breakpoint_arch_parse()
  xtensa: Implement hw_breakpoint_arch_parse()
  perf/breakpoint: Remove default hw_breakpoint_arch_parse()
  perf/breakpoint: Pass new breakpoint type to modify_breakpoint_slot()
  perf/breakpoint: Clean up and consolidate 
modify_user_hw_breakpoint_check()


 arch/arm/include/asm/hw_breakpoint.h |   7 +-
 arch/arm/kernel/hw_breakpoint.c  |  78 +-
 arch/arm64/include/asm/hw_breakpoint.h   |   7 +-
 arch/arm64/kernel/hw_breakpoint.c|  86 ++--
 arch/powerpc/include/asm/hw_breakpoint.h |   7 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  47 ++-
 arch/sh/include/asm/hw_breakpoint.h  |   8 +-
 arch/sh/kernel/hw_breakpoint.c   |  53 ++---
 arch/x86/include/asm/hw_breakpoint.h |   7 +-
 arch/x86/kernel/hw_breakpoint.c  | 131 ---
 arch/xtensa/include/asm/hw_breakpoint.h  |   7 +-
 arch/xtensa/kernel/hw_breakpoint.c   |  40 --
 kernel/events/hw_breakpoint.c|  92 +-
 13 files changed, 294 insertions(+), 276 deletions(-)

[GIT PULL] breakpoint: Rework arch validation v4

2018-06-25 Thread Frederic Weisbecker

Ingo,

Please pull the perf/breakpoint-v4 branch that can be found at:

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
perf/breakpoint-v4

HEAD: ba25ee9c7b3ef1543c2a24a7ca6a621433803ee4

Only change since v3 is a rebase against latest tip:perf/core

---
When we modify a hardware breakpoint, the architecture code fills up
the architecture data as the validation of generic attributes progresses.
If something goes wrong in the middle, the architecture data changes
aren't rolled back and we are left with a halfway fiddled breakpoint.

This set fixes the various misdesigns that back this bad behaviour.

Thanks,
Frederic
---

Frederic Weisbecker (12):
  perf/breakpoint: Split attribute parse and commit
  perf/breakpoint: Pass arch breakpoint struct to 
arch_check_bp_in_kernelspace()
  x86: Implement hw_breakpoint_arch_parse()
  powerpc: Implement hw_breakpoint_arch_parse()
  arm: Implement hw_breakpoint_arch_parse()
  arm64: Implement hw_breakpoint_arch_parse()
  sh: Remove "struct arch_hw_breakpoint::name" unused field
  sh: Implement hw_breakpoint_arch_parse()
  xtensa: Implement hw_breakpoint_arch_parse()
  perf/breakpoint: Remove default hw_breakpoint_arch_parse()
  perf/breakpoint: Pass new breakpoint type to modify_breakpoint_slot()
  perf/breakpoint: Clean up and consolidate 
modify_user_hw_breakpoint_check()


 arch/arm/include/asm/hw_breakpoint.h |   7 +-
 arch/arm/kernel/hw_breakpoint.c  |  78 +-
 arch/arm64/include/asm/hw_breakpoint.h   |   7 +-
 arch/arm64/kernel/hw_breakpoint.c|  86 ++--
 arch/powerpc/include/asm/hw_breakpoint.h |   7 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  47 ++-
 arch/sh/include/asm/hw_breakpoint.h  |   8 +-
 arch/sh/kernel/hw_breakpoint.c   |  53 ++---
 arch/x86/include/asm/hw_breakpoint.h |   7 +-
 arch/x86/kernel/hw_breakpoint.c  | 131 ---
 arch/xtensa/include/asm/hw_breakpoint.h  |   7 +-
 arch/xtensa/kernel/hw_breakpoint.c   |  40 --
 kernel/events/hw_breakpoint.c|  92 +-
 13 files changed, 294 insertions(+), 276 deletions(-)

Re: [PATCH] arm: dts: socfpga: denali needs nand_x_clk too

2018-06-25 Thread Masahiro Yamada

2018-06-25 23:55 GMT+09:00 Boris Brezillon :
> On Mon, 25 Jun 2018 09:50:18 -0500
> Dinh Nguyen  wrote:
>
>> On 06/22/2018 10:58 AM, Richard Weinberger wrote:
>> > Masahiro,
>> >
>> > Am Freitag, 22. Juni 2018, 16:37:21 CEST schrieb Masahiro Yamada:
>> >> Hi Richard,
>> >>
>> >>
>> >> 2018-06-19 21:07 GMT+09:00 Richard Weinberger :
>> >>> The denali NAND flash controller needs at least two clocks to operate,
>> >>> nand_clk and nand_x_clk.
>> >>> Since 1bb88666775e ("mtd: nand: denali: handle timing parameters by
>> >>> setup_data_interface()") nand_x_clk is used to derive timing settings.
>> >>>
>> >>> Signed-off-by: Richard Weinberger 
>> >>> ---
>> >>> Strictly speaking denali needs a ecc_clk too, but AFAIK such a clock
>> >>> is not present on this SoC.
>> >>> But my SoCFPGA knowledge is very limited.
>> >>>
>> >>> Thanks,
>> >>> //richard
>> >>> ---
>> >>>  arch/arm/boot/dts/socfpga.dtsi | 3 ++-
>> >>>  1 file changed, 2 insertions(+), 1 deletion(-)
>> >>>
>> >>> diff --git a/arch/arm/boot/dts/socfpga.dtsi 
>> >>> b/arch/arm/boot/dts/socfpga.dtsi
>> >>> index 486d4e7433ed..562f7b375bbd 100644
>> >>> --- a/arch/arm/boot/dts/socfpga.dtsi
>> >>> +++ b/arch/arm/boot/dts/socfpga.dtsi
>> >>> @@ -754,7 +754,8 @@
>> >>> reg-names = "nand_data", "denali_reg";
>> >>> interrupts = <0x0 0x90 0x4>;
>> >>> dma-mask = <0xffffffff>;
>> >>> -   clocks = <&nand_clk>;
>> >>> +   clocks = <&nand_clk>, <&nand_x_clk>;
>> >>> +   clock-names = "nand", "nand_x";
>> >>
>> >>
>> >> IMHO, this should be
>> >>
>> >>   clocks = <&nand_clk>, <&nand_x_clk>, 
>> >> <&nand_x_clk>;
>> >>   clock-names = "nand", "nand_x", "ecc";
>>
>> No, it should be just the nand_x and ecc.
>>
>> There's already a patch to use the nand_x_clk and not the nand_clk.


Different people try to fix the problem in different ways.

I think it is due to miscommunication across sub-systems.


>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git/commit/?h=socfpga_for_next_v4.19_fixes_v1&id=1709ab58eb79b19bceb2287d111bf1bd2df1cf6d


This does not break your board.
However, it is not helpful any more.

I already fixed the Denali driver
to use the hard-coded clock frequency
if the old binding is used.


BTW, Marek issued Reviewed-by to this patch.



> Hm, are you sure this is accurate? I might be wrong but I find it weird
> that the denali NAND controller IP has been adapted by Xilinx to only

Xilinx?

Do you mean Altera (Intel)?


> take one clk. Isn't that the same clk is feeding all clk inputs of the
> denali block?




-- 
Best Regards
Masahiro Yamada

Re: [PATCH v10 0/7] i2c: Add FSI-attached I2C master algorithm

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:12PM -0500, Eddie James wrote:
> This series adds an algorithm for an I2C master physically located on an FSI
> slave device. The I2C master has multiple ports, each of which may be 
> connected
> to an I2C slave. Access to the I2C master registers is achieved over FSI bus.
> 
> Due to the multi-port nature of the I2C master, the driver instantiates a new
> I2C adapter for each port connected to a slave. The connected ports should be
> defined in the device tree under the I2C master device.

Thanks for this series and your patience.

While I can see why it also helps reviewing to send it as a series of
multiple patches, I consider applying the driver itself as just one
hunk. I am not decided on this yet.

I have a few comments, especially about recovery. I replied to the
relevant patches with more detail.

Also, are you (or someone from your company) willing to maintain the
driver? Then, an addition to MAINTAINERS would be much appreciated.

Thanks,

   Wolfram

Re: [PATCH] arm: dts: socfpga: denali needs nand_x_clk too

2018-06-25 Thread Masahiro Yamada

2018-06-25 23:55 GMT+09:00 Boris Brezillon :
> On Mon, 25 Jun 2018 09:50:18 -0500
> Dinh Nguyen  wrote:
>
>> On 06/22/2018 10:58 AM, Richard Weinberger wrote:
>> > Masahiro,
>> >
>> > Am Freitag, 22. Juni 2018, 16:37:21 CEST schrieb Masahiro Yamada:
>> >> Hi Richard,
>> >>
>> >>
>> >> 2018-06-19 21:07 GMT+09:00 Richard Weinberger :
>> >>> The denali NAND flash controller needs at least two clocks to operate,
>> >>> nand_clk and nand_x_clk.
>> >>> Since 1bb88666775e ("mtd: nand: denali: handle timing parameters by
>> >>> setup_data_interface()") nand_x_clk is used to derive timing settings.
>> >>>
>> >>> Signed-off-by: Richard Weinberger 
>> >>> ---
>> >>> Strictly speaking denali needs a ecc_clk too, but AFAIK such a clock
>> >>> is not present on this SoC.
>> >>> But my SoCFPGA knowledge is very limited.
>> >>>
>> >>> Thanks,
>> >>> //richard
>> >>> ---
>> >>>  arch/arm/boot/dts/socfpga.dtsi | 3 ++-
>> >>>  1 file changed, 2 insertions(+), 1 deletion(-)
>> >>>
>> >>> diff --git a/arch/arm/boot/dts/socfpga.dtsi 
>> >>> b/arch/arm/boot/dts/socfpga.dtsi
>> >>> index 486d4e7433ed..562f7b375bbd 100644
>> >>> --- a/arch/arm/boot/dts/socfpga.dtsi
>> >>> +++ b/arch/arm/boot/dts/socfpga.dtsi
>> >>> @@ -754,7 +754,8 @@
>> >>> reg-names = "nand_data", "denali_reg";
>> >>> interrupts = <0x0 0x90 0x4>;
>> >>> dma-mask = <0xffffffff>;
>> >>> -   clocks = <&nand_clk>;
>> >>> +   clocks = <&nand_clk>, <&nand_x_clk>;
>> >>> +   clock-names = "nand", "nand_x";
>> >>
>> >>
>> >> IMHO, this should be
>> >>
>> >>   clocks = <&nand_clk>, <&nand_x_clk>, 
>> >> <&nand_x_clk>;
>> >>   clock-names = "nand", "nand_x", "ecc";
>>
>> No, it should be just the nand_x and ecc.
>>
>> There's already a patch to use the nand_x_clk and not the nand_clk.


Different people try to fix the problem in different ways.

I think it is due to miscommunication across sub-systems.


>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git/commit/?h=socfpga_for_next_v4.19_fixes_v1&id=1709ab58eb79b19bceb2287d111bf1bd2df1cf6d


This does not break your board.
However, it is not helpful any more.

I already fixed the Denali driver
to use the hard-coded clock frequency
if the old binding is used.


BTW, Marek issued Reviewed-by to this patch.



> Hm, are you sure this is accurate? I might be wrong but I find it weird
> that the denali NAND controller IP has been adapted by Xilinx to only

Xilinx?

Do you mean Altera (Intel)?


> take one clk. Isn't that the same clk is feeding all clk inputs of the
> denali block?




-- 
Best Regards
Masahiro Yamada

Re: [PATCH v10 0/7] i2c: Add FSI-attached I2C master algorithm

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:12PM -0500, Eddie James wrote:
> This series adds an algorithm for an I2C master physically located on an FSI
> slave device. The I2C master has multiple ports, each of which may be 
> connected
> to an I2C slave. Access to the I2C master registers is achieved over FSI bus.
> 
> Due to the multi-port nature of the I2C master, the driver instantiates a new
> I2C adapter for each port connected to a slave. The connected ports should be
> defined in the device tree under the I2C master device.

Thanks for this series and your patience.

While I can see why it also helps reviewing to send it as a series of
multiple patches, I consider applying the driver itself as just one
hunk. I am not decided on this yet.

I have a few comments, especially about recovery. I replied to the
relevant patches with more detail.

Also, are you (or someone from your company) willing to maintain the
driver? Then, an addition to MAINTAINERS would be much appreciated.

Thanks,

   Wolfram

Re: [PATCH v10 1/7] dt-bindings: i2c: Add FSI-attached I2C master dt binding documentation

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:13PM -0500, Eddie James wrote:
> Document the bindings.
> 
> Signed-off-by: Eddie James >

Broken email address here. checkpatch warns about it.

Re: [PATCH v10 1/7] dt-bindings: i2c: Add FSI-attached I2C master dt binding documentation

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:13PM -0500, Eddie James wrote:
> Document the bindings.
> 
> Signed-off-by: Eddie James >

Broken email address here. checkpatch warns about it.

Re: [PATCH v10 4/7] i2c: fsi: Add abort and hardware reset procedures

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:16PM -0500, Eddie James wrote:
> Add abort procedure for failed transfers. Add engine and bus reset
> procedures to recover from as many faults as possible.

I think this is a way too aggressive recovery. You are doing the 9
pulse toggles basically on any error while this is only when the device
keeps SDA low and you want to recover from that. If SDA is not stuck
low, sending a STOP should do. Or do you have a known case where this is
not going to work?

Also, you implement the pulse toggling manually. Can't you just populate
{get|set}_{scl|sda} and use the generic routine we have in the core?

Re: [PATCH v10 4/7] i2c: fsi: Add abort and hardware reset procedures

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:16PM -0500, Eddie James wrote:
> Add abort procedure for failed transfers. Add engine and bus reset
> procedures to recover from as many faults as possible.

I think this is a way too aggressive recovery. You are doing the 9
pulse toggles basically on any error while this is only when the device
keeps SDA low and you want to recover from that. If SDA is not stuck
low, sending a STOP should do. Or do you have a known case where this is
not going to work?

Also, you implement the pulse toggling manually. Can't you just populate
{get|set}_{scl|sda} and use the generic routine we have in the core?

Re: [PATCH v10 7/7] i2c: fsi: Add bus recovery

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:19PM -0500, Eddie James wrote:
> Bus recovery should reset the engine and force clock the bus 9 times
> to recover most situations.
> 
> Signed-off-by: Eddie James 
> ---
>  drivers/i2c/busses/i2c-fsi.c | 19 +++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c
> index d6cab4b..940b198 100644
> --- a/drivers/i2c/busses/i2c-fsi.c
> +++ b/drivers/i2c/busses/i2c-fsi.c
> @@ -611,6 +611,24 @@ static u32 fsi_i2c_functionality(struct i2c_adapter 
> *adap)
>   I2C_FUNC_SMBUS_BLOCK_DATA;
>  }
>  
> +static int fsi_i2c_recover_bus(struct i2c_adapter *adap)
> +{
> + int rc;
> + struct fsi_i2c_port *port = adap->algo_data;
> + struct fsi_i2c_master *master = port->master;
> +
> + mutex_lock(&master->lock);
> +
> + rc = fsi_i2c_reset(master, port->port);
> +
> + mutex_unlock(&master->lock);
> + return rc;
> +}
> +
> +static struct i2c_bus_recovery_info fsi_i2c_bus_recovery_info = {
> + .recover_bus = fsi_i2c_recover_bus,
> +};
> +

This all won't have any effect since you never call i2c_recover_bus
which calls back into i2c_bus_recovery_info callbacks.

Re: [PATCH v10 7/7] i2c: fsi: Add bus recovery

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:19PM -0500, Eddie James wrote:
> Bus recovery should reset the engine and force clock the bus 9 times
> to recover most situations.
> 
> Signed-off-by: Eddie James 
> ---
>  drivers/i2c/busses/i2c-fsi.c | 19 +++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c
> index d6cab4b..940b198 100644
> --- a/drivers/i2c/busses/i2c-fsi.c
> +++ b/drivers/i2c/busses/i2c-fsi.c
> @@ -611,6 +611,24 @@ static u32 fsi_i2c_functionality(struct i2c_adapter 
> *adap)
>   I2C_FUNC_SMBUS_BLOCK_DATA;
>  }
>  
> +static int fsi_i2c_recover_bus(struct i2c_adapter *adap)
> +{
> + int rc;
> + struct fsi_i2c_port *port = adap->algo_data;
> + struct fsi_i2c_master *master = port->master;
> +
> + mutex_lock(&master->lock);
> +
> + rc = fsi_i2c_reset(master, port->port);
> +
> + mutex_unlock(&master->lock);
> + return rc;
> +}
> +
> +static struct i2c_bus_recovery_info fsi_i2c_bus_recovery_info = {
> + .recover_bus = fsi_i2c_recover_bus,
> +};
> +

This all won't have any effect since you never call i2c_recover_bus
which calls back into i2c_bus_recovery_info callbacks.

Re: [PATCH v10 5/7] i2c: fsi: Add transfer implementation

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:17PM -0500, Eddie James wrote:
> Execute I2C transfers from the FSI-attached I2C master. Use polling
> instead of interrupts as we have no hardware IRQ over FSI.
> 
> Signed-off-by: Eddie James 
> ---
>  drivers/i2c/busses/i2c-fsi.c | 195 
> ++-
>  1 file changed, 193 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c
> index 4611a0b..8c0a6cb 100644
> --- a/drivers/i2c/busses/i2c-fsi.c
> +++ b/drivers/i2c/busses/i2c-fsi.c
> @@ -155,6 +155,7 @@ struct fsi_i2c_port {
>   struct i2c_adapter  adapter;
>   struct fsi_i2c_master   *master;
>   u16 port;
> + u16 xfrd;
>  };
>  
>  static int fsi_i2c_read_reg(struct fsi_device *fsi, unsigned int reg,
> @@ -230,6 +231,99 @@ static int fsi_i2c_set_port(struct fsi_i2c_port *port)
>   return fsi_i2c_write_reg(fsi, I2C_FSI_RESET_ERR, &dummy);
>  }
>  
> +static int fsi_i2c_start(struct fsi_i2c_port *port, struct i2c_msg *msg,
> +  bool stop)
> +{
> + struct fsi_i2c_master *i2c = port->master;
> + u32 cmd = I2C_CMD_WITH_START | I2C_CMD_WITH_ADDR;
> +
> + port->xfrd = 0;
> +
> + if (msg->flags & I2C_M_RD)
> + cmd |= I2C_CMD_READ;

Since you support MANGLING, I'd think you can easily support
I2C_M_REV_DIR_ADDR here, too?

Re: [PATCH v2 00/10] Split i2c_lock_adapter into i2c_lock_root and i2c_lock_segment

2018-06-25 Thread Wolfram Sang

On Wed, Jun 20, 2018 at 07:17:53AM +0200, Peter Rosin wrote:
> Hi!
> 
> With the introduction of mux-locked I2C muxes, the concept of
> locking only a segment of the I2C adapter tree was added. At the
> time, I did not want to cause a lot of extra churn, so left most
> users of i2c_lock_adapter alone and apparently didn't think enough
> about it; they simply continued to lock the whole adapter tree.
> However, i2c_lock_adapter is in fact wrong for almost every caller
> (there is naturally an exception) that is itself not a driver for
> a root adapter. What normal drivers generally want is to only
> lock the segment of the adapter tree that their device sits on.
> 
> In fact, if a device sits behind a mux-locked I2C mux, and its
> driver calls i2c_lock_adapter followed by an unlocked I2C transfer,
> things will deadlock (since even a mux-locked I2C adapter will lock
> its parent at some point). If the device is not sitting behind a
> mux-locked I2C mux (i.e. either directly on the root adapter or
> behind a (chain of) parent-locked I2C muxes) the root/segment
> distinction is of no consequence; the root adapter is locked either
> way.
> 
> Mux-locked I2C muxes are probably not that common, and putting any
> of the affected devices behind one is probably even rarer, which
> is why we have not seen any deadlocks. At least not that I know
> of...
> 
> Since silently changing the semantics of i2c_lock_adapter might
> be quite a surprise, especially for out-of-tree users, this series
> instead removes the function and forces all users to explicitly
> name I2C_LOCK_SEGMENT or I2C_LOCK_ROOT_ADAPTER in a call to
> i2c_lock_bus, as suggested by Wolfram. Yes, users will be a teensy
> bit more wordy, but open-coding I2C locking from random drivers
> should be avoided, so it's perhaps a good thing if it doesn't look
> too neat?
> 
> I suggest that Wolfram takes this series through the I2C tree and
> creates an immutable branch for the other subsystems. The series
> is based on v4.18-rc1.

Applied to a separate branch named "i2c/precise-locking-names" which I
will merge into for-next, so it will get proper testing already. Once we
get the missing acks from media, MFD, and IIO maintainers, I will merge
it into for-4.19.

Thank you, Peter!

Re: [PATCH 1/5] i2c: smbus: add unlocked __i2c_smbus_xfer variant

2018-06-25 Thread Wolfram Sang



> This is not perfectly equivalent, since i2c_smbus_xfer was callable from
> atomic/irq context if you happened to end up emulating SMBus with an I2C
> transfer, and that is no longer the case with this patch. It is unknown
> (to me) if anything depends on that quirk, but it seems fragile enough to
> simply break those cases and require them to call i2c_transfer directly
> instead.

Couldn't we just add the same trylock-code path here as well? I always
wondered why I2C and SMBus were not in sync when it came to that. Yet, I
didn't want to change the code for no reason, but it seems we now have
one?

Rest of the series looks good to me, very nice diffstat!

Re: [PATCH v10 5/7] i2c: fsi: Add transfer implementation

2018-06-25 Thread Wolfram Sang

On Wed, Jun 13, 2018 at 02:36:17PM -0500, Eddie James wrote:
> Execute I2C transfers from the FSI-attached I2C master. Use polling
> instead of interrupts as we have no hardware IRQ over FSI.
> 
> Signed-off-by: Eddie James 
> ---
>  drivers/i2c/busses/i2c-fsi.c | 195 
> ++-
>  1 file changed, 193 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c
> index 4611a0b..8c0a6cb 100644
> --- a/drivers/i2c/busses/i2c-fsi.c
> +++ b/drivers/i2c/busses/i2c-fsi.c
> @@ -155,6 +155,7 @@ struct fsi_i2c_port {
>   struct i2c_adapter  adapter;
>   struct fsi_i2c_master   *master;
>   u16 port;
> + u16 xfrd;
>  };
>  
>  static int fsi_i2c_read_reg(struct fsi_device *fsi, unsigned int reg,
> @@ -230,6 +231,99 @@ static int fsi_i2c_set_port(struct fsi_i2c_port *port)
>   return fsi_i2c_write_reg(fsi, I2C_FSI_RESET_ERR, &dummy);
>  }
>  
> +static int fsi_i2c_start(struct fsi_i2c_port *port, struct i2c_msg *msg,
> +  bool stop)
> +{
> + struct fsi_i2c_master *i2c = port->master;
> + u32 cmd = I2C_CMD_WITH_START | I2C_CMD_WITH_ADDR;
> +
> + port->xfrd = 0;
> +
> + if (msg->flags & I2C_M_RD)
> + cmd |= I2C_CMD_READ;

Since you support MANGLING, I'd think you can easily support
I2C_M_REV_DIR_ADDR here, too?

Re: [PATCH v2 00/10] Split i2c_lock_adapter into i2c_lock_root and i2c_lock_segment

2018-06-25 Thread Wolfram Sang

On Wed, Jun 20, 2018 at 07:17:53AM +0200, Peter Rosin wrote:
> Hi!
> 
> With the introduction of mux-locked I2C muxes, the concept of
> locking only a segment of the I2C adapter tree was added. At the
> time, I did not want to cause a lot of extra churn, so left most
> users of i2c_lock_adapter alone and apparently didn't think enough
> about it; they simply continued to lock the whole adapter tree.
> However, i2c_lock_adapter is in fact wrong for almost every caller
> (there is naturally an exception) that is itself not a driver for
> a root adapter. What normal drivers generally want is to only
> lock the segment of the adapter tree that their device sits on.
> 
> In fact, if a device sits behind a mux-locked I2C mux, and its
> driver calls i2c_lock_adapter followed by an unlocked I2C transfer,
> things will deadlock (since even a mux-locked I2C adapter will lock
> its parent at some point). If the device is not sitting behind a
> mux-locked I2C mux (i.e. either directly on the root adapter or
> behind a (chain of) parent-locked I2C muxes) the root/segment
> distinction is of no consequence; the root adapter is locked either
> way.
> 
> Mux-locked I2C muxes are probably not that common, and putting any
> of the affected devices behind one is probably even rarer, which
> is why we have not seen any deadlocks. At least not that I know
> of...
> 
> Since silently changing the semantics of i2c_lock_adapter might
> be quite a surprise, especially for out-of-tree users, this series
> instead removes the function and forces all users to explicitly
> name I2C_LOCK_SEGMENT or I2C_LOCK_ROOT_ADAPTER in a call to
> i2c_lock_bus, as suggested by Wolfram. Yes, users will be a teensy
> bit more wordy, but open-coding I2C locking from random drivers
> should be avoided, so it's perhaps a good thing if it doesn't look
> too neat?
> 
> I suggest that Wolfram takes this series through the I2C tree and
> creates an immutable branch for the other subsystems. The series
> is based on v4.18-rc1.

Applied to a separate branch named "i2c/precise-locking-names" which I
will merge into for-next, so it will get proper testing already. Once we
get the missing acks from media, MFD, and IIO maintainers, I will merge
it into for-4.19.

Thank you, Peter!

Re: [PATCH 1/5] i2c: smbus: add unlocked __i2c_smbus_xfer variant

2018-06-25 Thread Wolfram Sang



> This is not perfectly equivalent, since i2c_smbus_xfer was callable from
> atomic/irq context if you happened to end up emulating SMBus with an I2C
> transfer, and that is no longer the case with this patch. It is unknown
> (to me) if anything depends on that quirk, but it seems fragile enough to
> simply break those cases and require them to call i2c_transfer directly
> instead.

Couldn't we just add the same trylock-code path here as well? I always
wondered why I2C and SMBus were not in sync when it came to that. Yet, I
didn't want to change the code for no reason, but it seems we now have
one?

Rest of the series looks good to me, very nice diffstat!

Re: [PATCH] i2c: smbus: kill memory leak on emulated and failed DMA SMBus xfers

2018-06-25 Thread Wolfram Sang

On Wed, Jun 20, 2018 at 11:43:23AM +0200, Peter Rosin wrote:
> If DMA safe memory was allocated, but the subsequent I2C transfer
> fails the memory is leaked. Plug this leak.
> 
> Fixes: 8a77821e74d6 ("i2c: smbus: use DMA safe buffers for emulated SMBus 
> transactions")
> Signed-off-by: Peter Rosin 

Applied to for-current, thanks!

Re: [PATCH] i2c: smbus: kill memory leak on emulated and failed DMA SMBus xfers

2018-06-25 Thread Wolfram Sang

On Wed, Jun 20, 2018 at 11:43:23AM +0200, Peter Rosin wrote:
> If DMA safe memory was allocated, but the subsequent I2C transfer
> fails the memory is leaked. Plug this leak.
> 
> Fixes: 8a77821e74d6 ("i2c: smbus: use DMA safe buffers for emulated SMBus 
> transactions")
> Signed-off-by: Peter Rosin 

Applied to for-current, thanks!

Re: moving affs + RDB partition support to staging?

2018-06-25 Thread Michael Schmitz

Joanne,

Martin's boot log (including your patch) says:

Jun 19 21:19:09 merkaba kernel: [ 7891.843284]  sdb: RDSK (512) sdb1
(LNX^@)(res 2 spb 1) sdb2 (JXF^D)(res 2 spb 1) sdb3 (DOS^C)(res 2 spb
4)
Jun 19 21:19:09 merkaba kernel: [ 7891.844055] sd 7:0:0:0: [sdb]
Attached SCSI disk

so it's indeed a case of self inflicted damage (RDSK (512) means 512
byte blocks) and can be worked around by using a different block size.

Your memory serves right indeed - blocksize is in 512 bytes units.
I'll still submit a patch to Jens anyway as this may bite others yet.

Cheers,

  Michael


On Sun, Jun 24, 2018 at 11:40 PM, jdow  wrote:
> BTW - anybody who uses 512 byte blocks with an Amiga file system is a famn
> dool.
>
> If memory serves the RDBs think in blocks rather than bytes so it should
> work up to 2 gigablocks whatever your block size is. 512 blocks is
> 219902322 bytes. But that wastes just a WHOLE LOT of disk in block maps.
> Go up to 4096 or 8192. The latter is 35 TB.
>
> {^_^}
> On 20180624 02:06, Martin Steigerwald wrote:
>>
>> Hi.
>>
>> Michael Schmitz - 27.04.18, 04:11:
>>>
>>> test results at https://bugzilla.kernel.org/show_bug.cgi?id=43511
>>> indicate the RDB parser bug is fixed by the patch given there, so if
>>> Martin now submits the patch, all should be well?
>>
>>
>> Ok, better be honest than having anyone waiting for it:
>>
>> I do not care enough about this, in order to motivate myself preparing
>> a patch from Joanne Dow's fix.
>>
>> I am not even using my Amiga boxes anymore, not even the Sam440ep which
>> I still have in my apartment.
>>
>> So RDB support in Linux remains broken for disks larger than 2 TB, unless
>> someone else does.
>>
>> Thanks.
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: moving affs + RDB partition support to staging?

2018-06-25 Thread Michael Schmitz

Joanne,

Martin's boot log (including your patch) says:

Jun 19 21:19:09 merkaba kernel: [ 7891.843284]  sdb: RDSK (512) sdb1
(LNX^@)(res 2 spb 1) sdb2 (JXF^D)(res 2 spb 1) sdb3 (DOS^C)(res 2 spb
4)
Jun 19 21:19:09 merkaba kernel: [ 7891.844055] sd 7:0:0:0: [sdb]
Attached SCSI disk

so it's indeed a case of self inflicted damage (RDSK (512) means 512
byte blocks) and can be worked around by using a different block size.

Your memory serves right indeed - blocksize is in 512 bytes units.
I'll still submit a patch to Jens anyway as this may bite others yet.

Cheers,

  Michael


On Sun, Jun 24, 2018 at 11:40 PM, jdow  wrote:
> BTW - anybody who uses 512 byte blocks with an Amiga file system is a famn
> dool.
>
> If memory serves the RDBs think in blocks rather than bytes so it should
> work up to 2 gigablocks whatever your block size is. 512 blocks is
> 219902322 bytes. But that wastes just a WHOLE LOT of disk in block maps.
> Go up to 4096 or 8192. The latter is 35 TB.
>
> {^_^}
> On 20180624 02:06, Martin Steigerwald wrote:
>>
>> Hi.
>>
>> Michael Schmitz - 27.04.18, 04:11:
>>>
>>> test results at https://bugzilla.kernel.org/show_bug.cgi?id=43511
>>> indicate the RDB parser bug is fixed by the patch given there, so if
>>> Martin now submits the patch, all should be well?
>>
>>
>> Ok, better be honest than having anyone waiting for it:
>>
>> I do not care enough about this, in order to motivate myself preparing
>> a patch from Joanne Dow's fix.
>>
>> I am not even using my Amiga boxes anymore, not even the Sam440ep which
>> I still have in my apartment.
>>
>> So RDB support in Linux remains broken for disks larger than 2 TB, unless
>> someone else does.
>>
>> Thanks.
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH resend 1/5] remoteproc: Rename subdev functions to start/stop

2018-06-25 Thread Bjorn Andersson

On Mon 25 Jun 18:34 PDT 2018, Alex Elder wrote:

> From: Bjorn Andersson 
> 
> "start" and "stop" are more suitable names for how these two operations
> are used, and they fit better with the upcoming introduction of two
> additional operations in the struct.
> 
> [el...@linaro.org: minor comment edits]
> 
> Signed-off-by: Bjorn Andersson 
> Acked-by: Alex Elder 
> Tested-by: Fabien Dessenne 

Sorry for not spotting this before, but per section 11 of
Documentation/process/submitting-patches.rst the tag part should read:


Tested-by: Fabien Dessenne 
Signed-off-by: Bjorn Andersson 
[el...@linaro.org: minor comment edits]
Signed-off-by: Alex Elder 


I.e. as you're sending the email you must be the last one certifying the
origin of the patch.

Regards,
Bjorn

> ---
>  drivers/remoteproc/remoteproc_core.c | 30 ++--
>  include/linux/remoteproc.h   | 14 ++---
>  2 files changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c 
> b/drivers/remoteproc/remoteproc_core.c
> index a9609d971f7f..5dd58e6bea88 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -774,13 +774,13 @@ static int rproc_handle_resources(struct rproc *rproc,
>   return ret;
>  }
>  
> -static int rproc_probe_subdevices(struct rproc *rproc)
> +static int rproc_start_subdevices(struct rproc *rproc)
>  {
>   struct rproc_subdev *subdev;
>   int ret;
>  
>   list_for_each_entry(subdev, &rproc->subdevs, node) {
> - ret = subdev->probe(subdev);
> + ret = subdev->start(subdev);
>   if (ret)
>   goto unroll_registration;
>   }
> @@ -789,17 +789,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
>  
>  unroll_registration:
>   list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
> - subdev->remove(subdev, true);
> + subdev->stop(subdev, true);
>  
>   return ret;
>  }
>  
> -static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
> +static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
>  {
>   struct rproc_subdev *subdev;
>  
>   list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
> - subdev->remove(subdev, crashed);
> + subdev->stop(subdev, crashed);
>  }
>  
>  /**
> @@ -901,8 +901,8 @@ static int rproc_start(struct rproc *rproc, const struct 
> firmware *fw)
>   return ret;
>   }
>  
> - /* probe any subdevices for the remote processor */
> - ret = rproc_probe_subdevices(rproc);
> + /* Start any subdevices for the remote processor */
> + ret = rproc_start_subdevices(rproc);
>   if (ret) {
>   dev_err(dev, "failed to probe subdevices for %s: %d\n",
>   rproc->name, ret);
> @@ -1014,8 +1014,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
>   struct device *dev = &rproc->dev;
>   int ret;
>  
> - /* remove any subdevices for the remote processor */
> - rproc_remove_subdevices(rproc, crashed);
> + /* Stop any subdevices for the remote processor */
> + rproc_stop_subdevices(rproc, crashed);
>  
>   /* the installed resource table is no longer accessible */
>   rproc->table_ptr = rproc->cached_table;
> @@ -1657,16 +1657,16 @@ EXPORT_SYMBOL(rproc_del);
>   * rproc_add_subdev() - add a subdevice to a remoteproc
>   * @rproc: rproc handle to add the subdevice to
>   * @subdev: subdev handle to register
> - * @probe: function to call when the rproc boots
> - * @remove: function to call when the rproc shuts down
> + * @start: function to call after the rproc is started
> + * @stop: function to call before the rproc is stopped
>   */
>  void rproc_add_subdev(struct rproc *rproc,
> struct rproc_subdev *subdev,
> -   int (*probe)(struct rproc_subdev *subdev),
> -   void (*remove)(struct rproc_subdev *subdev, bool crashed))
> +   int (*start)(struct rproc_subdev *subdev),
> +   void (*stop)(struct rproc_subdev *subdev, bool crashed))
>  {
> - subdev->probe = probe;
> - subdev->remove = remove;
> + subdev->start = start;
> + subdev->stop = stop;
>  
>   list_add_tail(&subdev->node, &rproc->subdevs);
>  }
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index dfdaede9139e..bf55bf2a5ee1 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -477,15 +477,15 @@ struct rproc {
>  /**
>   * struct rproc_subdev - subdevice tied to a remoteproc
>   * @node: list node related to the rproc subdevs list
> - * @probe: probe function, called as the rproc is started
> - * @remove: remove function, called as the rproc is being stopped, the 
> @crashed
> - *   parameter indicates if this originates from the a recovery
> + * @start: start function, called after the rproc has been started
> + * @stop: stop function, called before

Re: [PATCH resend 1/5] remoteproc: Rename subdev functions to start/stop

2018-06-25 Thread Bjorn Andersson

On Mon 25 Jun 18:34 PDT 2018, Alex Elder wrote:

> From: Bjorn Andersson 
> 
> "start" and "stop" are more suitable names for how these two operations
> are used, and they fit better with the upcoming introduction of two
> additional operations in the struct.
> 
> [el...@linaro.org: minor comment edits]
> 
> Signed-off-by: Bjorn Andersson 
> Acked-by: Alex Elder 
> Tested-by: Fabien Dessenne 

Sorry for not spotting this before, but per section 11 of
Documentation/process/submitting-patches.rst the tag part should read:


Tested-by: Fabien Dessenne 
Signed-off-by: Bjorn Andersson 
[el...@linaro.org: minor comment edits]
Signed-off-by: Alex Elder 


I.e. as you're sending the email you must be the last one certifying the
origin of the patch.

Regards,
Bjorn

> ---
>  drivers/remoteproc/remoteproc_core.c | 30 ++--
>  include/linux/remoteproc.h   | 14 ++---
>  2 files changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c 
> b/drivers/remoteproc/remoteproc_core.c
> index a9609d971f7f..5dd58e6bea88 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -774,13 +774,13 @@ static int rproc_handle_resources(struct rproc *rproc,
>   return ret;
>  }
>  
> -static int rproc_probe_subdevices(struct rproc *rproc)
> +static int rproc_start_subdevices(struct rproc *rproc)
>  {
>   struct rproc_subdev *subdev;
>   int ret;
>  
>   list_for_each_entry(subdev, &rproc->subdevs, node) {
> - ret = subdev->probe(subdev);
> + ret = subdev->start(subdev);
>   if (ret)
>   goto unroll_registration;
>   }
> @@ -789,17 +789,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
>  
>  unroll_registration:
>   list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
> - subdev->remove(subdev, true);
> + subdev->stop(subdev, true);
>  
>   return ret;
>  }
>  
> -static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
> +static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
>  {
>   struct rproc_subdev *subdev;
>  
>   list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
> - subdev->remove(subdev, crashed);
> + subdev->stop(subdev, crashed);
>  }
>  
>  /**
> @@ -901,8 +901,8 @@ static int rproc_start(struct rproc *rproc, const struct 
> firmware *fw)
>   return ret;
>   }
>  
> - /* probe any subdevices for the remote processor */
> - ret = rproc_probe_subdevices(rproc);
> + /* Start any subdevices for the remote processor */
> + ret = rproc_start_subdevices(rproc);
>   if (ret) {
>   dev_err(dev, "failed to probe subdevices for %s: %d\n",
>   rproc->name, ret);
> @@ -1014,8 +1014,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
>   struct device *dev = &rproc->dev;
>   int ret;
>  
> - /* remove any subdevices for the remote processor */
> - rproc_remove_subdevices(rproc, crashed);
> + /* Stop any subdevices for the remote processor */
> + rproc_stop_subdevices(rproc, crashed);
>  
>   /* the installed resource table is no longer accessible */
>   rproc->table_ptr = rproc->cached_table;
> @@ -1657,16 +1657,16 @@ EXPORT_SYMBOL(rproc_del);
>   * rproc_add_subdev() - add a subdevice to a remoteproc
>   * @rproc: rproc handle to add the subdevice to
>   * @subdev: subdev handle to register
> - * @probe: function to call when the rproc boots
> - * @remove: function to call when the rproc shuts down
> + * @start: function to call after the rproc is started
> + * @stop: function to call before the rproc is stopped
>   */
>  void rproc_add_subdev(struct rproc *rproc,
> struct rproc_subdev *subdev,
> -   int (*probe)(struct rproc_subdev *subdev),
> -   void (*remove)(struct rproc_subdev *subdev, bool crashed))
> +   int (*start)(struct rproc_subdev *subdev),
> +   void (*stop)(struct rproc_subdev *subdev, bool crashed))
>  {
> - subdev->probe = probe;
> - subdev->remove = remove;
> + subdev->start = start;
> + subdev->stop = stop;
>  
>   list_add_tail(&subdev->node, &rproc->subdevs);
>  }
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index dfdaede9139e..bf55bf2a5ee1 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -477,15 +477,15 @@ struct rproc {
>  /**
>   * struct rproc_subdev - subdevice tied to a remoteproc
>   * @node: list node related to the rproc subdevs list
> - * @probe: probe function, called as the rproc is started
> - * @remove: remove function, called as the rproc is being stopped, the 
> @crashed
> - *   parameter indicates if this originates from the a recovery
> + * @start: start function, called after the rproc has been started
> + * @stop: stop function, called before

linux-next: manual merge of the nvdimm tree with the tip tree

2018-06-25 Thread Stephen Rothwell

Hi all,

Today's linux-next merge of the nvdimm tree got a conflict in:

  arch/x86/kernel/cpu/mcheck/mce.c

between commit:

  d3d6923cd1ae ("x86/mce: Carve out the crashing_cpu check")

from the tip tree and commit:

  f6785eac562b ("x86/memory_failure: Introduce {set,clear}_mce_nospec()")

from the nvdimm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/x86/kernel/cpu/mcheck/mce.c
index 9a16f15f79eb,a0fbf0a8b7e6..
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@@ -1076,129 -1070,6 +1072,100 @@@ static int do_memory_failure(struct mc
return ret;
  }
  
- #ifndef mce_unmap_kpfn
- static void mce_unmap_kpfn(unsigned long pfn)
- {
-   unsigned long decoy_addr;
- 
-   /*
-* Unmap this page from the kernel 1:1 mappings to make sure
-* we don't log more errors because of speculative access to
-* the page.
-* We would like to just call:
-*  set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
-* but doing that would radically increase the odds of a
-* speculative access to the poison page because we'd have
-* the virtual address of the kernel 1:1 mapping sitting
-* around in registers.
-* Instead we get tricky.  We create a non-canonical address
-* that looks just like the one we want, but has bit 63 flipped.
-* This relies on set_memory_np() not checking whether we passed
-* a legal address.
-*/
- 
-   decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
- 
-   if (set_memory_np(decoy_addr, 1))
-   pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
- }
- #endif
- 
- 
 +/*
 + * Cases where we avoid rendezvous handler timeout:
 + * 1) If this CPU is offline.
 + *
 + * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
 + *  skip those CPUs which remain looping in the 1st kernel - see
 + *  crash_nmi_callback().
 + *
 + * Note: there still is a small window between kexec-ing and the new,
 + * kdump kernel establishing a new #MC handler where a broadcasted MCE
 + * might not get handled properly.
 + */
 +static bool __mc_check_crashing_cpu(int cpu)
 +{
 +  if (cpu_is_offline(cpu) ||
 +  (crashing_cpu != -1 && crashing_cpu != cpu)) {
 +  u64 mcgstatus;
 +
 +  mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 +  if (mcgstatus & MCG_STATUS_RIPV) {
 +  mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 +  return true;
 +  }
 +  }
 +  return false;
 +}
 +
 +static void __mc_scan_banks(struct mce *m, struct mce *final,
 +  unsigned long *toclear, unsigned long *valid_banks,
 +  int no_way_out, int *worst)
 +{
 +  struct mca_config *cfg = &mca_cfg;
 +  int severity, i;
 +
 +  for (i = 0; i < cfg->banks; i++) {
 +  __clear_bit(i, toclear);
 +  if (!test_bit(i, valid_banks))
 +  continue;
 +
 +  if (!mce_banks[i].ctl)
 +  continue;
 +
 +  m->misc = 0;
 +  m->addr = 0;
 +  m->bank = i;
 +
 +  m->status = mce_rdmsrl(msr_ops.status(i));
 +  if (!(m->status & MCI_STATUS_VAL))
 +  continue;
 +
 +  /*
 +   * Corrected or non-signaled errors are handled by
 +   * machine_check_poll(). Leave them alone, unless this panics.
 +   */
 +  if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
 +  !no_way_out)
 +  continue;
 +
 +  /* Set taint even when machine check was not enabled. */
 +  add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 +
 +  severity = mce_severity(m, cfg->tolerant, NULL, true);
 +
 +  /*
 +   * When machine check was for corrected/deferred handler don't
 +   * touch, unless we're panicking.
 +   */
 +  if ((severity == MCE_KEEP_SEVERITY ||
 +   severity == MCE_UCNA_SEVERITY) && !no_way_out)
 +  continue;
 +
 +  __set_bit(i, toclear);
 +
 +  /* Machine check event was not enabled. Clear, but ignore. */
 +  if (severity == MCE_NO_SEVERITY)
 +  continue;
 +
 +  mce_read_aux(m, i);
 +
 +  /* assuming valid severity level != 0 */
 +  m->severity = severity;
 +
 +  mce_log(m);
 +
 +

linux-next: manual merge of the nvdimm tree with the tip tree

2018-06-25 Thread Stephen Rothwell

Hi all,

Today's linux-next merge of the nvdimm tree got a conflict in:

  arch/x86/kernel/cpu/mcheck/mce.c

between commit:

  d3d6923cd1ae ("x86/mce: Carve out the crashing_cpu check")

from the tip tree and commit:

  f6785eac562b ("x86/memory_failure: Introduce {set,clear}_mce_nospec()")

from the nvdimm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/x86/kernel/cpu/mcheck/mce.c
index 9a16f15f79eb,a0fbf0a8b7e6..
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@@ -1076,129 -1070,6 +1072,100 @@@ static int do_memory_failure(struct mc
return ret;
  }
  
- #ifndef mce_unmap_kpfn
- static void mce_unmap_kpfn(unsigned long pfn)
- {
-   unsigned long decoy_addr;
- 
-   /*
-* Unmap this page from the kernel 1:1 mappings to make sure
-* we don't log more errors because of speculative access to
-* the page.
-* We would like to just call:
-*  set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
-* but doing that would radically increase the odds of a
-* speculative access to the poison page because we'd have
-* the virtual address of the kernel 1:1 mapping sitting
-* around in registers.
-* Instead we get tricky.  We create a non-canonical address
-* that looks just like the one we want, but has bit 63 flipped.
-* This relies on set_memory_np() not checking whether we passed
-* a legal address.
-*/
- 
-   decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
- 
-   if (set_memory_np(decoy_addr, 1))
-   pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
- }
- #endif
- 
- 
 +/*
 + * Cases where we avoid rendezvous handler timeout:
 + * 1) If this CPU is offline.
 + *
 + * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
 + *  skip those CPUs which remain looping in the 1st kernel - see
 + *  crash_nmi_callback().
 + *
 + * Note: there still is a small window between kexec-ing and the new,
 + * kdump kernel establishing a new #MC handler where a broadcasted MCE
 + * might not get handled properly.
 + */
 +static bool __mc_check_crashing_cpu(int cpu)
 +{
 +  if (cpu_is_offline(cpu) ||
 +  (crashing_cpu != -1 && crashing_cpu != cpu)) {
 +  u64 mcgstatus;
 +
 +  mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 +  if (mcgstatus & MCG_STATUS_RIPV) {
 +  mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 +  return true;
 +  }
 +  }
 +  return false;
 +}
 +
 +static void __mc_scan_banks(struct mce *m, struct mce *final,
 +  unsigned long *toclear, unsigned long *valid_banks,
 +  int no_way_out, int *worst)
 +{
 +  struct mca_config *cfg = &mca_cfg;
 +  int severity, i;
 +
 +  for (i = 0; i < cfg->banks; i++) {
 +  __clear_bit(i, toclear);
 +  if (!test_bit(i, valid_banks))
 +  continue;
 +
 +  if (!mce_banks[i].ctl)
 +  continue;
 +
 +  m->misc = 0;
 +  m->addr = 0;
 +  m->bank = i;
 +
 +  m->status = mce_rdmsrl(msr_ops.status(i));
 +  if (!(m->status & MCI_STATUS_VAL))
 +  continue;
 +
 +  /*
 +   * Corrected or non-signaled errors are handled by
 +   * machine_check_poll(). Leave them alone, unless this panics.
 +   */
 +  if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
 +  !no_way_out)
 +  continue;
 +
 +  /* Set taint even when machine check was not enabled. */
 +  add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 +
 +  severity = mce_severity(m, cfg->tolerant, NULL, true);
 +
 +  /*
 +   * When machine check was for corrected/deferred handler don't
 +   * touch, unless we're panicking.
 +   */
 +  if ((severity == MCE_KEEP_SEVERITY ||
 +   severity == MCE_UCNA_SEVERITY) && !no_way_out)
 +  continue;
 +
 +  __set_bit(i, toclear);
 +
 +  /* Machine check event was not enabled. Clear, but ignore. */
 +  if (severity == MCE_NO_SEVERITY)
 +  continue;
 +
 +  mce_read_aux(m, i);
 +
 +  /* assuming valid severity level != 0 */
 +  m->severity = severity;
 +
 +  mce_log(m);
 +
 +

Re: [PATCH 4/5] remoteproc: rename subdev probe and remove functions

2018-06-25 Thread Bjorn Andersson

On Mon 25 Jun 18:32 PDT 2018, Alex Elder wrote:

> On 05/29/2018 06:53 AM, Alex Elder wrote:
> > On 05/29/2018 04:12 AM, Arnaud Pouliquen wrote:
> >> Hello Alex
> >>
> >>
> >> We have the same needs (prepare unprepare steps) on our platform. We
> >> tested your core patches and they answer our need.
> > 
> > I'm very glad to hear that.  Would you offer your "Tested-by" on these?
> > 
> > On your comment below, yes, I will re-send v2 and will separate the
> > core from the glink code.
> 
> Arnaud, despite what I said above, I'm about to resend the code but
> will *not* be separating the core code from glink code.  It turns
> out that the glink code (and smd and ssr) are really part of the
> core code at the moment.  So after talking with Bjorn I agreed to
> just send the code without splitting them.
> 

I wasn't trying to say that it's part of the core, but after reading the
patches again I see that my memory failed me on how these were split.

I'm okay merging this patch even though it touches the two separate
files.

Regards,
Bjorn

Re: [PATCH 4/5] remoteproc: rename subdev probe and remove functions

2018-06-25 Thread Bjorn Andersson

On Mon 25 Jun 18:32 PDT 2018, Alex Elder wrote:

> On 05/29/2018 06:53 AM, Alex Elder wrote:
> > On 05/29/2018 04:12 AM, Arnaud Pouliquen wrote:
> >> Hello Alex
> >>
> >>
> >> We have the same needs (prepare unprepare steps) on our platform. We
> >> tested your core patches and they answer our need.
> > 
> > I'm very glad to hear that.  Would you offer your "Tested-by" on these?
> > 
> > On your comment below, yes, I will re-send v2 and will separate the
> > core from the glink code.
> 
> Arnaud, despite what I said above, I'm about to resend the code but
> will *not* be separating the core code from glink code.  It turns
> out that the glink code (and smd and ssr) are really part of the
> core code at the moment.  So after talking with Bjorn I agreed to
> just send the code without splitting them.
> 

I wasn't trying to say that it's part of the core, but after reading the
patches again I see that my memory failed me on how these were split.

I'm okay merging this patch even though it touches the two separate
files.

Regards,
Bjorn

Re: [PATCH 1/8] time: Add persistent clock support

2018-06-25 Thread Baolin Wang

Hi John,

On 26 June 2018 at 01:23, John Stultz  wrote:
> On Sat, Jun 23, 2018 at 5:14 PM, Thomas Gleixner  wrote:
>> On Wed, 13 Jun 2018, Baolin Wang wrote:
>>> Moreover we can register the clocksource with CLOCK_SOURCE_SUSPEND_NONSTOP
>>> to be one persistent clock, then we can simplify the suspend/resume
>>> accounting by removing CLOCK_SOURCE_SUSPEND_NONSTOP timing. After that
>>> we can only compensate the OS time by persistent clock or RTC.
>>
>> That makes sense because it adds a gazillion lines of code and removes 5?
>> Not really,
>>
>>> +/**
>>> + * persistent_clock_read_data - data required to read persistent clock
>>> + * @read: Returns a cycle value from persistent clock.
>>> + * @last_cycles: Clock cycle value at last update.
>>> + * @last_ns: Time value (nanoseconds) at last update.
>>> + * @mask: Bitmask for two's complement subtraction of non 64bit clocks.
>>> + * @mult: Cycle to nanosecond multiplier.
>>> + * @shift: Cycle to nanosecond divisor.
>>> + */
>>> +struct persistent_clock_read_data {
>>> + u64 (*read)(void);
>>> + u64 last_cycles;
>>> + u64 last_ns;
>>> + u64 mask;
>>> + u32 mult;
>>> + u32 shift;
>>> +};
>>> +/**
>>> + * persistent_clock - represent the persistent clock
>>> + * @read_data: Data required to read from persistent clock.
>>> + * @seq: Sequence counter for protecting updates.
>>> + * @freq: The frequency of the persistent clock.
>>> + * @wrap: Duration for persistent clock can run before wrapping.
>>> + * @alarm: Update timeout for persistent clock wrap.
>>> + * @alarm_inited: Indicate if the alarm has been initialized.
>>> + */
>>> +struct persistent_clock {
>>> + struct persistent_clock_read_data read_data;
>>> + seqcount_t seq;
>>> + u32 freq;
>>> + ktime_t wrap;
>>> + struct alarm alarm;
>>> + bool alarm_inited;
>>> +};
>>
>> NAK!
>>
>> There is no reason to invent yet another set of data structures and yet
>> more read functions with a sequence counter, which are just a bad and
>> broken copy of the existing timekeeping/clocksource code. And of course the
>> stuff is not serialized against multiple registrations, etc. etc.
>>
>> Plus the utter nonsense that any call site has to do the same thing over
>> and over:
>>
>> register():
>> start_alarm_timer();
>>
>> Why is this required in the first place? It's not at all. The only place
>> where such an alarm timer will be required is when the system actually goes
>> to suspend. Starting it at registration time is pointless and even counter
>> productive. Assume the clocksource wraps every 2 hours. So you start it at
>> boot time and after 119 minutes uptime the system suspends. So it will
>> wakeup one minute later to update the clocksource. Heck no. If the timer is
>> started when the machine actually suspends it will wake up earliest in 120
>> minutes.
>>
>> And you even add that to the TSC which does not need it at all. It will
>> wrap in about 400 years on a 2GHZ machine. So you degrade the functionality
>> instead of improving it.
>>
>> So no, this is not going anywhere.
>>
>> Let's look at the problem itself:
>>
>>You want to use one clocksource for timekeeping during runtime which is
>>fast and accurate and another one for suspend time injection which is
>>slower and/or less accurate because the fast one stops in suspend.
>>
>>Plus you need an alarmtimer which makes sure that the clocksource does
>>not wrap around during suspend.
>>
>> Now let's look at what we have already:
>>
>>Both clocksources already exist and are registered as clocksources with
>>all required data in the clocksource core.
>>
>> Ergo the only sane and logical conclusion is to expand the existing
>> infrastructure to handle that.
>>
>> When a clocksource is registered, then the registration function already
>> makes decisions about using it as timekeeping clocksource. So add a few
>> lines of code to check whether the newly registered clocksource is suitable
>> and preferred for suspend.
>>
>> if (!stops_in_suspend(newcs)) {
>> if (!suspend_cs || is_preferred_suspend_cs(newcs))
>> suspend_cs = newcs;
>> }
>>
>> The is_preferred_suspend_cs() can be based on rating, the maximum suspend
>> length which can be achieved or whatever is sensible. It should start off as
>> a very simple decision function based on rating and not a prematurely
>> overengineered monstrosity.
>>
>> The suspend/resume() code needs a few very simple changes:
>>
>> xxx_suspend():
>> clocksource_prepare_suspend();
>>
>>   Note, this is _NOT_ timekeeping_suspend() because that is invoked _AFTER_
>>   alarmtimer_suspend(). So if an alarm timer is required it needs to be
>>   armed before that. A trivial solution might be to just call it from
>>   alarmtimer_suspend(), but that's a minor detail to worry about.
>>
>> timekeeping_suspend()
>> {
>> clocksource_enter_suspend();
>> ...
>>
>> timekeeping_resume()
>> {
>>

Re: [PATCH 1/8] time: Add persistent clock support

2018-06-25 Thread Baolin Wang

Hi John,

On 26 June 2018 at 01:23, John Stultz  wrote:
> On Sat, Jun 23, 2018 at 5:14 PM, Thomas Gleixner  wrote:
>> On Wed, 13 Jun 2018, Baolin Wang wrote:
>>> Moreover we can register the clocksource with CLOCK_SOURCE_SUSPEND_NONSTOP
>>> to be one persistent clock, then we can simplify the suspend/resume
>>> accounting by removing CLOCK_SOURCE_SUSPEND_NONSTOP timing. After that
>>> we can only compensate the OS time by persistent clock or RTC.
>>
>> That makes sense because it adds a gazillion lines of code and removes 5?
>> Not really,
>>
>>> +/**
>>> + * persistent_clock_read_data - data required to read persistent clock
>>> + * @read: Returns a cycle value from persistent clock.
>>> + * @last_cycles: Clock cycle value at last update.
>>> + * @last_ns: Time value (nanoseconds) at last update.
>>> + * @mask: Bitmask for two's complement subtraction of non 64bit clocks.
>>> + * @mult: Cycle to nanosecond multiplier.
>>> + * @shift: Cycle to nanosecond divisor.
>>> + */
>>> +struct persistent_clock_read_data {
>>> + u64 (*read)(void);
>>> + u64 last_cycles;
>>> + u64 last_ns;
>>> + u64 mask;
>>> + u32 mult;
>>> + u32 shift;
>>> +};
>>> +/**
>>> + * persistent_clock - represent the persistent clock
>>> + * @read_data: Data required to read from persistent clock.
>>> + * @seq: Sequence counter for protecting updates.
>>> + * @freq: The frequency of the persistent clock.
>>> + * @wrap: Duration for persistent clock can run before wrapping.
>>> + * @alarm: Update timeout for persistent clock wrap.
>>> + * @alarm_inited: Indicate if the alarm has been initialized.
>>> + */
>>> +struct persistent_clock {
>>> + struct persistent_clock_read_data read_data;
>>> + seqcount_t seq;
>>> + u32 freq;
>>> + ktime_t wrap;
>>> + struct alarm alarm;
>>> + bool alarm_inited;
>>> +};
>>
>> NAK!
>>
>> There is no reason to invent yet another set of data structures and yet
>> more read functions with a sequence counter, which are just a bad and
>> broken copy of the existing timekeeping/clocksource code. And of course the
>> stuff is not serialized against multiple registrations, etc. etc.
>>
>> Plus the utter nonsense that any call site has to do the same thing over
>> and over:
>>
>> register():
>> start_alarm_timer();
>>
>> Why is this required in the first place? It's not at all. The only place
>> where such an alarm timer will be required is when the system actually goes
>> to suspend. Starting it at registration time is pointless and even counter
>> productive. Assume the clocksource wraps every 2 hours. So you start it at
>> boot time and after 119 minutes uptime the system suspends. So it will
>> wakeup one minute later to update the clocksource. Heck no. If the timer is
>> started when the machine actually suspends it will wake up earliest in 120
>> minutes.
>>
>> And you even add that to the TSC which does not need it at all. It will
>> wrap in about 400 years on a 2GHZ machine. So you degrade the functionality
>> instead of improving it.
>>
>> So no, this is not going anywhere.
>>
>> Let's look at the problem itself:
>>
>>You want to use one clocksource for timekeeping during runtime which is
>>fast and accurate and another one for suspend time injection which is
>>slower and/or less accurate because the fast one stops in suspend.
>>
>>Plus you need an alarmtimer which makes sure that the clocksource does
>>not wrap around during suspend.
>>
>> Now let's look at what we have already:
>>
>>Both clocksources already exist and are registered as clocksources with
>>all required data in the clocksource core.
>>
>> Ergo the only sane and logical conclusion is to expand the existing
>> infrastructure to handle that.
>>
>> When a clocksource is registered, then the registration function already
>> makes decisions about using it as timekeeping clocksource. So add a few
>> lines of code to check whether the newly registered clocksource is suitable
>> and preferred for suspend.
>>
>> if (!stops_in_suspend(newcs)) {
>> if (!suspend_cs || is_preferred_suspend_cs(newcs))
>> suspend_cs = newcs;
>> }
>>
>> The is_preferred_suspend_cs() can be based on rating, the maximum suspend
>> length which can be achieved or whatever is sensible. It should start off as
>> a very simple decision function based on rating and not a prematurely
>> overengineered monstrosity.
>>
>> The suspend/resume() code needs a few very simple changes:
>>
>> xxx_suspend():
>> clocksource_prepare_suspend();
>>
>>   Note, this is _NOT_ timekeeping_suspend() because that is invoked _AFTER_
>>   alarmtimer_suspend(). So if an alarm timer is required it needs to be
>>   armed before that. A trivial solution might be to just call it from
>>   alarmtimer_suspend(), but that's a minor detail to worry about.
>>
>> timekeeping_suspend()
>> {
>> clocksource_enter_suspend();
>> ...
>>
>> timekeeping_resume()
>> {
>>

Re: [PATCH 1/8] time: Add persistent clock support

2018-06-25 Thread Baolin Wang

Hi Thomas,

On 24 June 2018 at 08:14, Thomas Gleixner  wrote:
> On Wed, 13 Jun 2018, Baolin Wang wrote:
>> Moreover we can register the clocksource with CLOCK_SOURCE_SUSPEND_NONSTOP
>> to be one persistent clock, then we can simplify the suspend/resume
>> accounting by removing CLOCK_SOURCE_SUSPEND_NONSTOP timing. After that
>> we can only compensate the OS time by persistent clock or RTC.
>
> That makes sense because it adds a gazillion lines of code and removes 5?
> Not really,
>
>> +/**
>> + * persistent_clock_read_data - data required to read persistent clock
>> + * @read: Returns a cycle value from persistent clock.
>> + * @last_cycles: Clock cycle value at last update.
>> + * @last_ns: Time value (nanoseconds) at last update.
>> + * @mask: Bitmask for two's complement subtraction of non 64bit clocks.
>> + * @mult: Cycle to nanosecond multiplier.
>> + * @shift: Cycle to nanosecond divisor.
>> + */
>> +struct persistent_clock_read_data {
>> + u64 (*read)(void);
>> + u64 last_cycles;
>> + u64 last_ns;
>> + u64 mask;
>> + u32 mult;
>> + u32 shift;
>> +};
>> +/**
>> + * persistent_clock - represent the persistent clock
>> + * @read_data: Data required to read from persistent clock.
>> + * @seq: Sequence counter for protecting updates.
>> + * @freq: The frequency of the persistent clock.
>> + * @wrap: Duration for persistent clock can run before wrapping.
>> + * @alarm: Update timeout for persistent clock wrap.
>> + * @alarm_inited: Indicate if the alarm has been initialized.
>> + */
>> +struct persistent_clock {
>> + struct persistent_clock_read_data read_data;
>> + seqcount_t seq;
>> + u32 freq;
>> + ktime_t wrap;
>> + struct alarm alarm;
>> + bool alarm_inited;
>> +};
>
> NAK!
>
> There is no reason to invent yet another set of data structures and yet
> more read functions with a sequence counter, which are just a bad and
> broken copy of the existing timekeeping/clocksource code. And of course the
> stuff is not serialized against multiple registrations, etc. etc.
>
> Plus the utter nonsense that any call site has to do the same thing over
> and over:
>
> register():
> start_alarm_timer();
>
> Why is this required in the first place? It's not at all. The only place
> where such an alarm timer will be required is when the system actually goes
> to suspend. Starting it at registration time is pointless and even counter
> productive. Assume the clocksource wraps every 2 hours. So you start it at
> boot time and after 119 minutes uptime the system suspends. So it will
> wakeup one minute later to update the clocksource. Heck no. If the timer is
> started when the machine actually suspends it will wake up earliest in 120
> minutes.
>
> And you even add that to the TSC which does not need it at all. It will
> wrap in about 400 years on a 2GHZ machine. So you degrade the functionality
> instead of improving it.
>
> So no, this is not going anywhere.
>
> Let's look at the problem itself:
>
>You want to use one clocksource for timekeeping during runtime which is
>fast and accurate and another one for suspend time injection which is
>slower and/or less accurate because the fast one stops in suspend.
>
>Plus you need an alarmtimer which makes sure that the clocksource does
>not wrap around during suspend.
>
> Now let's look at what we have already:
>
>Both clocksources already exist and are registered as clocksources with
>all required data in the clocksource core.
>
> Ergo the only sane and logical conclusion is to expand the existing
> infrastructure to handle that.
>
> When a clocksource is registered, then the registration function already
> makes decisions about using it as timekeeping clocksource. So add a few
> lines of code to check whether the newly registered clocksource is suitable
> and preferred for suspend.
>
> if (!stops_in_suspend(newcs)) {
> if (!suspend_cs || is_preferred_suspend_cs(newcs))
> suspend_cs = newcs;
> }
>
> The is_preferred_suspend_cs() can be based on rating, the maximum suspend
> length which can be achieved or whatever is sensible. It should start off as
> a very simple decision function based on rating and not a prematurely
> overengineered monstrosity.
>
> The suspend/resume() code needs a few very simple changes:
>
> xxx_suspend():
> clocksource_prepare_suspend();
>
>   Note, this is _NOT_ timekeeping_suspend() because that is invoked _AFTER_
>   alarmtimer_suspend(). So if an alarm timer is required it needs to be
>   armed before that. A trivial solution might be to just call it from
>   alarmtimer_suspend(), but that's a minor detail to worry about.
>
> timekeeping_suspend()
> {
> clocksource_enter_suspend();
> ...
>
> timekeeping_resume()
> {
> ...
> if (clocksource_leave_suspend()) {
> ts_delta = ns_to_timespec64(nsec);
> sleeptime_injected = true;
> } else if (..

Re: [PATCH 1/8] time: Add persistent clock support

2018-06-25 Thread Baolin Wang

Hi Thomas,

On 24 June 2018 at 08:14, Thomas Gleixner  wrote:
> On Wed, 13 Jun 2018, Baolin Wang wrote:
>> Moreover we can register the clocksource with CLOCK_SOURCE_SUSPEND_NONSTOP
>> to be one persistent clock, then we can simplify the suspend/resume
>> accounting by removing CLOCK_SOURCE_SUSPEND_NONSTOP timing. After that
>> we can only compensate the OS time by persistent clock or RTC.
>
> That makes sense because it adds a gazillion lines of code and removes 5?
> Not really,
>
>> +/**
>> + * persistent_clock_read_data - data required to read persistent clock
>> + * @read: Returns a cycle value from persistent clock.
>> + * @last_cycles: Clock cycle value at last update.
>> + * @last_ns: Time value (nanoseconds) at last update.
>> + * @mask: Bitmask for two's complement subtraction of non 64bit clocks.
>> + * @mult: Cycle to nanosecond multiplier.
>> + * @shift: Cycle to nanosecond divisor.
>> + */
>> +struct persistent_clock_read_data {
>> + u64 (*read)(void);
>> + u64 last_cycles;
>> + u64 last_ns;
>> + u64 mask;
>> + u32 mult;
>> + u32 shift;
>> +};
>> +/**
>> + * persistent_clock - represent the persistent clock
>> + * @read_data: Data required to read from persistent clock.
>> + * @seq: Sequence counter for protecting updates.
>> + * @freq: The frequency of the persistent clock.
>> + * @wrap: Duration for persistent clock can run before wrapping.
>> + * @alarm: Update timeout for persistent clock wrap.
>> + * @alarm_inited: Indicate if the alarm has been initialized.
>> + */
>> +struct persistent_clock {
>> + struct persistent_clock_read_data read_data;
>> + seqcount_t seq;
>> + u32 freq;
>> + ktime_t wrap;
>> + struct alarm alarm;
>> + bool alarm_inited;
>> +};
>
> NAK!
>
> There is no reason to invent yet another set of data structures and yet
> more read functions with a sequence counter, which are just a bad and
> broken copy of the existing timekeeping/clocksource code. And of course the
> stuff is not serialized against multiple registrations, etc. etc.
>
> Plus the utter nonsense that any call site has to do the same thing over
> and over:
>
> register():
> start_alarm_timer();
>
> Why is this required in the first place? It's not at all. The only place
> where such an alarm timer will be required is when the system actually goes
> to suspend. Starting it at registration time is pointless and even counter
> productive. Assume the clocksource wraps every 2 hours. So you start it at
> boot time and after 119 minutes uptime the system suspends. So it will
> wakeup one minute later to update the clocksource. Heck no. If the timer is
> started when the machine actually suspends it will wake up earliest in 120
> minutes.
>
> And you even add that to the TSC which does not need it at all. It will
> wrap in about 400 years on a 2GHZ machine. So you degrade the functionality
> instead of improving it.
>
> So no, this is not going anywhere.
>
> Let's look at the problem itself:
>
>You want to use one clocksource for timekeeping during runtime which is
>fast and accurate and another one for suspend time injection which is
>slower and/or less accurate because the fast one stops in suspend.
>
>Plus you need an alarmtimer which makes sure that the clocksource does
>not wrap around during suspend.
>
> Now let's look at what we have already:
>
>Both clocksources already exist and are registered as clocksources with
>all required data in the clocksource core.
>
> Ergo the only sane and logical conclusion is to expand the existing
> infrastructure to handle that.
>
> When a clocksource is registered, then the registration function already
> makes decisions about using it as timekeeping clocksource. So add a few
> lines of code to check whether the newly registered clocksource is suitable
> and preferred for suspend.
>
> if (!stops_in_suspend(newcs)) {
> if (!suspend_cs || is_preferred_suspend_cs(newcs))
> suspend_cs = newcs;
> }
>
> The is_preferred_suspend_cs() can be based on rating, the maximum suspend
> length which can be achieved or whatever is sensible. It should start off as
> a very simple decision function based on rating and not a prematurely
> overengineered monstrosity.
>
> The suspend/resume() code needs a few very simple changes:
>
> xxx_suspend():
> clocksource_prepare_suspend();
>
>   Note, this is _NOT_ timekeeping_suspend() because that is invoked _AFTER_
>   alarmtimer_suspend(). So if an alarm timer is required it needs to be
>   armed before that. A trivial solution might be to just call it from
>   alarmtimer_suspend(), but that's a minor detail to worry about.
>
> timekeeping_suspend()
> {
> clocksource_enter_suspend();
> ...
>
> timekeeping_resume()
> {
> ...
> if (clocksource_leave_suspend()) {
> ts_delta = ns_to_timespec64(nsec);
> sleeptime_injected = true;
> } else if (..

[PATCH v2 1/2] dt-bindings: mediatek: Add bindings for mediatek MT6765 Platform

2018-06-25 Thread Mars Cheng

This adds dt-binding documentation for Mediatek MT6765. Only
include very basic items: gic, uart, timer and cpu.

Signed-off-by: Mars Cheng 
---
 Documentation/devicetree/bindings/arm/mediatek.txt |4 
 .../interrupt-controller/mediatek,sysirq.txt   |1 +
 .../devicetree/bindings/serial/mtk-uart.txt|1 +
 3 files changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/mediatek.txt 
b/Documentation/devicetree/bindings/arm/mediatek.txt
index 7d21ab3..48fac4e 100644
--- a/Documentation/devicetree/bindings/arm/mediatek.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek.txt
@@ -11,6 +11,7 @@ compatible: Must contain one of
"mediatek,mt6589"
"mediatek,mt6592"
"mediatek,mt6755"
+   "mediatek,mt6765"
"mediatek,mt6795"
"mediatek,mt6797"
"mediatek,mt7622"
@@ -41,6 +42,9 @@ Supported boards:
 - Evaluation phone for MT6755(Helio P10):
 Required root node properties:
   - compatible = "mediatek,mt6755-evb", "mediatek,mt6755";
+- Evaluation board for MT6765(Helio P22):
+Required root node properties:
+  - compatible = "mediatek,mt6765-evb", "mediatek,mt6765";
 - Evaluation board for MT6795(Helio X10):
 Required root node properties:
   - compatible = "mediatek,mt6795-evb", "mediatek,mt6795";
diff --git 
a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt 
b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
index 07bf0b9..c8eda80 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
@@ -11,6 +11,7 @@ Required properties:
"mediatek,mt7622-sysirq", "mediatek,mt6577-sysirq": for MT7622
"mediatek,mt6795-sysirq", "mediatek,mt6577-sysirq": for MT6795
"mediatek,mt6797-sysirq", "mediatek,mt6577-sysirq": for MT6797
+   "mediatek,mt6765-sysirq", "mediatek,mt6577-sysirq": for MT6765
"mediatek,mt6755-sysirq", "mediatek,mt6577-sysirq": for MT6755
"mediatek,mt6592-sysirq", "mediatek,mt6577-sysirq": for MT6592
"mediatek,mt6589-sysirq", "mediatek,mt6577-sysirq": for MT6589
diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt 
b/Documentation/devicetree/bindings/serial/mtk-uart.txt
index f73abff..742cb47 100644
--- a/Documentation/devicetree/bindings/serial/mtk-uart.txt
+++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt
@@ -8,6 +8,7 @@ Required properties:
   * "mediatek,mt6582-uart" for MT6582 compatible UARTS
   * "mediatek,mt6589-uart" for MT6589 compatible UARTS
   * "mediatek,mt6755-uart" for MT6755 compatible UARTS
+  * "mediatek,mt6765-uart" for MT6765 compatible UARTS
   * "mediatek,mt6795-uart" for MT6795 compatible UARTS
   * "mediatek,mt6797-uart" for MT6797 compatible UARTS
   * "mediatek,mt7622-uart" for MT7622 compatible UARTS
-- 
1.7.9.5

[PATCH v2 1/2] dt-bindings: mediatek: Add bindings for mediatek MT6765 Platform

2018-06-25 Thread Mars Cheng

This adds dt-binding documentation for Mediatek MT6765. Only
include very basic items: gic, uart, timer and cpu.

Signed-off-by: Mars Cheng 
---
 Documentation/devicetree/bindings/arm/mediatek.txt |4 
 .../interrupt-controller/mediatek,sysirq.txt   |1 +
 .../devicetree/bindings/serial/mtk-uart.txt|1 +
 3 files changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/mediatek.txt 
b/Documentation/devicetree/bindings/arm/mediatek.txt
index 7d21ab3..48fac4e 100644
--- a/Documentation/devicetree/bindings/arm/mediatek.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek.txt
@@ -11,6 +11,7 @@ compatible: Must contain one of
"mediatek,mt6589"
"mediatek,mt6592"
"mediatek,mt6755"
+   "mediatek,mt6765"
"mediatek,mt6795"
"mediatek,mt6797"
"mediatek,mt7622"
@@ -41,6 +42,9 @@ Supported boards:
 - Evaluation phone for MT6755(Helio P10):
 Required root node properties:
   - compatible = "mediatek,mt6755-evb", "mediatek,mt6755";
+- Evaluation board for MT6765(Helio P22):
+Required root node properties:
+  - compatible = "mediatek,mt6765-evb", "mediatek,mt6765";
 - Evaluation board for MT6795(Helio X10):
 Required root node properties:
   - compatible = "mediatek,mt6795-evb", "mediatek,mt6795";
diff --git 
a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt 
b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
index 07bf0b9..c8eda80 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
@@ -11,6 +11,7 @@ Required properties:
"mediatek,mt7622-sysirq", "mediatek,mt6577-sysirq": for MT7622
"mediatek,mt6795-sysirq", "mediatek,mt6577-sysirq": for MT6795
"mediatek,mt6797-sysirq", "mediatek,mt6577-sysirq": for MT6797
+   "mediatek,mt6765-sysirq", "mediatek,mt6577-sysirq": for MT6765
"mediatek,mt6755-sysirq", "mediatek,mt6577-sysirq": for MT6755
"mediatek,mt6592-sysirq", "mediatek,mt6577-sysirq": for MT6592
"mediatek,mt6589-sysirq", "mediatek,mt6577-sysirq": for MT6589
diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt 
b/Documentation/devicetree/bindings/serial/mtk-uart.txt
index f73abff..742cb47 100644
--- a/Documentation/devicetree/bindings/serial/mtk-uart.txt
+++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt
@@ -8,6 +8,7 @@ Required properties:
   * "mediatek,mt6582-uart" for MT6582 compatible UARTS
   * "mediatek,mt6589-uart" for MT6589 compatible UARTS
   * "mediatek,mt6755-uart" for MT6755 compatible UARTS
+  * "mediatek,mt6765-uart" for MT6765 compatible UARTS
   * "mediatek,mt6795-uart" for MT6795 compatible UARTS
   * "mediatek,mt6797-uart" for MT6797 compatible UARTS
   * "mediatek,mt7622-uart" for MT7622 compatible UARTS
-- 
1.7.9.5

[PATCH v2 2/2] arm64: dts: mediatek: add mt6765 support

2018-06-25 Thread Mars Cheng

This adds basic chip support for MT6765 SoC.

Signed-off-by: Mars Cheng 
---
 arch/arm64/boot/dts/mediatek/Makefile   |1 +
 arch/arm64/boot/dts/mediatek/mt6765-evb.dts |   33 ++
 arch/arm64/boot/dts/mediatek/mt6765.dtsi|  158 +++
 3 files changed, 192 insertions(+)
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765-evb.dts
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765.dtsi

diff --git a/arch/arm64/boot/dts/mediatek/Makefile 
b/arch/arm64/boot/dts/mediatek/Makefile
index ac17f60..7506b0d 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt2712-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt6755-evb.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt6765-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt6795-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt6797-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7622-rfb1.dtb
diff --git a/arch/arm64/boot/dts/mediatek/mt6765-evb.dts 
b/arch/arm64/boot/dts/mediatek/mt6765-evb.dts
new file mode 100644
index 000..36dddff2
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt6765-evb.dts
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dts file for Mediatek MT6765
+ *
+ * (C) Copyright 2018. Mediatek, Inc.
+ *
+ * Mars Cheng 
+ */
+
+/dts-v1/;
+#include "mt6765.dtsi"
+
+/ {
+   model = "MediaTek MT6765 EVB";
+   compatible = "mediatek,mt6765-evb", "mediatek,mt6765";
+
+   aliases {
+   serial0 = 
+   };
+
+   memory@4000 {
+   device_type = "memory";
+   reg = <0 0x4000 0 0x1e80>;
+   };
+
+   chosen {
+   stdout-path = "serial0:921600n8";
+   };
+};
+
+ {
+   status = "okay";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt6765.dtsi 
b/arch/arm64/boot/dts/mediatek/mt6765.dtsi
new file mode 100644
index 000..ab34c0f
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt6765.dtsi
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dts file for Mediatek MT6765
+ *
+ * (C) Copyright 2018. Mediatek, Inc.
+ *
+ * Mars Cheng 
+ */
+
+#include 
+#include 
+
+/ {
+   compatible = "mediatek,mt6765";
+   interrupt-parent = <>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   psci {
+   compatible = "arm,psci-0.2";
+   method = "smc";
+   };
+
+   cpus {
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   cpu0: cpu@0 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x000>;
+   };
+
+   cpu1: cpu@1 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x001>;
+   };
+
+   cpu2: cpu@2 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x002>;
+   };
+
+   cpu3: cpu@3 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x003>;
+   };
+
+   cpu4: cpu@100 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x100>;
+   };
+
+   cpu5: cpu@101 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x101>;
+   };
+
+   cpu6: cpu@102 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x102>;
+   };
+
+   cpu7: cpu@103 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x103>;
+   };
+   };
+
+   baud_clk: dummy26m {
+   compatible = "fixed-clock";
+   clock-frequency = <2600>;
+   #clock-cells = <0>;
+   };
+
+   sys_clk: dummyclk {
+   compatible = "fixed-clock";
+   clock-frequency = <2600>;
+   #clock-cells = <0>;
+   };
+
+   timer {
+   compatible = "arm,armv8-timer";
+   interrupt-parent = <>;
+   interrupts = ,
+,
+,
+

[PATCH 0/2] Add basic SoC support for MT6765

2018-06-25 Thread Mars Cheng



This patch adds basic SoC support for Mediatek's new 8-core SoC,
MT6765, which is mainly for smartphone applications.

Change in V2:
1. fix clk properties in uart dts node
2. fix typo in submit title
3. add simple-bus in mt6765.dtsi
4. use correct SPDX license format

Mars Cheng (2):
  dt-bindings: mediatek: Add bindings for mediatek MT6765 Platform
  arm64: dts: mediatek: add mt6765 support

 Documentation/devicetree/bindings/arm/mediatek.txt |4 +
 .../interrupt-controller/mediatek,sysirq.txt   |1 +
 .../devicetree/bindings/serial/mtk-uart.txt|1 +
 arch/arm64/boot/dts/mediatek/Makefile  |1 +
 arch/arm64/boot/dts/mediatek/mt6765-evb.dts|   33 
 arch/arm64/boot/dts/mediatek/mt6765.dtsi   |  158 
 6 files changed, 198 insertions(+)
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765-evb.dts
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765.dtsi

[PATCH v2 2/2] arm64: dts: mediatek: add mt6765 support

2018-06-25 Thread Mars Cheng

This adds basic chip support for MT6765 SoC.

Signed-off-by: Mars Cheng 
---
 arch/arm64/boot/dts/mediatek/Makefile   |1 +
 arch/arm64/boot/dts/mediatek/mt6765-evb.dts |   33 ++
 arch/arm64/boot/dts/mediatek/mt6765.dtsi|  158 +++
 3 files changed, 192 insertions(+)
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765-evb.dts
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765.dtsi

diff --git a/arch/arm64/boot/dts/mediatek/Makefile 
b/arch/arm64/boot/dts/mediatek/Makefile
index ac17f60..7506b0d 100644
--- a/arch/arm64/boot/dts/mediatek/Makefile
+++ b/arch/arm64/boot/dts/mediatek/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt2712-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt6755-evb.dtb
+dtb-$(CONFIG_ARCH_MEDIATEK) += mt6765-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt6795-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt6797-evb.dtb
 dtb-$(CONFIG_ARCH_MEDIATEK) += mt7622-rfb1.dtb
diff --git a/arch/arm64/boot/dts/mediatek/mt6765-evb.dts 
b/arch/arm64/boot/dts/mediatek/mt6765-evb.dts
new file mode 100644
index 000..36dddff2
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt6765-evb.dts
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dts file for Mediatek MT6765
+ *
+ * (C) Copyright 2018. Mediatek, Inc.
+ *
+ * Mars Cheng 
+ */
+
+/dts-v1/;
+#include "mt6765.dtsi"
+
+/ {
+   model = "MediaTek MT6765 EVB";
+   compatible = "mediatek,mt6765-evb", "mediatek,mt6765";
+
+   aliases {
+   serial0 = 
+   };
+
+   memory@4000 {
+   device_type = "memory";
+   reg = <0 0x4000 0 0x1e80>;
+   };
+
+   chosen {
+   stdout-path = "serial0:921600n8";
+   };
+};
+
+ {
+   status = "okay";
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt6765.dtsi 
b/arch/arm64/boot/dts/mediatek/mt6765.dtsi
new file mode 100644
index 000..ab34c0f
--- /dev/null
+++ b/arch/arm64/boot/dts/mediatek/mt6765.dtsi
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dts file for Mediatek MT6765
+ *
+ * (C) Copyright 2018. Mediatek, Inc.
+ *
+ * Mars Cheng 
+ */
+
+#include 
+#include 
+
+/ {
+   compatible = "mediatek,mt6765";
+   interrupt-parent = <>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+
+   psci {
+   compatible = "arm,psci-0.2";
+   method = "smc";
+   };
+
+   cpus {
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   cpu0: cpu@0 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x000>;
+   };
+
+   cpu1: cpu@1 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x001>;
+   };
+
+   cpu2: cpu@2 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x002>;
+   };
+
+   cpu3: cpu@3 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x003>;
+   };
+
+   cpu4: cpu@100 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x100>;
+   };
+
+   cpu5: cpu@101 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x101>;
+   };
+
+   cpu6: cpu@102 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x102>;
+   };
+
+   cpu7: cpu@103 {
+   device_type = "cpu";
+   compatible = "arm,cortex-a53";
+   enable-method = "psci";
+   reg = <0x103>;
+   };
+   };
+
+   baud_clk: dummy26m {
+   compatible = "fixed-clock";
+   clock-frequency = <2600>;
+   #clock-cells = <0>;
+   };
+
+   sys_clk: dummyclk {
+   compatible = "fixed-clock";
+   clock-frequency = <2600>;
+   #clock-cells = <0>;
+   };
+
+   timer {
+   compatible = "arm,armv8-timer";
+   interrupt-parent = <>;
+   interrupts = ,
+,
+,
+

[PATCH 0/2] Add basic SoC support for MT6765

2018-06-25 Thread Mars Cheng



This patch adds basic SoC support for Mediatek's new 8-core SoC,
MT6765, which is mainly for smartphone applications.

Change in V2:
1. fix clk properties in uart dts node
2. fix typo in submit title
3. add simple-bus in mt6765.dtsi
4. use correct SPDX license format

Mars Cheng (2):
  dt-bindings: mediatek: Add bindings for mediatek MT6765 Platform
  arm64: dts: mediatek: add mt6765 support

 Documentation/devicetree/bindings/arm/mediatek.txt |4 +
 .../interrupt-controller/mediatek,sysirq.txt   |1 +
 .../devicetree/bindings/serial/mtk-uart.txt|1 +
 arch/arm64/boot/dts/mediatek/Makefile  |1 +
 arch/arm64/boot/dts/mediatek/mt6765-evb.dts|   33 
 arch/arm64/boot/dts/mediatek/mt6765.dtsi   |  158 
 6 files changed, 198 insertions(+)
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765-evb.dts
 create mode 100644 arch/arm64/boot/dts/mediatek/mt6765.dtsi

Re: [PATCH v4 1/4] seccomp: add a return code to trap to userspace

2018-06-25 Thread Andy Lutomirski




> On Jun 25, 2018, at 6:32 PM, Tycho Andersen  wrote:
> 
>> On Sat, Jun 23, 2018 at 12:27:43AM +0200, Jann Horn wrote:
>>> On Fri, Jun 22, 2018 at 11:51 PM Kees Cook  wrote:
>>> 
>>>> On Fri, Jun 22, 2018 at 11:09 AM, Andy Lutomirski wrote:
>>>> One possible extra issue: IIRC /proc/.../mem uses FOLL_FORCE, which is not
>>>> what we want here.
>> 
>> Uuugh, I forgot about that.
>> 
>>>> How about just adding an explicit “read/write the seccomp-trapped task’s
>>>> memory” primitive?  That should be easier than a “open mem fd” primitive.
>>> 
>>> Uuugh. Can we avoid adding another "read/write remote process memory"
>>> interface? The point of this series was to provide a lightweight
>>> approach to what should normally be possible via the existing
>>> seccomp+ptrace interface. I do like Jann's context idea, but I agree
>>> with Andy: it can't be a handle to /proc/$pid/mem, since it's
>>> FOLL_FORCE. Is there any other kind of process context id we can use
>>> for this instead of pid? There was once an idea of pid-fd but it never
>>> landed... This would let us get rid of the "id" in the structure too.
>>> And if that existed, we could make process_vm_*v() safer too (taking a
>>> pid-fd instead of a pid).
>> 
>> Or make a duplicate of /proc/$pid/mem that only differs in whether it
>> sets FOLL_FORCE? The code is basically already there... something like
>> this:
> 
> But we want more than just memory access, I think. rootfs access, ns
> fds, etc. all seem like they might be useful, and racy to open.
> 
> I guess I see two options: use the existing id and add something to
> seccomp() to ask if it's still valid or independent of this patchset
> add some kind of pid id :\
> 

I think we use the existing id / cookie / whatever and ask seccomp, or new 
syscalls, to do the requested operation. This is because we know the target 
task is in a very special stopping point. As a result, a seccomp-specific 
mechanism can do RCU-less fd modifications against a single-threaded target, 
can muck with things like struct cred, etc, while a more general interface 
can’t.

It might be nice to add a syscall with flags such that it could be used on 
ptrace-stopped targets later on. Something like:

access_remote_task(int fd, u64 id, u32 type, ...)

Where type is 16 bits of “id and fd is from seccomp” and 16 bits of “write 
memory” or such.

Re: [PATCH v4 1/4] seccomp: add a return code to trap to userspace

2018-06-25 Thread Andy Lutomirski




> On Jun 25, 2018, at 6:32 PM, Tycho Andersen  wrote:
> 
>> On Sat, Jun 23, 2018 at 12:27:43AM +0200, Jann Horn wrote:
>>> On Fri, Jun 22, 2018 at 11:51 PM Kees Cook  wrote:
>>> 
>>>> On Fri, Jun 22, 2018 at 11:09 AM, Andy Lutomirski wrote:
>>>> One possible extra issue: IIRC /proc/.../mem uses FOLL_FORCE, which is not
>>>> what we want here.
>> 
>> Uuugh, I forgot about that.
>> 
>>>> How about just adding an explicit “read/write the seccomp-trapped task’s
>>>> memory” primitive?  That should be easier than a “open mem fd” primitive.
>>> 
>>> Uuugh. Can we avoid adding another "read/write remote process memory"
>>> interface? The point of this series was to provide a lightweight
>>> approach to what should normally be possible via the existing
>>> seccomp+ptrace interface. I do like Jann's context idea, but I agree
>>> with Andy: it can't be a handle to /proc/$pid/mem, since it's
>>> FOLL_FORCE. Is there any other kind of process context id we can use
>>> for this instead of pid? There was once an idea of pid-fd but it never
>>> landed... This would let us get rid of the "id" in the structure too.
>>> And if that existed, we could make process_vm_*v() safer too (taking a
>>> pid-fd instead of a pid).
>> 
>> Or make a duplicate of /proc/$pid/mem that only differs in whether it
>> sets FOLL_FORCE? The code is basically already there... something like
>> this:
> 
> But we want more than just memory access, I think. rootfs access, ns
> fds, etc. all seem like they might be useful, and racy to open.
> 
> I guess I see two options: use the existing id and add something to
> seccomp() to ask if it's still valid or independent of this patchset
> add some kind of pid id :\
> 

I think we use the existing id / cookie / whatever and ask seccomp, or new 
syscalls, to do the requested operation. This is because we know the target 
task is in a very special stopping point. As a result, a seccomp-specific 
mechanism can do RCU-less fd modifications against a single-threaded target, 
can muck with things like struct cred, etc, while a more general interface 
can’t.

It might be nice to add a syscall with flags such that it could be used on 
ptrace-stopped targets later on. Something like:

access_remote_task(int fd, u64 id, u32 type, ...)

Where type is 16 bits of “id and fd is from seccomp” and 16 bits of “write 
memory” or such.

[PATCH] clk: aspeed: Fix SDCLK name

2018-06-25 Thread Lei YU

The SDCLK was named SDCLKCLK, and no one has used this yet.
Fix it.

Signed-off-by: Lei YU 
---
 drivers/clk/clk-aspeed.c | 2 +-
 include/dt-bindings/clock/aspeed-clock.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c
index 38b366b..f49c684 100644
--- a/drivers/clk/clk-aspeed.c
+++ b/drivers/clk/clk-aspeed.c
@@ -109,7 +109,7 @@ struct aspeed_clk_gate {
[ASPEED_CLK_GATE_RSACLK] =  { 24, -1, "rsaclk-gate",NULL,   
0 }, /* RSA */
[ASPEED_CLK_GATE_UART3CLK] ={ 25, -1, "uart3clk-gate",  "uart", 
0 }, /* UART3 */
[ASPEED_CLK_GATE_UART4CLK] ={ 26, -1, "uart4clk-gate",  "uart", 
0 }, /* UART4 */
-   [ASPEED_CLK_GATE_SDCLKCLK] ={ 27, 16, "sdclk-gate", NULL,   
0 }, /* SDIO/SD */
+   [ASPEED_CLK_GATE_SDCLK] =   { 27, 16, "sdclk-gate", NULL,   
0 }, /* SDIO/SD */
[ASPEED_CLK_GATE_LHCCLK] =  { 28, -1, "lhclk-gate", 
"lhclk", 0 }, /* LPC master/LPC+ */
 };
 
diff --git a/include/dt-bindings/clock/aspeed-clock.h 
b/include/dt-bindings/clock/aspeed-clock.h
index 4476184..f437386 100644
--- a/include/dt-bindings/clock/aspeed-clock.h
+++ b/include/dt-bindings/clock/aspeed-clock.h
@@ -25,7 +25,7 @@
 #define ASPEED_CLK_GATE_RSACLK 19
 #define ASPEED_CLK_GATE_UART3CLK   20
 #define ASPEED_CLK_GATE_UART4CLK   21
-#define ASPEED_CLK_GATE_SDCLKCLK   22
+#define ASPEED_CLK_GATE_SDCLK  22
 #define ASPEED_CLK_GATE_LHCCLK 23
 #define ASPEED_CLK_HPLL24
 #define ASPEED_CLK_AHB 25
-- 
1.9.1

[PATCH] clk: aspeed: Fix SDCLK name

2018-06-25 Thread Lei YU

The SDCLK was named SDCLKCLK, and no one has used this yet.
Fix it.

Signed-off-by: Lei YU 
---
 drivers/clk/clk-aspeed.c | 2 +-
 include/dt-bindings/clock/aspeed-clock.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c
index 38b366b..f49c684 100644
--- a/drivers/clk/clk-aspeed.c
+++ b/drivers/clk/clk-aspeed.c
@@ -109,7 +109,7 @@ struct aspeed_clk_gate {
[ASPEED_CLK_GATE_RSACLK] =  { 24, -1, "rsaclk-gate",NULL,   
0 }, /* RSA */
[ASPEED_CLK_GATE_UART3CLK] ={ 25, -1, "uart3clk-gate",  "uart", 
0 }, /* UART3 */
[ASPEED_CLK_GATE_UART4CLK] ={ 26, -1, "uart4clk-gate",  "uart", 
0 }, /* UART4 */
-   [ASPEED_CLK_GATE_SDCLKCLK] ={ 27, 16, "sdclk-gate", NULL,   
0 }, /* SDIO/SD */
+   [ASPEED_CLK_GATE_SDCLK] =   { 27, 16, "sdclk-gate", NULL,   
0 }, /* SDIO/SD */
[ASPEED_CLK_GATE_LHCCLK] =  { 28, -1, "lhclk-gate", 
"lhclk", 0 }, /* LPC master/LPC+ */
 };
 
diff --git a/include/dt-bindings/clock/aspeed-clock.h 
b/include/dt-bindings/clock/aspeed-clock.h
index 4476184..f437386 100644
--- a/include/dt-bindings/clock/aspeed-clock.h
+++ b/include/dt-bindings/clock/aspeed-clock.h
@@ -25,7 +25,7 @@
 #define ASPEED_CLK_GATE_RSACLK 19
 #define ASPEED_CLK_GATE_UART3CLK   20
 #define ASPEED_CLK_GATE_UART4CLK   21
-#define ASPEED_CLK_GATE_SDCLKCLK   22
+#define ASPEED_CLK_GATE_SDCLK  22
 #define ASPEED_CLK_GATE_LHCCLK 23
 #define ASPEED_CLK_HPLL24
 #define ASPEED_CLK_AHB 25
-- 
1.9.1

[PATCH resend 1/5] remoteproc: Rename subdev functions to start/stop

2018-06-25 Thread Alex Elder

From: Bjorn Andersson 

"start" and "stop" are more suitable names for how these two operations
are used, and they fit better with the upcoming introduction of two
additional operations in the struct.

[el...@linaro.org: minor comment edits]

Signed-off-by: Bjorn Andersson 
Acked-by: Alex Elder 
Tested-by: Fabien Dessenne 
---
 drivers/remoteproc/remoteproc_core.c | 30 ++--
 include/linux/remoteproc.h   | 14 ++---
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c 
b/drivers/remoteproc/remoteproc_core.c
index a9609d971f7f..5dd58e6bea88 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -774,13 +774,13 @@ static int rproc_handle_resources(struct rproc *rproc,
return ret;
 }
 
-static int rproc_probe_subdevices(struct rproc *rproc)
+static int rproc_start_subdevices(struct rproc *rproc)
 {
struct rproc_subdev *subdev;
int ret;
 
list_for_each_entry(subdev, >subdevs, node) {
-   ret = subdev->probe(subdev);
+   ret = subdev->start(subdev);
if (ret)
goto unroll_registration;
}
@@ -789,17 +789,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
list_for_each_entry_continue_reverse(subdev, >subdevs, node)
-   subdev->remove(subdev, true);
+   subdev->stop(subdev, true);
 
return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
+static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
 {
struct rproc_subdev *subdev;
 
list_for_each_entry_reverse(subdev, >subdevs, node)
-   subdev->remove(subdev, crashed);
+   subdev->stop(subdev, crashed);
 }
 
 /**
@@ -901,8 +901,8 @@ static int rproc_start(struct rproc *rproc, const struct 
firmware *fw)
return ret;
}
 
-   /* probe any subdevices for the remote processor */
-   ret = rproc_probe_subdevices(rproc);
+   /* Start any subdevices for the remote processor */
+   ret = rproc_start_subdevices(rproc);
if (ret) {
dev_err(dev, "failed to probe subdevices for %s: %d\n",
rproc->name, ret);
@@ -1014,8 +1014,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
struct device *dev = >dev;
int ret;
 
-   /* remove any subdevices for the remote processor */
-   rproc_remove_subdevices(rproc, crashed);
+   /* Stop any subdevices for the remote processor */
+   rproc_stop_subdevices(rproc, crashed);
 
/* the installed resource table is no longer accessible */
rproc->table_ptr = rproc->cached_table;
@@ -1657,16 +1657,16 @@ EXPORT_SYMBOL(rproc_del);
  * rproc_add_subdev() - add a subdevice to a remoteproc
  * @rproc: rproc handle to add the subdevice to
  * @subdev: subdev handle to register
- * @probe: function to call when the rproc boots
- * @remove: function to call when the rproc shuts down
+ * @start: function to call after the rproc is started
+ * @stop: function to call before the rproc is stopped
  */
 void rproc_add_subdev(struct rproc *rproc,
  struct rproc_subdev *subdev,
- int (*probe)(struct rproc_subdev *subdev),
- void (*remove)(struct rproc_subdev *subdev, bool crashed))
+ int (*start)(struct rproc_subdev *subdev),
+ void (*stop)(struct rproc_subdev *subdev, bool crashed))
 {
-   subdev->probe = probe;
-   subdev->remove = remove;
+   subdev->start = start;
+   subdev->stop = stop;
 
list_add_tail(&subdev->node, &rproc->subdevs);
 }
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index dfdaede9139e..bf55bf2a5ee1 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -477,15 +477,15 @@ struct rproc {
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
- * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is being stopped, the @crashed
- * parameter indicates if this originates from the a recovery
+ * @start: start function, called after the rproc has been started
+ * @stop: stop function, called before the rproc is stopped; the @crashed
+ * parameter indicates if this originates from a recovery
  */
 struct rproc_subdev {
struct list_head node;
 
-   int (*probe)(struct rproc_subdev *subdev);
-   void (*remove)(struct rproc_subdev *subdev, bool crashed);
+   int (*start)(struct rproc_subdev *subdev);
+   void (*stop)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -568,8 +568,8 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 
 void

[PATCH resend 1/5] remoteproc: Rename subdev functions to start/stop

2018-06-25 Thread Alex Elder

From: Bjorn Andersson 

"start" and "stop" are more suitable names for how these two operations
are used, and they fit better with the upcoming introduction of two
additional operations in the struct.

[el...@linaro.org: minor comment edits]

Signed-off-by: Bjorn Andersson 
Acked-by: Alex Elder 
Tested-by: Fabien Dessenne 
---
 drivers/remoteproc/remoteproc_core.c | 30 ++--
 include/linux/remoteproc.h   | 14 ++---
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index a9609d971f7f..5dd58e6bea88 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -774,13 +774,13 @@ static int rproc_handle_resources(struct rproc *rproc,
return ret;
 }
 
-static int rproc_probe_subdevices(struct rproc *rproc)
+static int rproc_start_subdevices(struct rproc *rproc)
 {
struct rproc_subdev *subdev;
int ret;
 
list_for_each_entry(subdev, &rproc->subdevs, node) {
-   ret = subdev->probe(subdev);
+   ret = subdev->start(subdev);
if (ret)
goto unroll_registration;
}
@@ -789,17 +789,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
-   subdev->remove(subdev, true);
+   subdev->stop(subdev, true);
 
return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
+static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
 {
struct rproc_subdev *subdev;
 
list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
-   subdev->remove(subdev, crashed);
+   subdev->stop(subdev, crashed);
 }
 
 /**
@@ -901,8 +901,8 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
return ret;
}
 
-   /* probe any subdevices for the remote processor */
-   ret = rproc_probe_subdevices(rproc);
+   /* Start any subdevices for the remote processor */
+   ret = rproc_start_subdevices(rproc);
if (ret) {
dev_err(dev, "failed to probe subdevices for %s: %d\n",
rproc->name, ret);
@@ -1014,8 +1014,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
struct device *dev = &rproc->dev;
int ret;
 
-   /* remove any subdevices for the remote processor */
-   rproc_remove_subdevices(rproc, crashed);
+   /* Stop any subdevices for the remote processor */
+   rproc_stop_subdevices(rproc, crashed);
 
/* the installed resource table is no longer accessible */
rproc->table_ptr = rproc->cached_table;
@@ -1657,16 +1657,16 @@ EXPORT_SYMBOL(rproc_del);
  * rproc_add_subdev() - add a subdevice to a remoteproc
  * @rproc: rproc handle to add the subdevice to
  * @subdev: subdev handle to register
- * @probe: function to call when the rproc boots
- * @remove: function to call when the rproc shuts down
+ * @start: function to call after the rproc is started
+ * @stop: function to call before the rproc is stopped
  */
 void rproc_add_subdev(struct rproc *rproc,
  struct rproc_subdev *subdev,
- int (*probe)(struct rproc_subdev *subdev),
- void (*remove)(struct rproc_subdev *subdev, bool crashed))
+ int (*start)(struct rproc_subdev *subdev),
+ void (*stop)(struct rproc_subdev *subdev, bool crashed))
 {
-   subdev->probe = probe;
-   subdev->remove = remove;
+   subdev->start = start;
+   subdev->stop = stop;
 
list_add_tail(&subdev->node, &rproc->subdevs);
 }
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index dfdaede9139e..bf55bf2a5ee1 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -477,15 +477,15 @@ struct rproc {
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
- * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is being stopped, the @crashed
- * parameter indicates if this originates from the a recovery
+ * @start: start function, called after the rproc has been started
+ * @stop: stop function, called before the rproc is stopped; the @crashed
+ * parameter indicates if this originates from a recovery
  */
 struct rproc_subdev {
struct list_head node;
 
-   int (*probe)(struct rproc_subdev *subdev);
-   void (*remove)(struct rproc_subdev *subdev, bool crashed);
+   int (*start)(struct rproc_subdev *subdev);
+   void (*stop)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -568,8 +568,8 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 
 void

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1596 matches

Mail list logo