Re: [PATCH 3/5] intel_pstate: remove intel_pstate.get()

2017-06-16 Thread Rafael J. Wysocki
On Wednesday, June 07, 2017 07:39:14 PM Len Brown wrote:
> From: Len Brown 
> 
> The x86 cpufreq core now uses aperfmperf_khz_on_cpu()
> to supply /sys/.../cpufreq/scaling_cur_freq
> on all x86 systems supporting APERF/MPERF.
> 
> That includes 100% of systems supported by intel_pstate,
> and so intel_pstate.get() is now a NOP -- remove it.
> 
> Invoke aperfmperf_khz_on_cpu() directly,
> if legacy-mode p-state tracing is enabled.
> 
> Signed-off-by: Len Brown 
> ---
>  drivers/cpufreq/intel_pstate.c | 16 +---
>  1 file changed, 1 insertion(+), 15 deletions(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index b7de5bd..5d67780 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -1597,12 +1597,6 @@ static inline bool intel_pstate_sample(struct cpudata 
> *cpu, u64 time)
>   return false;
>  }
>  
> -static inline int32_t get_avg_frequency(struct cpudata *cpu)
> -{
> - return mul_ext_fp(cpu->sample.core_avg_perf,
> -   cpu->pstate.max_pstate_physical * 
> cpu->pstate.scaling);
> -}
> -
>  static inline int32_t get_avg_pstate(struct cpudata *cpu)
>  {
>   return mul_ext_fp(cpu->pstate.max_pstate_physical,
> @@ -1728,7 +1722,7 @@ static void intel_pstate_adjust_pstate(struct cpudata 
> *cpu, int target_pstate)
>   sample->mperf,
>   sample->aperf,
>   sample->tsc,
> - get_avg_frequency(cpu),
> + aperfmperf_khz_on_cpu(cpu->cpu),
>   fp_toint(cpu->iowait_boost * 100));
>  }
>  
> @@ -1922,13 +1916,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
>   return 0;
>  }
>  
> -static unsigned int intel_pstate_get(unsigned int cpu_num)
> -{
> - struct cpudata *cpu = all_cpu_data[cpu_num];
> -
> - return cpu ? get_avg_frequency(cpu) : 0;
> -}
> -
>  static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
>  {
>   struct cpudata *cpu = all_cpu_data[cpu_num];
> @@ -2157,7 +2144,6 @@ static struct cpufreq_driver intel_pstate = {
>   .setpolicy  = intel_pstate_set_policy,
>   .suspend= intel_pstate_hwp_save_state,
>   .resume = intel_pstate_resume,
> - .get= intel_pstate_get,
>   .init   = intel_pstate_cpu_init,
>   .exit   = intel_pstate_cpu_exit,
>   .stop_cpu   = intel_pstate_stop_cpu,
> 

This change will cause cpufreq_quick_get() to work differently and it is
called by KVM among other things.  Will that still work?

Thanks,
Rafael



Re: LTS testing with latest kselftests - some failures

2017-06-16 Thread Fengguang Wu

On Fri, Jun 16, 2017 at 06:46:51PM +0200, Luis R. Rodriguez wrote:

Kees, please review 47e0bbb7fa98 below.
Brian, please review be4a1326d12c below.

On Thu, Jun 15, 2017 at 11:26:53PM +0530, Sumit Semwal wrote:

Hello Greg, Shuah,

While testing 4.4.y and 4.9.y LTS kernels with latest kselftest,


To be clear it seems like you are taking the latest upstream kselftest and running
it against older stable kernels. Furthermore you seem to only run the shell
script tests but are using older kselftests drivers? Is this all correct?
Otherwise it is unclear how you are running into the issues below.

Does 0-day do the same? I thought 0-day takes just the kselftest from each tree
submitted. That *seemed* to me like the way it was designed. Shuah ?


Yes in 0-day, we run the kselftest code corresponding to the current kernel.

Thanks,
Fengguang


[PATCH 3/3] block: order /proc/devices by major number

2017-06-16 Thread Logan Gunthorpe
Presently, the order of the block devices listed in /proc/devices is not
entirely sequential. If a block device has a major number greater than
BLKDEV_MAJOR_HASH_SIZE (255), it will be ordered as if its major were
modulo 255. For example, 511 appears after 1.

This patch cleans that up and prints each major number in the correct
order, regardless of where they are stored in the hash table.

In order to do this, we introduce BLKDEV_MAJOR_MAX as an artificial
limit (chosen to be 512). It will then print all devices in major
number order from 0 to the maximum.

Signed-off-by: Logan Gunthorpe 
Cc: Greg Kroah-Hartman 
Cc: Jens Axboe 
Cc: Jeff Layton 
Cc: "J. Bruce Fields" 
---

This is a patch Greg requested after I proposed[1] the same to char devs.
It is based on the chardev patch I sent so it should be merged after it (to
avoid conflicts). If there are any changes requested I'll resend the
entire set as a whole.

[1] https://patchwork.kernel.org/patch/9790093/

 block/genhd.c  | 18 +-
 fs/proc/devices.c  |  4 ++--
 include/linux/fs.h |  4 ++--
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index d252d29fe837..1fc734b1a0e4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -242,6 +242,7 @@ EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
  * Can be deleted altogether. Later.
  *
  */
+#define BLKDEV_MAJOR_HASH_SIZE 255
 static struct blk_major_name {
struct blk_major_name *next;
int major;
@@ -259,12 +260,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
 {
struct blk_major_name *dp;

-   if (offset < BLKDEV_MAJOR_HASH_SIZE) {
-   mutex_lock(&block_class_lock);
-   for (dp = major_names[offset]; dp; dp = dp->next)
+   mutex_lock(&block_class_lock);
+   for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
+   if (dp->major == offset)
seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
-   mutex_unlock(&block_class_lock);
-   }
+   mutex_unlock(&block_class_lock);
 }
 #endif /* CONFIG_PROC_FS */

@@ -309,6 +309,14 @@ int register_blkdev(unsigned int major, const char *name)
ret = major;
}

+   if (major >= BLKDEV_MAJOR_MAX) {
+   pr_err("register_blkdev: major requested (%d) is greater than 
the maximum (%d) for %s\n",
+  major, BLKDEV_MAJOR_MAX, name);
+
+   ret = -EINVAL;
+   goto out;
+   }
+
p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
if (p == NULL) {
ret = -ENOMEM;
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index d196e22c4f1c..e5709343feb7 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -25,7 +25,7 @@ static int devinfo_show(struct seq_file *f, void *v)

 static void *devinfo_start(struct seq_file *f, loff_t *pos)
 {
-   if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_MAX))
+   if (*pos < (BLKDEV_MAJOR_MAX + CHRDEV_MAJOR_MAX))
return pos;
return NULL;
 }
@@ -33,7 +33,7 @@ static void *devinfo_start(struct seq_file *f, loff_t *pos)
 static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
 {
(*pos)++;
-   if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_MAX))
+   if (*pos >= (BLKDEV_MAJOR_MAX + CHRDEV_MAJOR_MAX))
return NULL;
return pos;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f1347c2ca3e9..8cc651807ea4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2478,14 +2478,14 @@ static inline void unregister_chrdev(unsigned int 
major, const char *name)
 #define BDEVT_SIZE 10  /* Largest string for MAJ:MIN for blkdev */

 #ifdef CONFIG_BLOCK
-#define BLKDEV_MAJOR_HASH_SIZE 255
+#define BLKDEV_MAJOR_MAX   512
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
 extern struct block_device *lookup_bdev(const char *);
 extern void blkdev_show(struct seq_file *,off_t);

 #else
-#define BLKDEV_MAJOR_HASH_SIZE 0
+#define BLKDEV_MAJOR_MAX   0
 #endif

 extern void init_special_inode(struct inode *, umode_t, dev_t);
--
2.11.0


Re: [PATCH 1/2] platform/x86: silead_dmi: Add touchscreen info for PoV mobii wintab p800w

2017-06-16 Thread Darren Hart
On Fri, Jun 16, 2017 at 03:22:45PM +0200, Hans de Goede wrote:
> Hi,
> 
> On 16-06-17 14:44, Andy Shevchenko wrote:
> > On Thu, Jun 15, 2017 at 7:53 PM, Darren Hart  wrote:
> > > On Thu, Jun 15, 2017 at 08:48:31AM +0200, Hans de Goede wrote:
> > 
> > > > + /* Point of View mobii wintab p800w */
> > > > + .driver_data = (void *)&pov_mobii_wintab_p800w_data,
> > > > + .matches = {
> > > > + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
> > > > + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
> > > > + DMI_MATCH(DMI_BIOS_VERSION, "3BAIR1013"),
> > > > + /* Above matches are too generic, add bios-date 
> > > > match */
> > > > + DMI_MATCH(DMI_BIOS_DATE, "08/22/2014"),
> > > 
> > > This is the first time I've seen a BIOS date match used to determine 
> > > hardware
> > > features. DMI matching is a (necessary) hack to begin with (the vendors 
> > > should
> > > be providing this data via ACPI _DSD anyway) but a date match means we 
> > > would
> > > need a kernel patch every time one of these tablets gets a BIOS update...
> > > 
> > > With words like "Aptio CRB" it's clear the vendor isn't doing their job 
> > > and just
> > > using unmodified reference code. The problem with this of course is that 
> > > the
> > > vendor is not providing a way to identify this hardware.
> > > 
> > > Andy, I'd appreciate your thoughts on this... I'm leaning towards not 
> > > accepting
> > > bios date (or indeed, BIOS version) as a way to identify a platform.
> > 
> > The question is what is the anticipated amount of affected devices
> > with BIOS date included and otherwise?
> 
> I expect there to be collisions (false positive matches) without the
> BIOS_DATE check, a quick web-search finds other devices with a
> 3BAIR1013 bios version. Those don't necessarily also use a Silead
> touchscreen (which is needed for a collision to happen), but given
> the popularity of Silead touchscreens on cheap devices a collision
> is not unlikely.
> 
> With the bios-date check added, I expect this match to be unique,
> for it to not be unique we would need to be really unlucky.
> 
> > If Hans believes that there will be no update for some devices,
> 
> Yeah I'm pretty sure this specific device will not see any
> BIOS updates ever.
> 
> > while there are devices with the same DMI strings, but different date and
> > _fixed_ issue, I think we have no other choice for now.
> > Also can we use some other strings to distinguish group of devices
> > which are affected?
> 
> bios_date: 08/22/2014
> bios_vendor: American Megatrends Inc.
> bios_version: 3BAIR1013
> board_asset_tag: To be filled by O.E.M.
> board_name: Aptio CRB
> board_serial: T80091A4C11B0848
> board_vendor: AMI Corporation
> board_version: To be filled by O.E.M.
> chassis_asset_tag: To Be Filled By O.E.M.
> chassis_serial: To Be Filled By O.E.M.
> chassis_type: 3
> chassis_vendor: To Be Filled By O.E.M.
> chassis_version: To Be Filled By O.E.M.
> product_name: To be filled by O.E.M.
> product_serial: To be filled by O.E.M.
> product_uuid: 03000200-0400-0500-0006-000700080009
> product_version: To be filled by O.E.M.
> sys_vendor: To be filled by O.E.M.
> 
> The product-uuid is a known example uuid, so is
> no good. The board_serial might be useful, but
> only if it is unique for the model and not per
> tablet. Unfortunately I only have 1 of these
> tablets, so I cannot tell.

Do we have any indication that this BIOS Date isn't just the default value
provided by AMI? Does it offer any more information than the BIOS Version?

I suppose we may be able to do some kind of a partial match on the Board Serial
if even that is platform specific (I suspect it is, with the T800 at the
beginning).

The sloppy handling of this firmware really irks me. That's obviously not Hans'
fault, so we'll take the patch. If we see a conflict in the future, we'll just
have to compare the other DMI strings for a match and see what we can do. I'm
even tempted to insert a printk on this match, dumping the DMI values and
requesting the user to copy/paste them into an email to this list.

I think we've already spent too much time on this patch based on this review:
https://www.notebookcheck.net/Point-of-View-Mobii-WinTab-800W-Tablet-Review.129561.0.html

Nice...

-- 
Darren Hart
VMware Open Source Technology Center


[PATCHv3 1/3] firmware_class: move NO_CACHE from private to driver_data_req_params

2017-06-16 Thread yi1 . li
From: Yi Li 

This adds DRIVER_DATA_REQ_NO_CACHE flag with .req flag under struct
driver_data_req_params. When this flag is set, the driver_data driver
will not cache the firmware during PM cycle, which is expensive. It
will be used by streaming case and other drivers which implement
their own cache thing. Also added the debugfs interface to selftest.

Signed-off-by: Yi Li 
---
 drivers/base/firmware_class-dbg.c | 108 ++
 drivers/base/firmware_class.c |  26 +
 include/linux/driver_data.h   |   4 ++
 3 files changed, 127 insertions(+), 11 deletions(-)
 create mode 100644 drivers/base/firmware_class-dbg.c

diff --git a/drivers/base/firmware_class-dbg.c 
b/drivers/base/firmware_class-dbg.c
new file mode 100644
index 000..102a4cd
--- /dev/null
+++ b/drivers/base/firmware_class-dbg.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017 by Yi Li 
+ *
+ */
+/* This is part of firmware_class.c for testing firmware cache */
+
+#ifndef CONFIG_TEST_DRIVER_DATA
+static inline void create_debug_files(struct firmware_cache *cache) { }
+static inline void remove_debug_files(struct firmware_cache *cache) { }
+#else
+#include 
+#include 
+
+static int debug_cache_show(struct seq_file *s, void *v)
+{
+   struct firmware_cache *cache = s->private;
+   unsigned long flags;
+   struct fw_cache_entry *cache_entry;
+
+   spin_lock_irqsave(&cache->lock, flags);
+
+   list_for_each_entry(cache_entry, &cache->fw_names, list)
+   seq_printf(s, "cached %s\n", cache_entry->name);
+
+   spin_unlock_irqrestore(&cache->lock, flags);
+
+   return 0;
+}
+
+static int debug_cache_open(struct inode *inode, struct file *file)
+{
+   return single_open(file, debug_cache_show, inode->i_private);
+}
+
+#define MAX_LEN16
+/**
+ * test_cache - set value in the 'cache' control file
+ *
+ * The relevant values are:
+ *
+ *  1: Test the suspend and start the cache
+ *  0: Test the resume and clear the cache.
+ **/
+static ssize_t test_cache(struct file *fp, const char __user *user_buffer,
+ size_t size, loff_t *ppos)
+{
+   char buf[MAX_LEN];
+   size_t len;
+   long cmd;
+
+   len = min(size, (size_t)(MAX_LEN - 1));
+   if (copy_from_user(buf, user_buffer, len))
+   return -EFAULT;
+   buf[len] = 0;
+   if (kstrtol(buf, 10, &cmd))
+   return -EFAULT;
+
+#ifdef CONFIG_PM_SLEEP
+   switch (cmd) {
+   /* Simulate PM suspend prepare and start to cache */
+   case 1:
+   kill_pending_fw_fallback_reqs(true);
+   device_cache_fw_images();
+   disable_firmware();
+   break;
+   /* Simulate PM resume and un-cache */
+   case 0:
+   mutex_lock(&fw_lock);
+   fw_cache.state = FW_LOADER_NO_CACHE;
+   mutex_unlock(&fw_lock);
+   enable_firmware();
+   device_uncache_fw_images_delay(10);
+   break;
+   default:
+   pr_err("unexpected cmd\n");
+   }
+#endif
+   return size;
+}
+
+static const struct file_operations debug_cache_fops = {
+   .open = debug_cache_open,
+   .read = seq_read,
+   .write = test_cache,
+   .llseek = seq_lseek,
+   .release = single_release,
+};
+
+static void create_debug_files(struct firmware_cache *cache)
+{
+   cache->debug = debugfs_create_dir("firmware", NULL);
+   if (!cache->debug)
+   return;
+   if (!debugfs_create_file("cache", 0644, cache->debug,
+cache, &debug_cache_fops))
+   goto failed_create;
+   return;
+
+failed_create:
+   debugfs_remove_recursive(cache->debug);
+}
+
+static void remove_debug_files(struct firmware_cache *cache)
+{
+   debugfs_remove_recursive(cache->debug);
+   cache->debug = NULL;
+}
+#endif
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 7af430a..a70a2a7 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -72,14 +72,10 @@ enum driver_data_mode {
  * issue a uevent to userspace. Userspace in turn is expected to be
  * monitoring for uevents for the firmware_class and will use the
  * exposted sysfs interface to upload the driver data for the caller.
- * @DRIVER_DATA_PRIV_REQ_NO_CACHE: indicates that the driver data request
- * should not set up and use the internal caching mechanism to assist
- * drivers from fetching driver data at resume time after suspend.
  */
 enum driver_data_priv_reqs {
DRIVER_DATA_PRIV_REQ_FALLBACK   = 1 << 0,
DRIVER_DATA_PRIV_REQ_FALLBACK_UEVENT= 1 << 1,
-   DRIVER_DATA_PRIV_REQ_NO_CACHE   = 1 << 2,
 };
 
 /**
@@ -151,10 +147,12 @@ struct driver_data_params {
}
 
 #define __DATA_REQ_FIRMWARE_BUF(buf, size) \
+   .req_params = {   

[PATCHv3 2/3] iwlwifi: use DRIVER_DATA_REQ_NO_CACHE for driver_data

2017-06-16 Thread yi1 . li
From: Yi Li 

Set DRIVER_DATA_REQ_NO_CACHE flag to disable driver_data driver caching
mechanism, iwlwifi has its own firmware cache management.

Signed-off-by: Yi Li 
---
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c 
b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 028854d3..db4d6fc 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -229,6 +229,8 @@ static int iwl_request_firmware(struct iwl_drv *drv)
const struct driver_data_req_params req_params = {
DRIVER_DATA_API_CB(iwl_req_fw_callback, drv),
DRIVER_DATA_API(cfg->ucode_api_min, cfg->ucode_api_max, 
".ucode"),
+   .reqs = DRIVER_DATA_REQ_NO_CACHE |
+   DRIVER_DATA_REQ_USE_API_VERSIONING,
};
 
return driver_data_request_async(name_pre,
-- 
2.7.4



[PATCHv3 0/3] Enable no_cache in driver_data

2017-06-16 Thread yi1 . li
From: Yi Li 

Changes in v3:

  - Add a platform device to enable PM support in test driver
  - Add a debugfs device to enable driver data cache testing 
  - Fix a logic error so that driver_data is able to cache
  - Add two test cases for caching and non-caching
  - Rebase to Luis R. Rodriguez's 20170605-driver-data branch

Changes in v2:

  - Rebase to Luis R. Rodriguez's 20170501-driver-data-try2
branch 
  - Expose DRIVER_DATA_REQ_NO_CACHE flag to public 
driver_data_req_params structure, so upper drivers can ask
driver_data driver to bypass the internal caching mechanism.
    This will be used for streaming and for other drivers that maintain
    their own caching, like iwlwifi.
  - Add self test cases.


Yi Li (3):
  firmware_class: move NO_CACHE from private to driver_data_req_params
  iwlwifi: use DRIVER_DATA_REQ_NO_CACHE for driver_data
  test: add no_cache to driver_data load tester

 drivers/base/firmware_class-dbg.c   | 108 
 drivers/base/firmware_class.c   |  26 +++---
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c|   2 +
 include/linux/driver_data.h |   4 +
 lib/test_driver_data.c  |  97 ++---
 tools/testing/selftests/firmware/driver_data.sh |  69 +++
 6 files changed, 285 insertions(+), 21 deletions(-)
 create mode 100644 drivers/base/firmware_class-dbg.c

-- 
2.7.4



[PATCHv3 3/3] test: add no_cache to driver_data load tester

2017-06-16 Thread yi1 . li
From: Yi Li 

Add a platform device to enable PM cache and add cache/no_cache test.

Signed-off-by: Yi Li 
---
 lib/test_driver_data.c  | 97 ++---
 tools/testing/selftests/firmware/driver_data.sh | 67 +
 2 files changed, 154 insertions(+), 10 deletions(-)

diff --git a/lib/test_driver_data.c b/lib/test_driver_data.c
index c176527..cadd122 100644
--- a/lib/test_driver_data.c
+++ b/lib/test_driver_data.c
@@ -44,6 +44,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /* Used for the fallback default to test against */
 #define TEST_DRIVER_DATA "test-driver_data.bin"
@@ -73,6 +75,10 @@ int num_test_devs;
  * struct driver_data_reg_params @optional field for more information.
  * @keep: whether or not we wish to free the driver_data on our own, refer to
  * the struct driver_data_req_params @keep field for more information.
+ * @no_cache: whether or not we wish to use the internal caching mechanism
+ *  to assist drivers from fetching driver data at resume time after
+ *  suspend, refer to the struct driver_data_req_params .req
+ *  DRIVER_DATA_REQ_NO_CACHE for more information.
  * @enable_opt_cb: whether or not the optional callback should be set
  * on a trigger. There is no equivalent setting on the struct
  * driver_data_req_params as this is implementation specific, and in
@@ -121,6 +127,7 @@ struct test_config {
bool async;
bool optional;
bool keep;
+   bool no_cache;
bool enable_opt_cb;
bool use_api_versioning;
u8 api_min;
@@ -163,6 +170,7 @@ struct test_driver_data_private {
  * a driver might typically use to stuff firmware / driver_data.
  * @misc_dev: we use a misc device under the hood
  * @dev: pointer to misc_dev's own struct device
+ * @pdev: pointer to platform device's struct device
  * @api_found_calls: number of calls a fetch for a driver was found. We use
  * for internal use on the api callback.
  * @driver_data_mutex: for access into the @driver_data, the fake storage
@@ -181,6 +189,7 @@ struct driver_data_test_device {
struct test_driver_data_private test_driver_data;
struct miscdevice misc_dev;
struct device *dev;
+   struct device *pdev;
 
u8 api_found_calls;
 
@@ -346,6 +355,9 @@ static ssize_t config_show(struct device *dev,
len += snprintf(buf+len, PAGE_SIZE,
"keep:\t\t%s\n",
config->keep ? "true" : "false");
+   len += snprintf(buf + len, PAGE_SIZE,
+   "no_cache:\t\t%s\n",
+   config->no_cache ? "true" : "false");
 
mutex_unlock(&test_dev->config_mutex);
 
@@ -399,9 +411,9 @@ static int config_req_default(struct 
driver_data_test_device *test_dev)
 config->default_name);
 
ret = driver_data_request_sync(config->default_name,
-  &req_params, test_dev->dev);
+  &req_params, test_dev->pdev);
if (ret)
-   dev_info(test_dev->dev,
+   dev_info(test_dev->pdev,
 "load of default '%s' failed: %d\n",
 config->default_name, ret);
 
@@ -456,14 +468,17 @@ static int trigger_config_sync(struct 
driver_data_test_device *test_dev)
  (config->optional ?
   DRIVER_DATA_REQ_OPTIONAL : 0) |
  (config->keep ?
-  DRIVER_DATA_REQ_KEEP : 0)),
+  DRIVER_DATA_REQ_KEEP : 0) |
+ (config->no_cache ?
+  DRIVER_DATA_REQ_NO_CACHE : 0)),
};
const struct driver_data_req_params req_params_opt_cb = {
DRIVER_DATA_DEFAULT_SYNC(config_sync_req_cb, test_dev),
DRIVER_DATA_SYNC_OPT_CB(config_sync_req_default_cb,
 test_dev),
.reqs = (config->optional ? DRIVER_DATA_REQ_OPTIONAL : 0) |
-   (config->keep ? DRIVER_DATA_REQ_KEEP : 0),
+   (config->keep ? DRIVER_DATA_REQ_KEEP : 0) |
+   (config->no_cache ? DRIVER_DATA_REQ_NO_CACHE : 0),
};
const struct driver_data_req_params *req_params;
 
@@ -472,9 +487,10 @@ static int trigger_config_sync(struct 
driver_data_test_device *test_dev)
else
req_params = &req_params_default;
 
-   ret = driver_data_request_sync(config->name, req_params, test_dev->dev);
+   ret = driver_data_request_sync(config->name, req_params,
+  test_dev->pdev);
if (ret)
-   dev_err(test_dev->dev, "sync load of '%s' failed: %d\n",
+   dev_err(test_dev->

Re: [PATCH v9 2/4] arm64: dts: hisi: add kirin pcie node

2017-06-16 Thread Bjorn Helgaas
On Sat, Jun 17, 2017 at 06:31:59AM +0800, Guodong Xu wrote:
> Hi, Bjorn
> 
> On Sat, Jun 17, 2017 at 5:11 AM, Bjorn Helgaas  wrote:
> > On Tue, Jun 06, 2017 at 07:19:53PM +0800, Guodong Xu wrote:
> >> Hi, Arnd
> >>
> >> On Tue, Jun 6, 2017 at 5:23 PM, Arnd Bergmann  wrote:
> >> > On Sun, Jun 4, 2017 at 2:03 AM, kbuild test robot  wrote:
> >> >> Hi Xiaowei,
> >> >>
> >> >> [auto build test ERROR on pci/next]
> >> >> [also build test ERROR on v4.12-rc3 next-20170602]
> >> >> [if your patch is applied to the wrong git tree, please drop us a note 
> >> >> to help improve the system]
> >> >>
> >> >> url:
> >> >> https://github.com/0day-ci/linux/commits/Xiaowei-Song/add-PCIe-driver-for-Kirin-PCIe/20170531-182118
> >> >> base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git 
> >> >> next
> >> >> config: arm64-allnoconfig (attached as .config)
> >> >> compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
> >> >> reproduce:
> >> >> wget 
> >> >> https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross
> >> >>  -O ~/bin/make.cross
> >> >> chmod +x ~/bin/make.cross
> >> >> # save the attached .config to linux build tree
> >> >> make.cross ARCH=arm64
> >> >>
> >> >> All errors (new ones prefixed by >>):
> >> >>
> >>  Error: arch/arm64/boot/dts/hisilicon/hi3660.dtsi:180.24-25 syntax 
> >>  error
> >>  FATAL ERROR: Unable to parse input tree
> >> >
> >> > We keep getting the build errors for patch submissions. Obviously the 
> >> > patch is
> >> > still broken and can't be merged as-is. What is the plan for merging the 
> >> > series?
> >> >
> >>
> >> This dts patch can be applied to dts series [1]. For upstream review
> >> purpose, hi3660-hikey960 dts patches, which don't have a related
> >> driver changes, are sent in [1]. Other patches, which need driver
> >> changes, like this one, are sent together with driver.
> >>
> >> Patchset [1] is now at its v2 review. Rob Herring already gave his ACK
> >> for some of them in v1. Hopefully I can get more ACK for remaining
> >> ones, and make them ready for v4.13 merging window.
> >>
> >> [1], http://www.spinics.net/lists/devicetree/msg178303.html
> >
> > I don't know how you want to deal with the DTS build failure.
> 
> DTS part of this is also included in a broader Hi3660 dts patchset [1], and
> was ACK'ed [2] today by HiSilicon SoC maintainer Xu Wei. Hopefully
> they can land in next merge window.
> 
> [1] https://www.spinics.net/lists/arm-kernel/msg588232.html
> [2] https://www.spinics.net/lists/arm-kernel/msg588686.html

This sounds good, but doesn't help me make progress.  I don't want to
apply [PATCH v9 2/4] because it didn't build.  I haven't seen an
updated series that *does* build.  And it probably doesn't make sense
for me to apply the arch/arm64 changes anyway because they aren't
really in the PCI purview.

If you want me to apply something, post patches 1 and 3 by themselves
with the trivial updates I included.  Those are really only PCI and
should build without error.

> > From a
> > PCI perspective, I think I could apply patches 1 and 3 pretty easily
> > by themselves.
> >
> > If/when you post these again, please incorporate the following
> > incremental diff to clean up various whitespace and capitalization
> > nits (these are spread across several of your patches).
> >
> >
> > diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt 
> > b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
> > index 68ffa0fbcd73..20357d840af1 100644
> > --- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt
> > +++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
> > @@ -24,8 +24,8 @@ Example based on kirin960:
> >
> > pcie@f400 {
> > compatible = "hisilicon,kirin-pcie";
> > -   reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 
> > 0x1000>,
> > - <0x0 0xf3f2 0x0 0x4>, <0x0 0xF400 0 
> > 0x2000>;
> > +   reg = <0x0 0xf400 0x0  0x1000>, <0x0 0xff3fe000 0x0 
> > 0x1000>,
> > + <0x0 0xf3f2 0x0 0x4>, <0x0 0xf400 0x0 
> > 0x2000>;
> > reg-names = "dbi","apb","phy", "config";
> > bus-range = <0x0  0x1>;
> > #address-cells = <3>;
> > @@ -46,5 +46,5 @@ Example based on kirin960:
> >  <&crg_ctrl HI3660_ACLK_GATE_PCIE>;
> > clock-names = "pcie_phy_ref", "pcie_aux",
> >   "pcie_apb_phy", "pcie_apb_sys", "pcie_aclk";
> > -   reset-gpios = <&gpio11 1 0 >;
> > +   reset-gpios = <&gpio11 1 0>;
> > };
> > diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
> > b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
> > index e8feb2fb4d53..7bc89baa40ba 100644
> > --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
> > +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
> > @@ -159,12 +159,12 @@
> >
> > pcie@f400

Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.

2017-06-16 Thread Benjamin Herrenschmidt
On Fri, 2017-06-16 at 12:15 -0700, Ram Pai wrote:
> gp_regs size is not changed, nor is the layout. A unused field in
> the gp_regs is used to fill in the AMR contents. Old binaries will not
> be knowing about this unused field, and hence should not break.
> 
> New binaries can leverage this already existing but newly defined
> field; to read the contents of AMR.
> 
> Is it still a concern?

Calls to sys_swapcontext with a made-up context will end up with a crap
AMR if done by code that didn't know about that register.

Ben.



Re: [PATCH] PCI: Move test of INTx masking to pci_setup_device

2017-06-16 Thread Bjorn Helgaas
On Fri, May 26, 2017 at 10:02:25PM +0100, Piotr Gregor wrote:
> The test for INTx masking via config space command performed
> in pci_intx_mask_supported() should be performed before PCI device
> can be used. This is to avoid reading/writing of PCI_COMMAND_INTX_DISABLE
> register which may collide with MSI/MSI-X interrupts.
> 
> This patch moves test performed in pci_intx_mask_supported() to
> 
> static void pci_test_intx_masking(struct pci_dev *dev)
> 
> defined in drivers/pci/probe.c.
> 
> This function is called from pci_setup_device(). It skips the test
> if the device has been already marked to have broken INTx masking
> feature. Otherwise the test is executed and broken_intx_masking
> field of struct pci_dev is set accordingly. broken_intx_masking
> meaning is: if it is true then the test has been either skipped
> because the device has been already known to have broken INTx
> masking support, or the test's been done and it has detected INTx
> masking support to be broken.
> The test result can be queried at any time later from the pci_dev
> using same interface as before (though with changed implementation)
> 
> static inline bool pci_intx_mask_supported(struct pci_dev *pdev)
> {
> /*
>  * INTx masking is supported if device has not been marked
>  * to have this feature broken and it has passed
>  * pci_test_intx_masking() test.
>  */
> return !pdev->broken_intx_masking;
> }
> 
> so current users of pci_intx_mask_supported: uio and vfio, keep
> their code unchanged.
> 
> Signed-off-by: Piotr Gregor 
> ---
>  drivers/pci/pci.c   | 42 +-
>  drivers/pci/probe.c | 44 
>  include/linux/pci.h | 13 +++--
>  3 files changed, 56 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b01bd5b..7c4e1aa 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -3708,46 +3708,6 @@ void pci_intx(struct pci_dev *pdev, int enable)
>  }
>  EXPORT_SYMBOL_GPL(pci_intx);
>  
> -/**
> - * pci_intx_mask_supported - probe for INTx masking support
> - * @dev: the PCI device to operate on
> - *
> - * Check if the device dev support INTx masking via the config space
> - * command word.
> - */
> -bool pci_intx_mask_supported(struct pci_dev *dev)
> -{
> - bool mask_supported = false;
> - u16 orig, new;
> -
> - if (dev->broken_intx_masking)
> - return false;
> -
> - pci_cfg_access_lock(dev);
> -
> - pci_read_config_word(dev, PCI_COMMAND, &orig);
> - pci_write_config_word(dev, PCI_COMMAND,
> -   orig ^ PCI_COMMAND_INTX_DISABLE);
> - pci_read_config_word(dev, PCI_COMMAND, &new);
> -
> - /*
> -  * There's no way to protect against hardware bugs or detect them
> -  * reliably, but as long as we know what the value should be, let's
> -  * go ahead and check it.
> -  */
> - if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
> - dev_err(&dev->dev, "Command register changed from 0x%x to 0x%x: 
> driver or hardware bug?\n",
> - orig, new);
> - } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
> - mask_supported = true;
> - pci_write_config_word(dev, PCI_COMMAND, orig);
> - }
> -
> - pci_cfg_access_unlock(dev);
> - return mask_supported;
> -}
> -EXPORT_SYMBOL_GPL(pci_intx_mask_supported);
> -
>  static bool pci_check_and_set_intx_mask(struct pci_dev *dev, bool mask)
>  {
>   struct pci_bus *bus = dev->bus;
> @@ -3798,7 +3758,7 @@ static bool pci_check_and_set_intx_mask(struct pci_dev 
> *dev, bool mask)
>   * @dev: the PCI device to operate on
>   *
>   * Check if the device dev has its INTx line asserted, mask it and
> - * return true in that case. False is returned if not interrupt was
> + * return true in that case. False is returned if no interrupt was
>   * pending.
>   */
>  bool pci_check_and_mask_intx(struct pci_dev *dev)
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 19c8950..ee6b55c 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1330,6 +1330,48 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev)
>  }
>  
>  /**
> + * pci_test_intx_masking - probe for INTx masking support
> + * @dev: the PCI device to operate on
> + *
> + * Check if the @dev supports INTx masking via the config space
> + * command word. Executed when PCI device is setup. Result is saved
> + * in broken_intx_masking field of struct pci_dev and can be checked
> + * with pci_intx_mask_supported at any time later, after the PCI device
> + * has been setup (this avoids testing of PCI_COMMAND_INTX_DISABLE
> + * register at runtime).
> + */
> +static void pci_test_intx_masking(struct pci_dev *dev)
> +{
> + u16 orig, toggle, new;
> +
> + /*
> +  * If device doesn't support this feature though it could pass the test.
> +  */
> + if (dev->broken_intx_masking)
> +

Re: [RFC PATCH 1/3] atmel-hlcdc: add support for 8-bit color lookup table mode

2017-06-16 Thread Peter Rosin
 Hm, it's probably too late to do it here. Planes have already been
 enabled and the engine may have started to fetch data and do the
 composition. You could do that in ->update_plane() [1], and make it a
 per-plane thing.

 I'm not sure, but I think you can get the new crtc_state from
 plane->crtc->state in this context (state have already been swapped,
 and new state is being applied, which means relevant locks are held).  
>>>
>>> Ok, I can move it there. My plan is to just copy the default .update_plane
>>> function and insert 
>>>
>>> if (crtc->state->color_mgmt_changed && crtc->state->gamma_lut) {
>>> ...
>>> }
>>>
>>> just before the drm_atomic_commit(state) call. Sounds ok?
>>
>> Why would you copy the default ->update_plane() when we already have
>> our own ->atomic_update_plane() implementation [1]? Just put it there
>> (before the atmel_hlcdc_layer_update_commit() call) and we should be
>> good.
> 
> Ahh, but you said ->update_plane() and I took that as .update_plane in
> layer_plane_funcs, not ->atomic_update() in 
> atmel_hlcdc_layer_plane_helper_funcs.
> 
> Makes sense now, and much neater too.

No, it doesn't make sense. There's no atmel_hlcdc_layer_update_commit call
anywhere, and no such function. You seem to have some further changes that
are not even in -next. Where am I getting those changes and why are they
not upstream yet?

There's a mention of the missing function here [1], but that's some 18
months ago. What's going on?

[1] https://patchwork.kernel.org/patch/7965721/

Cheers,
peda


endian bitshift defects [ was: staging: fusb302: don't bitshift __le16 type ]

2017-06-16 Thread Joe Perches
On Fri, 2017-06-16 at 19:45 +0200, Frans Klaver wrote:
> The header field in struct pd_message is declared as an __le16 type. The
> data in the message is supposed to be little endian. This means we don't
> have to go and shift the individual bytes into position when we're
> filling the buffer, we can just copy the contents right away. As an
> added benefit we don't get fishy results on big endian systems anymore.

Thanks for pointing this out.

There are several instances of this class of error.

Here's a cocci script to find them.

This is best used with cocci's --all-includes option like:

$ spatch --all-includes --very-quiet --sp-file lebe_bitshifts.cocci .
[ many defects...]

$ cat lebe_bitshifts.cocci
@@
typedef __le16, __le32, __le64,  __be16, __be32, __be64;
{ __le16, __le32, __le64,  __be16, __be32, __be64 } a;
expression b;
@@

*   a << b

@@
{ __le16, __le32, __le64,  __be16, __be32, __be64 } a;
expression b;
@@

*   a <<= b

@@
{ __le16, __le32, __le64,  __be16, __be32, __be64 } a;
expression b;
@@

*   a >> b

@@
{ __le16, __le32, __le64,  __be16, __be32, __be64 } a;
expression b;
@@

*   a >>= b
$



Re: [PATCH v9 2/4] arm64: dts: hisi: add kirin pcie node

2017-06-16 Thread Guodong Xu
Hi, Bjorn

On Sat, Jun 17, 2017 at 5:11 AM, Bjorn Helgaas  wrote:
> On Tue, Jun 06, 2017 at 07:19:53PM +0800, Guodong Xu wrote:
>> Hi, Arnd
>>
>> On Tue, Jun 6, 2017 at 5:23 PM, Arnd Bergmann  wrote:
>> > On Sun, Jun 4, 2017 at 2:03 AM, kbuild test robot  wrote:
>> >> Hi Xiaowei,
>> >>
>> >> [auto build test ERROR on pci/next]
>> >> [also build test ERROR on v4.12-rc3 next-20170602]
>> >> [if your patch is applied to the wrong git tree, please drop us a note to 
>> >> help improve the system]
>> >>
>> >> url:
>> >> https://github.com/0day-ci/linux/commits/Xiaowei-Song/add-PCIe-driver-for-Kirin-PCIe/20170531-182118
>> >> base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git 
>> >> next
>> >> config: arm64-allnoconfig (attached as .config)
>> >> compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
>> >> reproduce:
>> >> wget 
>> >> https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross 
>> >> -O ~/bin/make.cross
>> >> chmod +x ~/bin/make.cross
>> >> # save the attached .config to linux build tree
>> >> make.cross ARCH=arm64
>> >>
>> >> All errors (new ones prefixed by >>):
>> >>
>>  Error: arch/arm64/boot/dts/hisilicon/hi3660.dtsi:180.24-25 syntax error
>>  FATAL ERROR: Unable to parse input tree
>> >
>> > We keep getting the build errors for patch submissions. Obviously the 
>> > patch is
>> > still broken and can't be merged as-is. What is the plan for merging the 
>> > series?
>> >
>>
>> This dts patch can be applied to dts series [1]. For upstream review
>> purpose, hi3660-hikey960 dts patches, which don't have a related
>> driver changes, are sent in [1]. Other patches, which need driver
>> changes, like this one, are sent together with driver.
>>
>> Patchset [1] is now at its v2 review. Rob Herring already gave his ACK
>> for some of them in v1. Hopefully I can get more ACK for remaining
>> ones, and make them ready for v4.13 merging window.
>>
>> [1], http://www.spinics.net/lists/devicetree/msg178303.html
>
> I don't know how you want to deal with the DTS build failure.

DTS part of this is also included in a broader Hi3660 dts patchset [1], and
was ACK'ed [2] today by HiSilicon SoC maintainer Xu Wei. Hopefully
they can land in next merge window.

[1] https://www.spinics.net/lists/arm-kernel/msg588232.html
[2] https://www.spinics.net/lists/arm-kernel/msg588686.html

-Guodong

> From a
> PCI perspective, I think I could apply patches 1 and 3 pretty easily
> by themselves.
>
> If/when you post these again, please incorporate the following
> incremental diff to clean up various whitespace and capitalization
> nits (these are spread across several of your patches).
>
>
> diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt 
> b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
> index 68ffa0fbcd73..20357d840af1 100644
> --- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt
> +++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
> @@ -24,8 +24,8 @@ Example based on kirin960:
>
> pcie@f400 {
> compatible = "hisilicon,kirin-pcie";
> -   reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 
> 0x1000>,
> - <0x0 0xf3f2 0x0 0x4>, <0x0 0xF400 0 0x2000>;
> +   reg = <0x0 0xf400 0x0  0x1000>, <0x0 0xff3fe000 0x0 
> 0x1000>,
> + <0x0 0xf3f2 0x0 0x4>, <0x0 0xf400 0x0 
> 0x2000>;
> reg-names = "dbi","apb","phy", "config";
> bus-range = <0x0  0x1>;
> #address-cells = <3>;
> @@ -46,5 +46,5 @@ Example based on kirin960:
>  <&crg_ctrl HI3660_ACLK_GATE_PCIE>;
> clock-names = "pcie_phy_ref", "pcie_aux",
>   "pcie_apb_phy", "pcie_apb_sys", "pcie_aclk";
> -   reset-gpios = <&gpio11 1 0 >;
> +   reset-gpios = <&gpio11 1 0>;
> };
> diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
> b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
> index e8feb2fb4d53..7bc89baa40ba 100644
> --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
> +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
> @@ -159,12 +159,12 @@
>
> pcie@f400 {
> compatible = "hisilicon,kirin960-pcie";
> -   reg = <0x0 0xf400 0x0 0x1000>,
> - <0x0 0xff3fe000 0x0 0x1000>,
> +   reg = <0x0 0xf400 0x0  0x1000>,
> + <0x0 0xff3fe000 0x0  0x1000>,
>   <0x0 0xf3f2 0x0 0x4>,
> - <0x0 0xF500 0x0 0x2000>;
> + <0x0 0xf500 0x0  0x2000>;
> reg-names = "dbi", "apb", "phy", "config";
> -   bus-range = <0x0  0x1>;
> +   bus-range = <0x0 0x1>;
> #address-cells = <3>;
>  

Re: [RFC PATCH 1/3] atmel-hlcdc: add support for 8-bit color lookup table mode

2017-06-16 Thread Peter Rosin
On 2017-06-16 23:12, Peter Rosin wrote:
> On 2017-06-16 18:15, Boris Brezillon wrote:
>> To be very clear, I'd like you to test it through DRM ioctls, not only
>> through the fbdev emulation layer.
> 
> ...so yeah, right, I couldn't agree more. Any pointers to code w/o a bunch
> of complex library dependencies that I can test with?

I have now built libdrm-2.4.81, and get this:

$ modetest -M atmel-hlcdc -s 27@39:1024x768
setting mode 1024x768-60Hz@XR24 on connectors 27, crtc 39
$ modetest -M atmel-hlcdc -s 27@39:1024x768@RG16
setting mode 1024x768-60Hz@RG16 on connectors 27, crtc 39
$ modetest -M atmel-hlcdc -s 27@39:1024x768@C8
unknown format C8


(output on the lcd looks sane for the first two, not that I really
know exactly what to expect)

Looking at the libdrm code, I only find YUV and RGB modes in
tests/util/format.c which make me less confident that I will find
something sane to test with.

So, pointers to code to test with desperately needed...

Cheers,
peda


Re: [PATCH] PCI / PM: Restore the status of PCI devices across hibernation

2017-06-16 Thread Rafael J. Wysocki
On Fri, Jun 16, 2017 at 9:45 PM, Bjorn Helgaas  wrote:
> On Thu, May 25, 2017 at 04:49:07PM +0800, Chen Yu wrote:
>> Currently we saw a lot of "No irq handler" errors during hibernation,
>> which caused the system hang finally:
>>
>> [  710.141581] ata4.00: qc timeout (cmd 0xec)
>> [  710.147135] ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4)
>> [  710.154593] ata4.00: revalidation failed (errno=-5)
>> [  710.468124] ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>> [  710.477746] do_IRQ: 31.151 No irq handler for vector
>>
>> According to above logs, there is an interrupt triggered and it is
>> dispatched to CPU31 with a vector number 151, but there is no handler
>> for it, thus this irq will not get acked and caused irq flood which kill
>> the system. To be more specific, the 31.151 is an interrupt from the ahci
>> host controller.
>>
>> After some investigation, the reason why this issue is triggered is
>> because the thaw_noirq() function does not restore the MSI/MSIX settings
>> across hibernation.
>>
>> The scenario is illustrated below:
>>
>> 1. Before the hibernation starts, the irq 34 is the handler for the ahci 
>> device,
>>which is binded on cpu31.
>> 2. Hibernation starts, the ahci device is put into low power state.
>> 3. All the nonboot CPUs are put offline, so the irq 34 has to be migrated to
>>the last alive one - CPU0.
>> 4. After the snapshot has been created, all the nonboot CPUs are brought up 
>> again,
>>the CPU affinity for IRQ 34 remains to be 0.
>> 5. ahci device are put into D0.
>> 6. The snapshot is written to the disk.
>>
>> The issue is triggered in step 6, in theory the ahci interrupt should be
>> delivered to CPU0, however the actual result is that this interrupt is
>> delivered to the original CPU31 instead, which causes the "No irq handler" 
>> issue.
>>
>> Ying Huang has provided a clue that, in step 3 it is possible that the 
>> writing
>> to the register might not take effect as the PCI devices have been put 
>> suspended.
>> Actually it is true:
>> In step 3, the irq 34 affinity is supposed to be modified from 31 to 0,
>> but actually it did not. In __pci_write_msi_msg(), if the device is already
>> in low power state, the low level msi message entry will not be updated
>> but cached. So in theory during the device restore process, the cached msi
>> modification information should be written back to the hardware, and this
>> is what pci_restore_msi_state() does during normal suspend-resume.
>> But this is not the case for hibernation, pci_restore_msi_state() is not
>> invoked currently, to be more specific, pci_restore_state() is not invoked
>> in pci_pm_thaw_noirq(), although pci_save_state() has saved the necessary
>> pci cached information in pci_pm_freeze_noirq().
>>
>> This patch tries to restore the pci status for the device during hibernation,
>> otherwise the status might be lost across hibernation(for example, the 
>> MSI/MSIX
>> message settings), which might cause problems during hibernation.
>>
>> Suggested-by: Ying Huang 
>> Suggested-by: Rafael J. Wysocki 
>> Cc: Rafael J. Wysocki 
>> Cc: Bjorn Helgaas 
>> Cc: Len Brown 
>> Cc: Dan Williams 
>> Cc: Rui Zhang 
>> Cc: Ying Huang 
>> Cc: linux-...@vger.kernel.org
>> Cc: linux...@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Chen Yu 
>
> Added a stable tag and applied with Rafael's reviewed-by to pci/pm for
> v4.13, thanks!
>
> pci_restore_state() restores a lot of stuff besides MSI/MSI-X: PCIe
> device, link, slot control, ATS, VC, BARs, ACS, IOV.  I guess I'm a
> little surprised that we haven't noticed more issues if all these
> things were broken.

That's because they weren't broken. :-)

None of them is expected to change over the image creation, which is
why pci_pm_thaw_noirq() didn't call pci_restore_state(), but we
overlooked the fact that taking nonboot CPUs offline changed the
configuration of interrupts that needed to be restored afterward.

So this one is really exceptional.

Thanks,
Rafael


Re: [PATCH v4] Introduce v3 namespaced file capabilities

2017-06-16 Thread Stefan Berger

On 06/14/2017 11:05 PM, Serge E. Hallyn wrote:

On Wed, Jun 14, 2017 at 08:27:40AM -0400, Stefan Berger wrote:

On 06/13/2017 07:55 PM, Serge E. Hallyn wrote:

Quoting Stefan Berger (stef...@linux.vnet.ibm.com):

  If all extended
attributes were to support this model, maybe the 'uid' could be
associated with the 'name' of the xattr rather than its 'value' (not
sure whether that's possible).

Right, I missed that in your original email when I saw it this morning.
It's not what my patch does, but it's an interesting idea.  Do you have
a patch to that effect?  We might even be able to generalize that to

No, I don't have a patch. It may not be possible to implement it.
The xattr_handler's  take the name of the xattr as input to get().

That may be ok though.  Assume the host created a container with
10 as the uid for root, which created a container with 13 as
uid for root.  If root in the nested container tries to read the
xattr, the kernel can check for security.foo[13] first, then
security.foo[10], then security.foo.  Or, it can do a listxattr
and look for those.  Am I overlooking one?


So that sounds like a child would 'inherit' the value of an xattr from 
the closest parent if it doesn't have one itself. I guess it would 
depend on the xattr whether that should apply? And removing an xattr 
becomes difficult then if the parent container's xattr always shines 
through...





So one could try to encode the mapped uid in the name. However, that

I thought that's exactly what you were suggesting in your original
email?  "security.capability[uid=2000]"


could lead to problems with stale xattrs in a shared filesystem over
time unless one could limit the number of xattrs with the same
prefix, e.g., security.capability*. So I doubt that it would work.

Hm.  Yeah.  But really how many setups are there like that?  I.e. if
you launch a regular docker or lxd container, the image doesn't do a
bind mount of a shared image, it layers something above it or does a
copy.  What setups do you know of where multiple containers in different
user namespaces mount the same filesystem shared and writeable?


So you think it's a good idea? I am not sure when I would get to it, 
though...


   Stefan





Otherwise it would be good if the value was wrapped in a data
structure used by all xattrs, but that doesn't seem to be the case,
either. So I guess we have to go into each type of value structure
and add a uid field there.


namespace any security.* xattrs.  Wouldn't be automatically enabled
for anything but ima and capabilities, but we could make the infrastructure
generic and re-usable.





Re: [kernel-hardening] Re: [PATCH v4 06/13] iscsi: ensure RNG is seeded before use

2017-06-16 Thread Lee Duncan
On 06/08/2017 05:09 AM, Jason A. Donenfeld wrote:
> On Thu, Jun 8, 2017 at 4:43 AM, Theodore Ts'o  wrote:
>> What was the testing that was done for commit?  It looks safe, but I'm
>> unfamiliar enough with how the iSCSI authentication works that I'd
>> prefer getting an ack'ed by from the iSCSI maintainers or
>> alternatively, information about how to kick off some kind of automated
>> test suite ala xfstests for file systems.
> 
> Only very basic testing from my end.
> 
> I'm thus adding the iSCSI list to see if they'll have a look (patch 
> reattached).
> 
> Jason
> 

It seems like what you are doing is basically "good", i.e. if there is
not enough random data, don't use it. But what happens in that case? The
authentication fails? How does the user know to wait and try again?
-- 
Lee Duncan
SUSE Labs


Re: [PATCH v2 2/5] dt-bindings: scsi: ufs: add document for hi3660-ufs

2017-06-16 Thread Arnd Bergmann
On Fri, Jun 16, 2017 at 8:51 AM, Bu Tao  wrote:
> add ufs node document for hi3660
>
> Signed-off-by: Bu Tao 
> ---
>  .../devicetree/bindings/ufs/hi3660-ufs.txt | 58 
> ++
>  1 file changed, 58 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/ufs/hi3660-ufs.txt
>
> diff --git a/Documentation/devicetree/bindings/ufs/hi3660-ufs.txt 
> b/Documentation/devicetree/bindings/ufs/hi3660-ufs.txt
> new file mode 100644
> index ..461afc8ef017
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/ufs/hi3660-ufs.txt
> @@ -0,0 +1,58 @@
> +* Hisilicon Universal Flash Storage (UFS) Host Controller
> +
> +UFS nodes are defined to describe on-chip UFS hardware macro.
> +Each UFS Host Controller should have its own node.
> +
> +Required properties:
> +- compatible: compatible list, contains one of the following -
> +   "hisilicon,hi3660-ufs" for hisi ufs host controller
> +present on Hi3660 chipset.
> +- reg   : should contain UFS register address space & UFS SYS 
> CTRL register address,
> +- interrupt-parent  : interrupt device
> +- interrupts: interrupt number
> +- clocks   : List of phandle and clock specifier pairs
> +- clock-names   : List of clock input name strings sorted in the same
> + order as the clocks property. "clk_ref", "clk_phy" is 
> optional
> +- resets: reset node register, one reset the clk and the other 
> reset the controller
> +- reset-names   : describe reset node register
> +
> +Optional properties for board device:
> +- ufs-hi3660-use-rate-B: specifies UFS rate-B
> +- ufs-hi3660-broken-fastauto   : specifies no fastauto
> +- ufs-hi3660-use-HS-GEAR3  : specifies UFS HS-GEAR3
> +- ufs-hi3660-use-HS-GEAR2  : specifies UFS HS-GEAR2
> +- ufs-hi3660-use-HS-GEAR1  : specifies UFS HS-GEAR1
> +- ufs-hi3660-broken-clk-gate-bypass: specifies no clk-gate
> +- ufs-hi3660-use-one-line  : specifies UFS use one line work
> +- reset-gpio   : specifies to reset devices

Some of these sound rather generic and might apply to UFS implementations
other than hi3660, so I'd suggest adding them to the base ufs binding with
a generic name instead.

Any DT properties that might be useful across multiple implementations
should be parsed in generic code that gets called by the individual drivers,
and then the properties that are specific to the integration work done by
hisilicon should be prefixed with "hisilicon,", but not normally with the
SoC name: it is quite possible that another SoC will be derived from this
chip and it should reuse the properties.

(note: this is different from the value of the "compatible" property that
is meant to be as specific as possible).

Also, please clarify how your binding relates to the ufshcd binding
in Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt: does
hi3660 implement any registers that are shared with ufshcd, or does
it use the same physical interface with a different register set?

   Arnd


Re: autofs multi-map regression

2017-06-16 Thread Dick Streefland
On Friday 2017-06-16 15:57, Eric W. Biederman wrote:
| I don't believe this is a kernel change.
| 
| I dug up an old VM and I was able to reproduce this issue simply
| by installing autofs, and your auto.master and auto.net files.
| 
| # uname -a
| Linux ubuntu-16 4.4.0-24-generic #43-Ubuntu SMP Wed Jun 8 19:27:37 UTC 2016 
x86_64 x86_64 x86_64 GNU/Linux
| 
| # ls /net/
| localhost
| # ls /net/localhost/loc
| ls: cannot open directory '/net/localhost/loc': Too many levels of symbolic 
links
| # ls /loc
| ls: cannot open directory '/loc/': Too many levels of symbolic links
| 
| I suspect there is configuration somewhere in your autofs
| configuration.  I don't speak autofs well enough to debug the issue at
| this point.  But I can conclusively say it was not the kernel commit you
| pointed at, as I see the issue you are reporting and I don't have that
| commit in the kernel under test.

I have a second partition mounted on /loc, that is the reason for the
multi-map autofs setup. With a separate mount on /loc, you won't see
the errors with the old kernel.

Fact is that my setup worked for a long time, and that it stopped
working after the backport of commit 1064f874 to the ubuntu 4.4
kernel.

-- 
Dick


[PATCH 3/5] soc: bcm: brcmstb: Add support for S2/S3/S5 suspend states (ARM)

2017-06-16 Thread Florian Fainelli
From: Brian Norris 

This commit adds support for the Broadcom STB S2/S3/S5 suspend states on
ARM based SoCs.

This requires quite a lot of code in order to deal with the different HW
blocks that need to be quiesced during suspend:

- DDR PHY SHIM
- DDR memory controller and sequencer
- control processor

The final steps of the suspend execute in an on-chip SRAM and there is a
little bit of assembly code in order to shut down the DDR PHY PLL and
then go into a wfi loop until a wake-up event occurs. Conversely the
resume part involves waiting for the DDR PHY PLL to come back up and
resume executions where we left.

For S3, because of our memory hashing (actual hashing code not included
for simplicity, and is bypassed) we need to relocate the writable
variables (stack) into SRAM shortly before suspending in order to leave
the DRAM untouched and create a reliable hash of its contents.

This code has been contributed by Brian Norris initially and has been
incrementally fixed and updated to support new chips by a lot of people.

Signed-off-by: Brian Norris 
Signed-off-by: Markus Mayer 
Signed-off-by: Justin Chen 
Signed-off-by: Gareth Powell 
Signed-off-by: Doug Berger 
Signed-off-by: Florian Fainelli 
---
 drivers/soc/bcm/brcmstb/Kconfig   |   1 +
 drivers/soc/bcm/brcmstb/pm/Makefile   |   1 +
 drivers/soc/bcm/brcmstb/pm/aon_defs.h | 113 +
 drivers/soc/bcm/brcmstb/pm/pm-arm.c   | 836 ++
 drivers/soc/bcm/brcmstb/pm/pm.h   |  78 
 drivers/soc/bcm/brcmstb/pm/s2-arm.S   |  76 
 6 files changed, 1105 insertions(+)
 create mode 100644 drivers/soc/bcm/brcmstb/pm/Makefile
 create mode 100644 drivers/soc/bcm/brcmstb/pm/aon_defs.h
 create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-arm.c
 create mode 100644 drivers/soc/bcm/brcmstb/pm/pm.h
 create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-arm.S

diff --git a/drivers/soc/bcm/brcmstb/Kconfig b/drivers/soc/bcm/brcmstb/Kconfig
index 996a75db015e..246fc0f30515 100644
--- a/drivers/soc/bcm/brcmstb/Kconfig
+++ b/drivers/soc/bcm/brcmstb/Kconfig
@@ -3,6 +3,7 @@ if SOC_BRCMSTB
 config BRCMSTB_PM
 bool "Support suspend/resume for STB platforms"
 default y
+   depends on ARM
 depends on PM
 
 endif # SOC_BRCMSTB
diff --git a/drivers/soc/bcm/brcmstb/pm/Makefile 
b/drivers/soc/bcm/brcmstb/pm/Makefile
new file mode 100644
index ..66a4f7e43ad5
--- /dev/null
+++ b/drivers/soc/bcm/brcmstb/pm/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ARM)  += s2-arm.o pm-arm.o
diff --git a/drivers/soc/bcm/brcmstb/pm/aon_defs.h 
b/drivers/soc/bcm/brcmstb/pm/aon_defs.h
new file mode 100644
index ..fb936abd847d
--- /dev/null
+++ b/drivers/soc/bcm/brcmstb/pm/aon_defs.h
@@ -0,0 +1,113 @@
+/*
+ * Always ON (AON) register interface between bootloader and Linux
+ *
+ * Copyright © 2014-2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __BRCMSTB_AON_DEFS_H__
+#define __BRCMSTB_AON_DEFS_H__
+
+#include 
+
+/* Magic number in upper 16-bits */
+#define BRCMSTB_S3_MAGIC_MASK   0x
+#define BRCMSTB_S3_MAGIC_SHORT  0x5AFE
+
+enum {
+   /* Restore random key for AES memory verification (off = fixed key) */
+   S3_FLAG_LOAD_RANDKEY= (1 << 0),
+
+   /* Scratch buffer page table is present */
+   S3_FLAG_SCRATCH_BUFFER_TABLE= (1 << 1),
+
+   /* Skip all memory verification */
+   S3_FLAG_NO_MEM_VERIFY   = (1 << 2),
+
+   /*
+* Modification of this bit reserved for bootloader only.
+* 1=PSCI started Linux, 0=Direct jump to Linux.
+*/
+   S3_FLAG_PSCI_BOOT   = (1 << 3),
+
+   /*
+* Modification of this bit reserved for bootloader only.
+* 1=64 bit boot, 0=32 bit boot.
+*/
+   S3_FLAG_BOOTED64= (1 << 4),
+};
+
+#define BRCMSTB_HASH_LEN   (128 / 8) /* 128-bit hash */
+
+#define AON_REG_MAGIC_FLAGS0x00
+#define AON_REG_CONTROL_LOW0x04
+#define AON_REG_CONTROL_HIGH   0x08
+#define AON_REG_S3_HASH0x0c /* hash of S3 
params */
+#define AON_REG_CONTROL_HASH_LEN   0x1c
+#define AON_REG_PANIC  0x20
+
+#define BRCMSTB_S3_MAGIC   0x5AFEB007
+#define BRCMSTB_PANIC_MAGIC0x512E115E
+#define BOOTLOADER_SCRATCH_SIZE64
+#define BRCMSTB_DTU_STATE_MAP_ENTRIES  (8*1024)
+#define BRCMSTB_DTU_CONFIG_ENTRIES (512)
+#define BRCMSTB_DTU_COUNT  (2)
+
+

Re: [PATCH] drivers: net: phy: Correct kernel-doc comment parameter

2017-06-16 Thread Randy Dunlap
On 06/16/2017 03:19 AM, sayli karnik wrote:
> Correct function parameter in kernel-doc comment to fix following
> warnings in the sphinx build:
> 
> .//drivers/net/phy/phy.c:259: warning: No description found for
> parameter 'features'
> .//drivers/net/phy/phy.c:259: warning: Excess function parameter
> 'feature' description in 'phy_lookup_setting'
> 
> Signed-off-by: sayli karnik 

Already fixed. See commit 6c6ab3e73b391b38c9749c5bf21479ed7d60.

What tree did you make this patch against?


> ---
>  drivers/net/phy/phy.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
> index 82ab8fb..7524caa 100644
> --- a/drivers/net/phy/phy.c
> +++ b/drivers/net/phy/phy.c
> @@ -241,7 +241,7 @@ static const struct phy_setting settings[] = {
>   * phy_lookup_setting - lookup a PHY setting
>   * @speed: speed to match
>   * @duplex: duplex to match
> - * @feature: allowed link modes
> + * @features: allowed link modes
>   * @exact: an exact match is required
>   *
>   * Search the settings array for a setting that matches the speed and
> 


-- 
~Randy


[PATCH 0/5] Broadcom STB S2/S3/S5 support for ARM and MIPS

2017-06-16 Thread Florian Fainelli
Hi,

This patch series adds support for S2/S3/S5 suspend/resume states on
ARM and MIPS based Broadcom STB SoCs.

This was submitted a long time ago by Brian, and I am now picking this
up and trying to get this included with support for our latest chips.

Provided that I can collect the necessary Acks from Rob (DT) and other
people (Rafael?) I would probably take this via the Broadcom ARM SoC
pull requests.

Thank you!

Brian Norris (1):
  soc: bcm: brcmstb: Add support for S2/S3/S5 suspend states (ARM)

Florian Fainelli (3):
  dt-bindings: Update Broadcom STB binding
  soc: bcm: brcmstb: Add Kconfig entry point for power management
  dt-bindings: Document MIPS Broadcom STB power management nodes

Justin Chen (1):
  soc bcm: brcmstb: Add support for S2/S3/S5 suspend states (MIPS)

 .../devicetree/bindings/arm/bcm/brcm,brcmstb.txt   |   6 +-
 .../devicetree/bindings/mips/brcm/soc.txt  |  77 ++
 drivers/soc/bcm/Kconfig|   2 +
 drivers/soc/bcm/brcmstb/Kconfig|   9 +
 drivers/soc/bcm/brcmstb/Makefile   |   1 +
 drivers/soc/bcm/brcmstb/pm/Makefile|   2 +
 drivers/soc/bcm/brcmstb/pm/aon_defs.h  | 113 +++
 drivers/soc/bcm/brcmstb/pm/pm-arm.c| 836 +
 drivers/soc/bcm/brcmstb/pm/pm-mips.c   | 461 
 drivers/soc/bcm/brcmstb/pm/pm.h|  89 +++
 drivers/soc/bcm/brcmstb/pm/s2-arm.S|  76 ++
 drivers/soc/bcm/brcmstb/pm/s2-mips.S   | 200 +
 drivers/soc/bcm/brcmstb/pm/s3-mips.S   | 146 
 13 files changed, 2017 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/bcm/brcmstb/Kconfig
 create mode 100644 drivers/soc/bcm/brcmstb/pm/Makefile
 create mode 100644 drivers/soc/bcm/brcmstb/pm/aon_defs.h
 create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-arm.c
 create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-mips.c
 create mode 100644 drivers/soc/bcm/brcmstb/pm/pm.h
 create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-arm.S
 create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-mips.S
 create mode 100644 drivers/soc/bcm/brcmstb/pm/s3-mips.S

-- 
2.9.3



[PATCH 1/5] dt-bindings: Update Broadcom STB binding

2017-06-16 Thread Florian Fainelli
Update the Broadcom STB binding document with new compatible strings for
the DDR PHY and memory controller found on newer chips.

Signed-off-by: Florian Fainelli 
---
 Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt 
b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
index 0d0c1ae81bed..790e6b0b8306 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
@@ -164,6 +164,8 @@ Control registers for this memory controller's DDR PHY.
 
 Required properties:
 - compatible : should contain one of these
+   "brcm,brcmstb-ddr-phy-v71.1"
+   "brcm,brcmstb-ddr-phy-v72.0"
"brcm,brcmstb-ddr-phy-v225.1"
"brcm,brcmstb-ddr-phy-v240.1"
"brcm,brcmstb-ddr-phy-v240.2"
@@ -184,7 +186,9 @@ Sequencer DRAM parameters and control registers. Used for 
Self-Refresh
 Power-Down (SRPD), among other things.
 
 Required properties:
-- compatible : should contain "brcm,brcmstb-memc-ddr"
+- compatible : should contain one of these
+   "brcm,brcmstb-memc-ddr-rev-b.2.2"
+   "brcm,brcmstb-memc-ddr"
 - reg: the MEMC DDR register range
 
 Example:
-- 
2.9.3



[PATCH 5/5] soc bcm: brcmstb: Add support for S2/S3/S5 suspend states (MIPS)

2017-06-16 Thread Florian Fainelli
From: Justin Chen 

This commit adds support for the Broadcom STB S2/S3/S5 suspend
states on MIPS based SoCs.

This requires quite a lot of code in order to deal with the
different HW blocks that need to be quiesced during suspend:

- DDR PHY
- DDR memory controller and arbiter
- control processor

The final steps of the suspend execute in cache and there is a little
bit of assembly code in order to shut down the DDR PHY PLL and then go
into a wait loop until a wake-up event occurs. Conversely the resume part
involves waiting for the DDR PHY PLL to come back up and resume
executions where we left.

Signed-off-by: Justin Chen 
Signed-off-by: Florian Fainelli 
---
 drivers/soc/bcm/brcmstb/Kconfig  |   2 +-
 drivers/soc/bcm/brcmstb/pm/Makefile  |   1 +
 drivers/soc/bcm/brcmstb/pm/pm-mips.c | 461 +++
 drivers/soc/bcm/brcmstb/pm/pm.h  |  13 +-
 drivers/soc/bcm/brcmstb/pm/s2-mips.S | 200 +++
 drivers/soc/bcm/brcmstb/pm/s3-mips.S | 146 +++
 6 files changed, 821 insertions(+), 2 deletions(-)
 create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-mips.c
 create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-mips.S
 create mode 100644 drivers/soc/bcm/brcmstb/pm/s3-mips.S

diff --git a/drivers/soc/bcm/brcmstb/Kconfig b/drivers/soc/bcm/brcmstb/Kconfig
index 246fc0f30515..7ab04f10dbe7 100644
--- a/drivers/soc/bcm/brcmstb/Kconfig
+++ b/drivers/soc/bcm/brcmstb/Kconfig
@@ -3,7 +3,7 @@ if SOC_BRCMSTB
 config BRCMSTB_PM
 bool "Support suspend/resume for STB platforms"
 default y
-   depends on ARM
+   depends on ARM || BMIPS_GENERIC
 depends on PM
 
 endif # SOC_BRCMSTB
diff --git a/drivers/soc/bcm/brcmstb/pm/Makefile 
b/drivers/soc/bcm/brcmstb/pm/Makefile
index 66a4f7e43ad5..d524ce9644ba 100644
--- a/drivers/soc/bcm/brcmstb/pm/Makefile
+++ b/drivers/soc/bcm/brcmstb/pm/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_ARM)  += s2-arm.o pm-arm.o
+obj-$(CONFIG_BMIPS_GENERIC)+= s2-mips.o s3-mips.o pm-mips.o
diff --git a/drivers/soc/bcm/brcmstb/pm/pm-mips.c 
b/drivers/soc/bcm/brcmstb/pm/pm-mips.c
new file mode 100644
index ..5037b2dc6be9
--- /dev/null
+++ b/drivers/soc/bcm/brcmstb/pm/pm-mips.c
@@ -0,0 +1,461 @@
+/*
+ * MIPS-specific support for Broadcom STB S2/S3/S5 power management
+ *
+ * Copyright (C) 2016-2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "pm.h"
+
+#define S2_NUM_PARAMS  6
+#define MAX_NUM_MEMC   3
+
+/* S3 constants */
+#define MAX_GP_REGS16
+#define MAX_CP0_REGS   32
+#define NUM_MEMC_CLIENTS   128
+#define AON_CTRL_RAM_SIZE  128
+#define BRCMSTB_S3_MAGIC   0x5AFEB007
+
+#define CLEAR_RESET_MASK   0x01
+
+/* Index each CP0 register that needs to be saved */
+#define CONTEXT0
+#define USER_LOCAL 1
+#define PGMK   2
+#define HWRENA 3
+#define COMPARE4
+#define STATUS 5
+#define CONFIG 6
+#define MODE   7
+#define EDSP   8
+#define BOOT_VEC   9
+#define EBASE  10
+
+struct brcmstb_memc {
+   void __iomem *ddr_phy_base;
+   void __iomem *arb_base;
+};
+
+struct brcmstb_pm_control {
+   void __iomem *aon_ctrl_base;
+   void __iomem *aon_sram_base;
+   void __iomem *timers_base;
+   struct brcmstb_memc memcs[MAX_NUM_MEMC];
+   int num_memc;
+};
+
+struct brcm_pm_s3_context {
+   u32 cp0_regs[MAX_CP0_REGS];
+   u32 memc0_rts[NUM_MEMC_CLIENTS];
+   u32 sc_boot_vec;
+};
+
+struct brcmstb_mem_transfer;
+
+struct brcmstb_mem_transfer {
+   struct brcmstb_mem_transfer *next;
+   void*src;
+   void*dst;
+   dma_addr_t  pa_src;
+   dma_addr_t  pa_dst;
+   u32 len;
+   u8  key;
+   u8  mode;
+   u8  src_remapped;
+   u8  dst_remapped;
+   u8  src_dst_remapped;
+};
+
+#define AON_SAVE_SRAM(base, idx, val) \
+   __raw_writel(val, base + (idx << 2))
+
+/* Used for saving registers in asm */
+u32 gp_regs[MAX_GP_REGS];
+
+#defineBSP_CLOCK_STOP  0x00
+#define PM_INITIATE0x01
+
+static struct brcmstb_pm_control ctrl;
+
+static void

[PATCH 4/5] dt-bindings: Document MIPS Broadcom STB power management nodes

2017-06-16 Thread Florian Fainelli
Document the different nodes required for supporting S2/S3/S5 suspend
states on MIPS-based Broadcom STB SoCs.

Signed-off-by: Florian Fainelli 
---
 .../devicetree/bindings/mips/brcm/soc.txt  | 77 ++
 1 file changed, 77 insertions(+)

diff --git a/Documentation/devicetree/bindings/mips/brcm/soc.txt 
b/Documentation/devicetree/bindings/mips/brcm/soc.txt
index e4e1cd91fb1f..f7413168d938 100644
--- a/Documentation/devicetree/bindings/mips/brcm/soc.txt
+++ b/Documentation/devicetree/bindings/mips/brcm/soc.txt
@@ -11,3 +11,80 @@ Required properties:
 
 The experimental -viper variants are for running Linux on the 3384's
 BMIPS4355 cable modem CPU instead of the BMIPS5000 application processor.
+
+Power management
+----------------
+
+For power management (particularly, S2/S3/S5 system suspend), the following SoC
+components are needed:
+
+= Always-On control block (AON CTRL)
+
+This hardware provides control registers for the "always-on" (even in low-power
+modes) hardware, such as the Power Management State Machine (PMSM).
+
+Required properties:
+- compatible : should contain "brcm,brcmstb-aon-ctrl"
+- reg: the register start and length for the AON CTRL block
+
+Example:
+
+aon-ctrl@41 {
+   compatible = "brcm,brcmstb-aon-ctrl";
+   reg = <0x41 0x400>;
+};
+
+= Memory controllers
+
+A Broadcom STB SoC typically has a number of independent memory controllers,
+each of which may have several associated hardware blocks, which are versioned
+independently (control registers, DDR PHYs, etc.). One might consider
+describing these controllers as a parent "memory controllers" block, which
+contains N sub-nodes (one for each controller in the system), each of which is
+associated with a number of hardware register resources (e.g., its PHY). See
+the example device tree snippet below.
+
+== MEMC (MEMory Controller)
+
+Represents a single memory controller instance.
+
+Required properties:
+- compatible : should contain "brcm,brcmstb-memc" and "simple-bus"
+
+Should contain subnodes for any of the following relevant hardware resources:
+
+== DDR PHY control
+
+Control registers for this memory controller's DDR PHY.
+
+Required properties:
+- compatible : should contain one of these
+   "brcm,brcmstb-ddr-phy-v64.5"
+   "brcm,brcmstb-ddr-phy"
+
+- reg: the DDR PHY register range
+
+== MEMC Arbiter
+
+The memory controller arbiter is responsible for memory clients allocation
+(bandwidth, priorities etc.) and needs to have its contents restored during
+deep sleep states (S3).
+
+Required properties:
+
+- compatible   : should contain one of these
+   "brcm,brcmstb-memc-arb-v10.0.0.0"
+   "brcm,brcmstb-memc-arb"
+
+- reg  : the DDR Arbiter register range
+
+== Timers
+
+The Broadcom STB chips contain a timer block with several general purpose
+timers that can be used.
+
+Required properties:
+
+- compatible   : should contain "brcm,brcmstb-timers"
+- reg  : the timers register range
+
-- 
2.9.3



[PATCH 2/5] soc: bcm: brcmstb: Add Kconfig entry point for power management

2017-06-16 Thread Florian Fainelli
Add the necessary plumbing to select and build CONFIG_BRCMSTB_PM.
Functional code is not added yet.

Signed-off-by: Florian Fainelli 
---
 drivers/soc/bcm/Kconfig  | 2 ++
 drivers/soc/bcm/brcmstb/Kconfig  | 8 
 drivers/soc/bcm/brcmstb/Makefile | 1 +
 3 files changed, 11 insertions(+)
 create mode 100644 drivers/soc/bcm/brcmstb/Kconfig

diff --git a/drivers/soc/bcm/Kconfig b/drivers/soc/bcm/Kconfig
index 49f1e2a75d61..055a845ed979 100644
--- a/drivers/soc/bcm/Kconfig
+++ b/drivers/soc/bcm/Kconfig
@@ -20,4 +20,6 @@ config SOC_BRCMSTB
 
  If unsure, say N.
 
+source "drivers/soc/bcm/brcmstb/Kconfig"
+
 endmenu
diff --git a/drivers/soc/bcm/brcmstb/Kconfig b/drivers/soc/bcm/brcmstb/Kconfig
new file mode 100644
index ..996a75db015e
--- /dev/null
+++ b/drivers/soc/bcm/brcmstb/Kconfig
@@ -0,0 +1,8 @@
+if SOC_BRCMSTB
+
+config BRCMSTB_PM
+bool "Support suspend/resume for STB platforms"
+default y
+depends on PM
+
+endif # SOC_BRCMSTB
diff --git a/drivers/soc/bcm/brcmstb/Makefile b/drivers/soc/bcm/brcmstb/Makefile
index 9120b2715d3e..ee5b4de741b8 100644
--- a/drivers/soc/bcm/brcmstb/Makefile
+++ b/drivers/soc/bcm/brcmstb/Makefile
@@ -1 +1,2 @@
 obj-y  += common.o biuctrl.o
+obj-y  += pm/
-- 
2.9.3



[PATCH v2 0/4] Generalize fncpy availability

2017-06-16 Thread Florian Fainelli
Hi all,

This patch series makes ARM's fncpy() implementation more generic (dropping the
Thumb-specifics) and available in an asm-generic header file.

Tested on a Broadcom ARM64 STB platform with code that is written to SRAM.

Changes in v2:
- leave the ARM implementation where it is
- make the generic truly generic (no)

This is helpful in making SoC-specific power management code become true drivers
that can be shared between different architectures.

Thanks!

Florian Fainelli (4):
  ARM: fncpy: Rename include guards
  asm-generic: Provide a fncpy() implementation
  arm64: Provide a fncpy implementation
  misc: sram: Allow ARM64 to select SRAM_EXEC

 arch/arm/include/asm/fncpy.h   |  6 +--
 arch/arm64/include/asm/fncpy.h |  6 +++
 drivers/misc/Kconfig   |  2 +-
 include/asm-generic/fncpy.h| 93 ++
 4 files changed, 103 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm64/include/asm/fncpy.h
 create mode 100644 include/asm-generic/fncpy.h

-- 
2.9.3



[PATCH v2 2/4] asm-generic: Provide a fncpy() implementation

2017-06-16 Thread Florian Fainelli
Define a generic fncpy() implementation largely based on the ARM version
that requires an 8 bytes alignment for the destination address where to
copy this function as well as the function's own address.

Signed-off-by: Florian Fainelli 
---
 include/asm-generic/fncpy.h | 93 +
 1 file changed, 93 insertions(+)
 create mode 100644 include/asm-generic/fncpy.h

diff --git a/include/asm-generic/fncpy.h b/include/asm-generic/fncpy.h
new file mode 100644
index ..5bb3e5d20ae0
--- /dev/null
+++ b/include/asm-generic/fncpy.h
@@ -0,0 +1,93 @@
+/*
+ * include/asm-generic/fncpy.h - helper macros for function body copying
+ *
+ * Copyright (C) 2011 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * These macros are intended for use when there is a need to copy a low-level
+ * function body into special memory.
+ *
+ * For example, when reconfiguring the SDRAM controller, the code doing the
+ * reconfiguration may need to run from SRAM.
+ *
+ * NOTE: that the copied function body must be entirely self-contained and
+ * position-independent in order for this to work properly.
+ *
+ * NOTE: in order for embedded literals and data to get referenced correctly,
+ * the alignment of functions must be preserved when copying.  To ensure this,
+ * the source and destination addresses for fncpy() must be aligned to a
+ * multiple of 8 bytes: you will get a BUG() if this condition is not met.
+ * You will typically need a ".align 3" directive in the assembler where the
+ * function to be copied is defined, and ensure that your allocator for the
+ * destination buffer returns 8-byte-aligned pointers.
+ *
+ * Typical usage example:
+ *
+ * extern int f(args);
+ * extern uint32_t size_of_f;
+ * int (*copied_f)(args);
+ * void *sram_buffer;
+ *
+ * copied_f = fncpy(sram_buffer, &f, size_of_f);
+ *
+ * ... later, call the function: ...
+ *
+ * copied_f(args);
+ *
+ * The size of the function to be copied can't be determined from C:
+ * this must be determined by other means, such as adding assembler directives
+ * in the file where f is defined.
+ */
+
+#ifndef __ASM_ARM_FNCPY_H
+#define __ASM_ARM_FNCPY_H
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include <asm/bug.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Minimum alignment requirement for the source and destination addresses
+ * for function copying.
+ */
+#define FNCPY_ALIGN 8
+
+#define fncpy(dest_buf, funcp, size) ({ \
+   uintptr_t __funcp_address;  \
+   typeof(funcp) __result; \
+   \
+   asm("" : "=r" (__funcp_address) : "0" (funcp)); \
+   \
+   /*  \
+* Ensure alignment of source and destination addresses.\
+*/ \
+   BUG_ON((uintptr_t)(dest_buf) & (FNCPY_ALIGN - 1) || \
+   (__funcp_address & (FNCPY_ALIGN - 1))); \
+   \
+   memcpy(dest_buf, (void const *)__funcp_address, size);  \
+   flush_icache_range((unsigned long)(dest_buf),   \
+   (unsigned long)(dest_buf) + (size));\
+   \
+   asm("" : "=r" (__result)\
+   : "0" ((uintptr_t)(dest_buf))); \
+   \
+   __result;   \
+})
+
+#endif /* !__ASM_ARM_FNCPY_H */
-- 
2.9.3



[PATCH v2 4/4] misc: sram: Allow ARM64 to select SRAM_EXEC

2017-06-16 Thread Florian Fainelli
Now that ARM64 also has a fncpy() implementation, allow selecting
SRAM_EXEC for ARM64 as well.

Signed-off-by: Florian Fainelli 
---
 drivers/misc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 07bbd4cc1852..ac8779278c0c 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -464,7 +464,7 @@ config SRAM
bool "Generic on-chip SRAM driver"
depends on HAS_IOMEM
select GENERIC_ALLOCATOR
-   select SRAM_EXEC if ARM
+   select SRAM_EXEC if ARM || ARM64
help
  This driver allows you to declare a memory region to be managed by
  the genalloc API. It is supposed to be used for small on-chip SRAM
-- 
2.9.3



[PATCH v2 1/4] ARM: fncpy: Rename include guards

2017-06-16 Thread Florian Fainelli
In preparation for allowing a generic fncpy() implementation to live
under include/asm-generic/fncpy.h, rename the current include guards to
be __ASM_ARM_FNCPY_H, this also makes the header file more consistent
with other headers in the same directory.

Signed-off-by: Florian Fainelli 
---
 arch/arm/include/asm/fncpy.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/fncpy.h b/arch/arm/include/asm/fncpy.h
index de5354746924..86a8fc14cde9 100644
--- a/arch/arm/include/asm/fncpy.h
+++ b/arch/arm/include/asm/fncpy.h
@@ -53,8 +53,8 @@
  * in the file where f is defined.
  */
 
-#ifndef __ASM_FNCPY_H
-#define __ASM_FNCPY_H
+#ifndef __ASM_ARM_FNCPY_H
+#define __ASM_ARM_FNCPY_H
 
 #include <linux/types.h>
 #include <linux/string.h>
@@ -91,4 +91,4 @@
__result;   \
 })
 
-#endif /* !__ASM_FNCPY_H */
+#endif /* !__ASM_ARM_FNCPY_H */
-- 
2.9.3



[PATCH v2 3/4] arm64: Provide a fncpy implementation

2017-06-16 Thread Florian Fainelli
Utilize the asm-generic/fncpy.h implementation for ARM64 to allow the
use of drivers/misc/sram*.c on these platforms as well.

Signed-off-by: Florian Fainelli 
---
 arch/arm64/include/asm/fncpy.h | 6 ++
 1 file changed, 6 insertions(+)
 create mode 100644 arch/arm64/include/asm/fncpy.h

diff --git a/arch/arm64/include/asm/fncpy.h b/arch/arm64/include/asm/fncpy.h
new file mode 100644
index ..578f942f55e4
--- /dev/null
+++ b/arch/arm64/include/asm/fncpy.h
@@ -0,0 +1,6 @@
+#ifndef __ASMARM64_FNCPY_H
+#define __ASMARM64_FNCPY_H
+
+#include <asm-generic/fncpy.h>
+
+#endif /* __ASMARM64_FNCPY_H */
-- 
2.9.3



Re: autofs multi-map regression

2017-06-16 Thread Eric W. Biederman
Dick Streefland  writes:

> On Friday 2017-06-16 12:03, Eric W. Biederman wrote:
> | Interesting...
> | 
> | Can you test this on a stock 4.11 kernel?
> | 
> | I definitely need a little bit more information to solve this.  That
> | commit did not add any new error conditions so I need to understand
> | what state you are getting yourself into that is affected by this
> | commit.
> | 
> | Is there a chance you can post /proc/self/mountinfo from when this is
> | happening?
>
> I've installed the mainline 4.11 kernel from:
>
>   http://kernel.ubuntu.com/~kernel-ppa/mainline/v4.11/
>
> and this kernel works correctly!
>
> So either this issue was fixed in the meantime, or it is something
> specific to the Ubuntu kernel. I guess I should file a bug report
> with Ubuntu then?

Please.

> I've also looked at /proc/self/mountinfo before and directly after the
> mount attempt. Here are the ext4 and autofs entries for the failing 4.4
> kernel:

Thank you.

I am definitely out of my depth on the autofs portion of this.  As
things are working with 4.11 and failing with my test of 4.4 with
a much older kernel.  I will leave this with you and the ubuntu folks to
sort out.

Good Luck,

Eric


> before:
> 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
> rw,errors=remount-ro,data=ordered
> 41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs 
> systemd-1 rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct
> 46 23 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 
> rw,block_validity,delalloc,barrier,user_xattr,acl
> 202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net 
> rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect
>

> after:
> 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
> rw,errors=remount-ro,data=ordered
> 41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs 
> systemd-1 rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct
> 46 162 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 
> rw,block_validity,delalloc,barrier,user_xattr,acl
> 202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net 
> rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect
> 157 202 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 
> rw,errors=remount-ro,data=ordered
> 161 157 0:47 / /net/localhost/loc rw,relatime shared:119 - autofs 
> /etc/auto.net rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset
> 162 23 0:47 / /loc rw,relatime shared:119 - autofs /etc/auto.net 
> rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset
>
> And here the info for the working mainline 4.11 kernel:
>
> before:
> 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
> rw,errors=remount-ro,data=ordered
> 74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs 
> systemd-1 
> rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754
> 45 23 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 
> rw,block_validity,delalloc,barrier,user_xattr,acl
> 208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net 
> rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555
>
> after:
> 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
> rw,errors=remount-ro,data=ordered
> 74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs 
> systemd-1 
> rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754
> 45 175 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 
> rw,block_validity,delalloc,barrier,user_xattr,acl
> 208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net 
> rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555
> 162 208 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 
> rw,errors=remount-ro,data=ordered
> 166 162 0:48 / /net/localhost/loc rw,relatime shared:122 - autofs 
> /etc/auto.net 
> rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555
> 167 23 0:48 / /loc rw,relatime shared:122 - autofs /etc/auto.net 
> rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555
> 174 166 8:4 / /net/localhost/loc rw,nosuid,nodev,noatime shared:28 - ext4 
> /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl
> 175 167 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 
> rw,block_validity,delalloc,barrier,user_xattr,acl


Re: [RFC PATCH 1/3] atmel-hlcdc: add support for 8-bit color lookup table mode

2017-06-16 Thread Peter Rosin
On 2017-06-16 18:15, Boris Brezillon wrote:
> Hi Peter,
> 
> On Fri, 16 Jun 2017 17:54:04 +0200
> Peter Rosin  wrote:
> 
>> On 2017-06-16 12:01, Boris Brezillon wrote:
>>> Hi Peter,
>>>
>>> On Fri, 16 Jun 2017 11:12:25 +0200
>>> Peter Rosin  wrote:
>>>   
 All layers of chips support this, the only variable is the base address
 of the lookup table in the register map.

 Signed-off-by: Peter Rosin 
 ---
  drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c  | 48 
 +
  drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c| 13 +++
  drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h| 16 +
  drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c |  5 +++
  4 files changed, 82 insertions(+)

 diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c 
 b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
 index 5348985..75871b5 100644
 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
 +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
 @@ -61,6 +61,7 @@ struct atmel_hlcdc_crtc {
struct atmel_hlcdc_dc *dc;
struct drm_pending_vblank_event *event;
int id;
 +  u32 clut[ATMEL_HLCDC_CLUT_SIZE];  
>>>
>>> Do we really need to duplicate this table here? I mean, the gamma_lut
>>> table should always be available in the crtc_state, so do you have a
> >>> good reason to keep a copy here?  
>>
>> If I don't keep a copy in the driver, it doesn't work when there's no
>> gamma_lut. And there is no gamma_lut when I use fbdev emulation. Maybe
>> that's a bug somewhere else?
> 
> Can't we re-use crtc->gamma_store? Honestly, I don't know how the
> fbdev->DRM link should be done, so we'd better wait for DRM maintainers
> feedback here (Daniel, any opinion?).

Ahh, gamma_store. Makes perfect sense. Thanks for that pointer!

>>
>> Sure, I could have added it in patch 3/3 instead, but didn't when I
>> divided the work into patches...
> 
> No, my point is that IMO it shouldn't be needed at all.

Right, with gamma_store it's no longer needed.

>>
  };
  
  static inline struct atmel_hlcdc_crtc *
 @@ -140,6 +141,46 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct 
 drm_crtc *c)
   cfg);
  }
  
 +static void
 +atmel_hlcdc_crtc_load_lut(struct drm_crtc *c)
 +{
 +  struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c);
 +  struct atmel_hlcdc_dc *dc = crtc->dc;
 +  int layer;
 +  int idx;
 +
 +  for (layer = 0; layer < ATMEL_HLCDC_MAX_LAYERS; layer++) {
 +  if (!dc->layers[layer])
 +  continue;
 +  for (idx = 0; idx < ATMEL_HLCDC_CLUT_SIZE; idx++)
 +  atmel_hlcdc_layer_write_clut(dc->layers[layer],
 +   idx, crtc->clut[idx]);
 +  }
 +}
 +
 +static void
 +atmel_hlcdc_crtc_flush_lut(struct drm_crtc *c)
 +{
 +  struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c);
 +  struct drm_crtc_state *state = c->state;
 +  struct drm_color_lut *lut;
 +  int idx;
 +
 +  if (!state->gamma_lut)
 +  return;
 +
 +  lut = (struct drm_color_lut *)state->gamma_lut->data;
 +
 +  for (idx = 0; idx < ATMEL_HLCDC_CLUT_SIZE; idx++) {
 +  crtc->clut[idx] =
 +  ((lut[idx].red << 8) & 0xff) |
 +  (lut[idx].green & 0xff00) |
 +  (lut[idx].blue >> 8);
 +  }
 +
 +  atmel_hlcdc_crtc_load_lut(c);
 +}
 +
  static enum drm_mode_status
  atmel_hlcdc_crtc_mode_valid(struct drm_crtc *c,
const struct drm_display_mode *mode)
 @@ -312,6 +353,9 @@ static void atmel_hlcdc_crtc_atomic_flush(struct 
 drm_crtc *crtc,
  struct drm_crtc_state *old_s)
  {
/* TODO: write common plane control register if available */
 +
 +  if (crtc->state->color_mgmt_changed)
 +  atmel_hlcdc_crtc_flush_lut(crtc);  
>>>
>>> Hm, it's probably too late to do it here. Planes have already been
>>> enabled and the engine may have started to fetch data and do the
>>> composition. You could do that in ->update_plane() [1], and make it a
>>> per-plane thing.
>>>
>>> I'm not sure, but I think you can get the new crtc_state from
>>> plane->crtc->state in this context (state have already been swapped,
>>> and new state is being applied, which means relevant locks are held).  
>>
>> Ok, I can move it there. My plan is to just copy the default .update_plane
>> function and insert 
>>
>>  if (crtc->state->color_mgmt_changed && crtc->state->gamma_lut) {
>>  ...
>>  }
>>
>> just before the drm_atomic_commit(state) call. Sounds ok?
> 
> Why would you copy the default ->update_plane() when we already have
> our own ->atomic_update_plane() implementation [1]? Just put it there
> (before the atmel

Re: [PATCH V1 09/15] spmi: pmic-arb: check apid enabled before calling the handler

2017-06-16 Thread Stephen Boyd
On 06/14, kgu...@codeaurora.org wrote:
> On 2017-06-01 02:09, Stephen Boyd wrote:
> >On 05/30, Kiran Gunda wrote:
> >>From: Abhijeet Dharmapurikar 
> >>
> >>The driver currently invokes the apid handler (periph_handler())
> >
> >You mean periph_interrupt()?
> >
> Yes.
> >>once it sees that the summary status bit for that apid is set.
> >>
> >>However the hardware is designed to set that bit even if the apid
> >>interrupts are disabled. The driver should check whether the apid
> >>is indeed enabled before calling the apid handler.
> >
> >Really? Wow that is awful. Or is this because ACC_ENABLE bit is
> >always set now and never cleared?
> >
> > Yes. It is awful. It is not because the ACC_ENABLE bit is set.
> >>
> >>Signed-off-by: Abhijeet Dharmapurikar 
> >>Signed-off-by: Kiran Gunda 
> >>---
> >> drivers/spmi/spmi-pmic-arb.c | 10 +++---
> >> 1 file changed, 7 insertions(+), 3 deletions(-)
> >>
> >>diff --git a/drivers/spmi/spmi-pmic-arb.c
> >>b/drivers/spmi/spmi-pmic-arb.c
> >>index ad34491..f8638fa 100644
> >>--- a/drivers/spmi/spmi-pmic-arb.c
> >>+++ b/drivers/spmi/spmi-pmic-arb.c
> >>@@ -536,8 +536,8 @@ static void pmic_arb_chained_irq(struct
> >>irq_desc *desc)
> >>void __iomem *intr = pa->intr;
> >>int first = pa->min_apid >> 5;
> >>int last = pa->max_apid >> 5;
> >>-   u32 status;
> >>-   int i, id;
> >>+   u32 status, enable;
> >>+   int i, id, apid;
> >>
> >>chained_irq_enter(chip, desc);
> >>
> >>@@ -547,7 +547,11 @@ static void pmic_arb_chained_irq(struct
> >>irq_desc *desc)
> >>while (status) {
> >>id = ffs(status) - 1;
> >>status &= ~BIT(id);
> >>-   periph_interrupt(pa, id + i * 32);
> >>+   apid = id + i * 32;
> >>+   enable = readl_relaxed(intr +
> >>+   pa->ver_ops->acc_enable(apid));
> >
> >Do we need to read the hardware to figure this out? After earlier
> >patches in this series we would never clear the
> >SPMI_PIC_ACC_ENABLE_BIT after one of the irqs in a peripheral is
> >unmasked for the first time (which looks to be fixing a bug in
> >the existing driver BTW). So in practice, this should almost
> >always be true.
> >
> yes. We have removed clearing the SPMI_PIC_ACC_ENABLE_BIT from the
> irq_mask,
> because if we disable this BIT it disables all the peripheral IRQs,
> which we don't want.

Right, we could reference count it though and only clear and set
the bit when we mask and unmask the last irq in the peripheral.

> 
> Once the peripheral fires the interrupt the summary status bit for
> that peripheral
> is set even though the SPMI_PIC_ACC_ENABLE_BIT is not enabled.
> That's why we have to
> read this register to not service the interrupt that is not
> requested/enabled yet.
> This SPMI_PIC_ACC_ENABLE_BIT is enabled during the irq_unmask which
> is called from request_irq.

Ok. So this is again about handling the case where an interrupt
is pending out of the bootloader?

> 
> >In the one case that it isn't true, we'll be handling some other
> >irq for another peripheral and then hardware will tell us there's
> >an interrupt for a peripheral that doesn't have any interrupts
> >unmasked. We would call periph_interrupt() and then that
> >shouldn't see any interrupts in the status register for that
> >APID. So we do some more work, but nothing happens still. Did I
> >miss something? What is this fixing?
> 
> Yes. As you said this fixes the issue of calling the periph_interrupt
> > for some other irq that is not requested and enabled yet.


Hmm. I seemed to miss the fact that periph_interrupt() will see
an unmasked interrupt and think it's valid. I thought that only
SPMI_PIC_ACC_ENABLE_BIT was broken, but you're saying that the
status register for a particular peripheral will always latch
interrupts even if we haven't enabled them?

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v9 2/4] arm64: dts: hisi: add kirin pcie node

2017-06-16 Thread Bjorn Helgaas
On Tue, Jun 06, 2017 at 07:19:53PM +0800, Guodong Xu wrote:
> Hi, Arnd
> 
> On Tue, Jun 6, 2017 at 5:23 PM, Arnd Bergmann  wrote:
> > On Sun, Jun 4, 2017 at 2:03 AM, kbuild test robot  wrote:
> >> Hi Xiaowei,
> >>
> >> [auto build test ERROR on pci/next]
> >> [also build test ERROR on v4.12-rc3 next-20170602]
> >> [if your patch is applied to the wrong git tree, please drop us a note to 
> >> help improve the system]
> >>
> >> url:
> >> https://github.com/0day-ci/linux/commits/Xiaowei-Song/add-PCIe-driver-for-Kirin-PCIe/20170531-182118
> >> base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git 
> >> next
> >> config: arm64-allnoconfig (attached as .config)
> >> compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
> >> reproduce:
> >> wget 
> >> https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross 
> >> -O ~/bin/make.cross
> >> chmod +x ~/bin/make.cross
> >> # save the attached .config to linux build tree
> >> make.cross ARCH=arm64
> >>
> >> All errors (new ones prefixed by >>):
> >>
>  Error: arch/arm64/boot/dts/hisilicon/hi3660.dtsi:180.24-25 syntax error
>  FATAL ERROR: Unable to parse input tree
> >
> > We keep getting the build errors for patch submissions. Obviously the patch 
> > is
> > still broken and can't be merged as-is. What is the plan for merging the 
> > series?
> >
> 
> This dts patch can be applied to dts series [1]. For upstream review
> purpose, hi3660-hikey960 dts patches, which don't have a related
> driver changes, are sent in [1]. Other patches, which need driver
> changes, like this one, are sent together with driver.
> 
> Patchset [1] is now at its v2 review. Rob Herring already gave his ACK
> for some of them in v1. Hopefully I can get more ACK for remaining
> ones, and make them ready for v4.13 merging window.
> 
> [1], http://www.spinics.net/lists/devicetree/msg178303.html

I don't know how you want to deal with the DTS build failure.  From a
PCI perspective, I think I could apply patches 1 and 3 pretty easily
by themselves.

If/when you post these again, please incorporate the following
incremental diff to clean up various whitespace and capitalization
nits (these are spread across several of your patches).


diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt 
b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
index 68ffa0fbcd73..20357d840af1 100644
--- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
@@ -24,8 +24,8 @@ Example based on kirin960:
 
pcie@f400 {
compatible = "hisilicon,kirin-pcie";
-   reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 0x1000>,
- <0x0 0xf3f2 0x0 0x4>, <0x0 0xF400 0 0x2000>;
+   reg = <0x0 0xf400 0x0  0x1000>, <0x0 0xff3fe000 0x0 0x1000>,
+ <0x0 0xf3f2 0x0 0x4>, <0x0 0xf400 0x0 0x2000>;
reg-names = "dbi","apb","phy", "config";
bus-range = <0x0  0x1>;
#address-cells = <3>;
@@ -46,5 +46,5 @@ Example based on kirin960:
 <&crg_ctrl HI3660_ACLK_GATE_PCIE>;
clock-names = "pcie_phy_ref", "pcie_aux",
  "pcie_apb_phy", "pcie_apb_sys", "pcie_aclk";
-   reset-gpios = <&gpio11 1 0 >;
+   reset-gpios = <&gpio11 1 0>;
};
diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
index e8feb2fb4d53..7bc89baa40ba 100644
--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
@@ -159,12 +159,12 @@
 
pcie@f400 {
compatible = "hisilicon,kirin960-pcie";
-   reg = <0x0 0xf400 0x0 0x1000>,
- <0x0 0xff3fe000 0x0 0x1000>,
+   reg = <0x0 0xf400 0x0  0x1000>,
+ <0x0 0xff3fe000 0x0  0x1000>,
  <0x0 0xf3f2 0x0 0x4>,
- <0x0 0xF500 0x0 0x2000>;
+ <0x0 0xf500 0x0  0x2000>;
reg-names = "dbi", "apb", "phy", "config";
-   bus-range = <0x0  0x1>;
+   bus-range = <0x0 0x1>;
#address-cells = <3>;
#size-cells = <2>;
device_type = "pci";
@@ -173,7 +173,7 @@
num-lanes = <1>;
#interrupt-cells = <1>;
interrupt-map-mask = <0xf800 0 0 7>;
-   interrupt-map = <0x0 0 0 1 &gic 0 0 0 282 4>,
+   interrupt-map = <0x0 0 0 1 &gic 0 0 0  282 4>,
<0x0 0 0 2 &gic 0 0 0  283 4>,
<0x0 0 0 3 &gic 0 0 0  284 4>,

Re: [PATCH v2 03/11] tty: kbd: reduce stack size with KASAN

2017-06-16 Thread Dmitry Torokhov
On Fri, Jun 16, 2017 at 1:56 PM, Arnd Bergmann  wrote:
> On Fri, Jun 16, 2017 at 7:29 PM, Dmitry Torokhov
>  wrote:
>> On Fri, Jun 16, 2017 at 8:58 AM, Samuel Thibault
>>  wrote:
>>> I'm however afraid we'd have to mark a lot of static functions that way,
>>> depending on the aggressivity of gcc... I'd indeed really argue that gcc
>>> should consider stack usage when inlining.
>>>
>>> static int f(int foo) {
>>> char c[256];
>>> g(c, foo);
>>> }
>>>
>>> is really not something that I'd want to see the compiler to inline.
>>
>> Why would we not want it to be inlined? What we do not want is several
>> calls having _separate_ instances of 'c' generated on the stack, all
>> inlined calls should share 'c'. And of course if we have f1, f2, and
>> f3 with c1, c2, and c3, GCC should not blow up the stack inlining and
>> allocating stack for all 3 of them beforehand.
>>
>> But this all seems to me issue that should be solved in toolchain, not
>> trying to play whack-a-mole with kernel sources.
>
> The problem with Samuel's example is that
>
> a) the "--param asan-stack=1" option in KASAN does blow up the
>stack, which is why the annotation is now called 'noinline_if_stackbloat'.
>
> b) The toolchain cannot solve the problem, as most instances of the
>problem (unlike kbd_put_queue) force the inlining unless you build
>with the x86-specific CONFIG_OPTIMIZE_INLINING.

If inlining is done right there should be no change in stack size,
because if calls are not inlined then stack storage is "shared"
between calls, and it should similarly be shared when calls are
inlined. And that is a toolchain issue.

-- 
Dmitry


Re: autofs multi-map regression

2017-06-16 Thread Eric W. Biederman
Dick Streefland  writes:

> After a recent upgrade of a Ubuntu xenial machine, a particular
> autofs multi-map mount setup stopped working. A simplified example is:
>
> ::
> auto.master
> ::
> /net  /etc/auto.net
> ::
> auto.net
> ::
> localhost / :/ /loc :/loc
>
> Accessing /net/localhost/loc should trigger two nested bind mounts on
> /net/localhost and /net/localhost/loc, but with the new kernel, it fails
> with ELOOP:
>
> $ ls /net/localhost/loc
> ls: cannot open directory '/net/localhost/loc': Too many levels of symbolic 
> links
>
> The problem is related to the upgrade of the Ubuntu xenial kernel from
> 4.4.0-38.57 to 4.4.0-78.99. I bisected the regression to commit
> 731ac92843877f3633325203abc942193c1e9001, which is a Ubuntu backport
> of this upstream kernel commit:
>
> commit 1064f874abc0d05eeed8993815f584d847b72486
> Author: Eric W. Biederman 
> Date:   Fri Jan 20 18:28:35 2017 +1300
>
> mnt: Tuck mounts under others instead of creating shadow/side mounts.


I don't believe this is a kernel change.

I dug up an old VM and I was able to reproduce this issue simply
by installing autofs, and your auto.master and auto.net files.

# uname -a
Linux ubuntu-16 4.4.0-24-generic #43-Ubuntu SMP Wed Jun 8 19:27:37 UTC 2016 
x86_64 x86_64 x86_64 GNU/Linux

# ls /net/
localhost
# ls /net/localhost/loc
ls: cannot open directory '/net/localhost/loc': Too many levels of symbolic 
links
# ls /loc
ls: cannot open directory '/loc/': Too many levels of symbolic links

I suspect there is a misconfiguration somewhere in your autofs
configuration.  I don't speak autofs well enough to debug the issue at
this point.  But I can conclusively say it was not the kernel commit you
pointed at, as I see the issue you are reporting and I don't have that
commit in the kernel under test.

Eric




Re: [RFC PATCH 0/2] crypto: caam - fix cts(cbc(aes)) with CAAM driver

2017-06-16 Thread Horia Geantă
On 6/16/2017 11:00 AM, Herbert Xu wrote:
> On Fri, Jun 16, 2017 at 07:57:00AM +0000, Horia Geantă wrote:
>>
>> Commit 0605c41cc53ca ("crypto: cts - Convert to skcipher") appends
>> CRYPTO_TFM_REQ_MAY_BACKLOG to the original crypto request flags for the
>> last block - when calling cts_cbc_encrypt().
>> Is it really needed?
> 
> Yes, because at this point we cannot tell the sender to back off.
> 
>> For cts(cbc(aes)) with cbc(aes) offloaded in HW, i.e. running in async
>> mode, we get the below stack for CAAM driver.
>> Driver is told that it can sleep (CRYPTO_TFM_REQ_MAY_BACKLOG flag), so
>> it uses GFP_KERNEL to allocate memory. However, this is incorrect, since
>> driver runs in atomic context (softirq).
> 
> This is wrong.  Whether you can sleep or not is determined by
> MAY_SLEEP, not MAY_BACKLOG.  MAY_BACKLOG only indicates that this
> request must be queued, even if the queue is full.
> 
Indeed, CAAM driver incorrectly decides to use GFP_KERNEL for allocation
when MAY_BACKLOG flag is set. This seems to be a long-standing issue, I
will send a fix (separately).

Still I think we have a problem.
David reported that the user is fscrypt. Looking into fscrypt code, I
see that besides MAY_BACKLOG, MAY_SLEEP flag is also set. So we end up
in the situation I described earlier: the last block is encrypted in
atomic context and with MAY_SLEEP set.

Thanks,
Horia


mmotm 2017-06-16-13-59 uploaded

2017-06-16 Thread akpm
The mm-of-the-moment snapshot 2017-06-16-13-59 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE-yyyy-mm-dd-hh-mm-ss.  Both contain the string yyyy-mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  <make changes, commit>
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master <topic base> topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.12-rc5:
(patches marked "*" will be included in linux-next)

  origin.patch
  i-need-old-gcc.patch
* mm-hwpoison-use-compound_head-flags-for-huge-pages.patch
* swap-cond_resched-in-swap_cgroup_prepare.patch
* mm-numa-avoid-waiting-on-freed-migrated-pages.patch
* userfaultfd-shmem-handle-coredumping-in-handle_userfault.patch
* mm-correct-the-comment-when-reclaimed-pages-exceed-the-scanned-pages.patch
* mm-correct-the-comment-when-reclaimed-pages-exceed-the-scanned-pages-fix.patch
* mm-list_lruc-use-cond_resched_lock-for-nlru-lock.patch
* mm-skip-hwpoisoned-pages-when-onlining-pages.patch
* mm-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch
* autofs-sanity-check-status-reported-with-autofs_dev_ioctl_fail.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* mn10300-remove-wrapper-header-for-asm-deviceh.patch
* mn10300-use-generic-fbh.patch
* tile-provide-default-ioremap-declaration.patch
* 
teach-initramfs_root_uid-and-initramfs_root_gid-that-1-means-current-user.patch
* 
clarify-help-text-that-compression-applies-to-ramfs-as-well-as-legacy-ramdisk.patch
* 
sh-intc-delete-an-error-message-for-a-failed-memory-allocation-in-add_virq_to_pirq.patch
* ocfs2-fix-a-static-checker-warning.patch
* ocfs2-use-magich.patch
* ocfs2-get-rid-of-ocfs2_is_o2cb_active-function.patch
* 
ocfs2-old-mle-put-and-release-after-the-function-dlm_add_migration_mle-called.patch
* 
ocfs2-old-mle-put-and-release-after-the-function-dlm_add_migration_mle-called-fix.patch
* ocfs2-dlm-optimization-of-code-while-free-dead-node-locks.patch
* 
ocfs2-dlm-optimization-of-code-while-free-dead-node-locks-checkpatch-fixes.patch
* ocfs2-give-an-obvious-tip-for-dismatch-cluster-names.patch
* ocfs2-give-an-obvious-tip-for-dismatch-cluster-names-v2.patch
* ocfs2-move-some-definitions-to-header-file.patch
* ocfs2-fix-some-small-problems.patch
* ocfs2-add-kobject-for-online-file-check.patch
* ocfs2-add-duplicative-ino-number-check.patch
* 
block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch
* sendfile-do-not-update-file-offset-of-non-lseekable-objects.patch
* fs-file-replace-alloc_fdmem-with-kvmalloc-alternative.patch
* watchdog-remove-unused-declaration.patch
* watchdog-introduce-arch_touch_nmi_watchdog.patch
* watchdog-split-up-config-options.patch
* watchdog-provide-watchdog_reconfigure-for-arch-watchdogs.patch
* powerpc-64s-implement-arch-specific-hardlockup-watchdog.patch
* powerpc-64s-implement-arch-specific-hardlockup-watchdog-checkpatch-fixes.patch
  mm.patch
* mm-slub-remove-a-redundant-assignment-in-___slab_alloc.patch
* mm-slub-rese

Re: [PATCH v5 3/4] ARM64: dts: meson-gx: use stable UART bindings with correct gate clock

2017-06-16 Thread Kevin Hilman
Neil Armstrong  writes:

> From: Helmut Klein 
>
> This patch switches to the stable UART bindings but also adds the correct
> gate clock to the non-AO UART nodes for GXBB and GXL SoCs.
>
> Acked-by: Jerome Brunet 
> Signed-off-by: Helmut Klein 
> Signed-off-by: Neil Armstrong 
> ---
>  arch/arm64/boot/dts/amlogic/meson-gx.dtsi   | 12 +---
>  arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 25 +
>  arch/arm64/boot/dts/amlogic/meson-gxl.dtsi  | 25 +
>  3 files changed, 55 insertions(+), 7 deletions(-)
>
> diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi 
> b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
> index 603491d..86a4018 100644
> --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
> +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
> @@ -225,7 +225,7 @@
>   };
>  
>   uart_A: serial@84c0 {
> - compatible = "amlogic,meson-uart";
> + compatible = "amlogic,meson-gx-uart";

IMO, we should keep both compatibles (the more specific one first.)
That would allow new DTs to continue to run on older kernels.

It would also allow this DT change to be completely independent of the
driver changes.  Otherwise, if I merge this before the driver changes are
merged, we'll have a bunch of boards with no more serial console output.

Kevin


Re: [PATCH v2 03/11] tty: kbd: reduce stack size with KASAN

2017-06-16 Thread Arnd Bergmann
On Fri, Jun 16, 2017 at 7:29 PM, Dmitry Torokhov
 wrote:
> On Fri, Jun 16, 2017 at 8:58 AM, Samuel Thibault
>  wrote:
>> Arnd Bergmann, on ven. 16 juin 2017 17:41:47 +0200, wrote:
>>> The problem are the 'ch' and 'flag' variables that are passed into
>>> tty_insert_flip_char by value, and from there into
>>> tty_insert_flip_string_flags by reference.  In this case, kasan tries
>>> to detect whether tty_insert_flip_string_flags() does any out-of-bounds
>>> access on the pointers and adds 64 bytes redzone around each of
>>> the two variables.
>>
>> Ouch.
>>
>>> gcc-6.3.1 happens to inline 16 calls of tty_insert_flip_char() into
>
> I wonder if we should stop marking tty_insert_flip_char() as inline.

That would be an easy solution, yes. tty_insert_flip_char() was
apparently meant to be optimized for the fast path to completely
avoid calling into another function, but that fast path got a bit more
complex with commit acc0f67f307f ("tty: Halve flip buffer
GFP_ATOMIC memory consumption").

If we move it out of line, the fast path optimization goes away and
we could just have a simple implementation like


int tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag)
{
struct tty_buffer *tb = port->buf.tail;
int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0;

if (!__tty_buffer_request_room(port, 1, flags))
return 0;

if (~tb->flags & TTYB_NORMAL)
*flag_buf_ptr(tb, tb->used) = flag;
*char_buf_ptr(tb, tb->used++) = ch;

return 1;
}

One rather simple change I found would actually avoid the warning
and would seem to actually give us better runtime behavior even
without KASAN:

diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index c28dd523f96e..15d03a14ad0f 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -26,7 +26,7 @@ static inline int tty_insert_flip_char(struct tty_port *port,
*char_buf_ptr(tb, tb->used++) = ch;
return 1;
}
-   return tty_insert_flip_string_flags(port, &ch, &flag, 1);
+   return tty_insert_flip_string_fixed_flag(port, &ch, flag, 1);
 }

 static inline int tty_insert_flip_string(struct tty_port *port,

This reduces the stack frame size for kbd_event() to 1256 bytes,
which is well within the limit, and it lets us keep the flag-less
buffers across a 'tb->used >= tb->size' condition. Calling
into tty_insert_flip_string_flags() today will allocate a flag buffer
if there isn't already one, even when it is not needed.

>> I'm however afraid we'd have to mark a lot of static functions that way,
>> depending on the aggressivity of gcc... I'd indeed really argue that gcc
>> should consider stack usage when inlining.
>>
>> static int f(int foo) {
>> char c[256];
>> g(c, foo);
>> }
>>
>> is really not something that I'd want to see the compiler to inline.
>
> Why would we not want it to be inlined? What we do not want is several
> calls having _separate_ instances of 'c' generated on the stack, all
> inlined calls should share 'c'. And of course if we have f1, f2, and
> f3 with c1, c2, and c3, GCC should not blow up the stack inlining and
> allocating stack for all 3 of them beforehand.
>
> But this all seems to me issue that should be solved in toolchain, not
> trying to play whack-a-mole with kernel sources.

The problem with Samuel's example is that

a) the "--param asan-stack=1" option in KASAN does blow up the
   stack, which is why the annotation is now called 'noinline_if_stackbloat'.

b) The toolchain cannot solve the problem, as most instances of the
   problem (unlike kbd_put_queue) force the inlining unless you build
   with the x86-specific CONFIG_OPTIMIZE_INLINING.

Arnd


Re: [PATCH RESEND 02/13] mfd: cros_ec: Add EC console read structures definitions

2017-06-16 Thread Benson Leung
Hi Enric,

On 05/16/2017 09:13 AM, Enric Balletbo i Serra wrote:
> From: Nicolas Boichat 
> 
> ec_params_console_read_v1 is used to capture EC logs from kernel,
> and ec_params_get_cmd_versions_v1 is used to probe whether EC
> supports that command.
> 
> Signed-off-by: Nicolas Boichat 
> Reviewed-by: Guenter Roeck 
> Acked-by: Lee Jones 
> Tested-by: Enric Balletbo i Serra 

Thanks. Applied.


-- 
Benson Leung
Staff Software Engineer
Chrome OS Kernel
Google Inc.
ble...@google.com
Chromium OS Project
ble...@chromium.org



signature.asc
Description: OpenPGP digital signature


Re: [PATCH RESEND 01/13] mfd: cros_ec: Add helper for event notifier.

2017-06-16 Thread Benson Leung
Hi Enric,

On 05/16/2017 09:13 AM, Enric Balletbo i Serra wrote:
> From: Gwendal Grignou 
> 
> Add cros_ec_get_event() entry point to retrieve event within functions
> called by the notifier.
> 
> Signed-off-by: Gwendal Grignou 
> Signed-off-by: Enric Balletbo i Serra 
> Acked-by: Lee Jones 

Applied to my branch. I'll let you know when the whole thing is ready.

-- 
Benson Leung
Staff Software Engineer
Chrome OS Kernel
Google Inc.
ble...@google.com
Chromium OS Project
ble...@chromium.org



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 00/13] block: assorted cleanup for bio splitting and cloning.

2017-06-16 Thread Jens Axboe
On 06/16/2017 01:34 AM, Christoph Hellwig wrote:
> On Fri, Jun 16, 2017 at 05:30:50PM +1000, NeilBrown wrote:
>> I've pushed the new version to the same place.  Do you actually want
>> me to re-post all the patches?
> 
> I personally prefer to always have patches on the list, but I can't
> speak for Jens of course.

Yes please, I'd prefer them posted again as well.

-- 
Jens Axboe



Re: [RFC v2 0/2] swait: add idle to make idle-hacks on kthreads explicit

2017-06-16 Thread Eric W. Biederman
"Paul E. McKenney"  writes:

> On Fri, Jun 16, 2017 at 01:26:19AM +0200, Luis R. Rodriguez wrote:
>> On Thu, Jun 15, 2017 at 02:57:17PM -0700, Paul E. McKenney wrote:
>> > On Thu, Jun 15, 2017 at 11:48:18AM -0700, Luis R. Rodriguez wrote:
>> > > While reviewing RCU's interruptible swaits I noticed signals were 
>> > > actually
>> > > not expected. Paul explained that the reason signals are not expected is
>> > > we use kthreads, which don't get signals, furthermore the code avoided 
>> > > the
>> > > uninterruptible swaits as otherwise it would contribute to the system 
>> > > load
>> > > average on idle, bumping it from 0 to 2 or 3 (depending on preemption).
>> > > 
>> > > Since this can be confusing it's best to be explicit about the 
>> > > requirements and
>> > > goals. This patch depends on the other killable swaits [0] recently 
>> > > proposed as
>> > > well in terms of context. The patch can however be tested independently 
>> > > if
>> > > the hunk is addressed separately.
>> > > 
>> > > [0] https://lkml.kernel.org/r/20170614222017.14653-3-mcg...@kernel.org
>> > 
>> > Tested-by: Paul E. McKenney 
>> > 
>> > Are you looking to push these or were you wanting me to?
>> 
>> I'd be happy for you to take them.
>
> OK, let's see if we can get some Acked-by's or Reviewed-by's from the
> relevant people.
>
> For but one example, Eric, does this look good to you or are adjustments
> needed?

Other than an unnecessary return code I don't see any issues.

Acked-by: "Eric W. Biederman" 

In truth I am just barely ahead of you folks.  I ran into the same issue
the other day with a piece of my code and someone pointed me to TASK_IDLE.

Eric


Re: [PATCH] fs: don't forget to put old mntns in mntns_install

2017-06-16 Thread Andrei Vagin
Hi Alexander,

Do you have any comments about this patch?

Here is a reproducer for this leak:

$ cat mount.sh 
set -e -x
mount --make-rprivate /
mount -t tmpfs zdtm /mnt
mount --make-shared /mnt
mount -t proc procX /proc
for i in `seq $1`; do
mount --bind /mnt /mnt
done
mount --make-rprivate /mnt
unshare -m sleep 1000 &
pid=$!
unshare -m nsenter -m -t $pid nsenter -m -t $$ true


$ while :; do unshare -Umpfr sh mount.sh 16 || break; done

...

$ cat /proc/slabinfo | grep mnt
mnt_cache 3281745 3281776    512   16    2 : tunables    0    0    0 : 
slabdata 205111 205111  0


On Thu, Jun 08, 2017 at 05:32:29PM -0700, Andrei Vagin wrote:
> Fixes: 4f757f3cbf54 ("make sure that mntns_install() doesn't end up with 
> referral for root")
> Cc: Al Viro 
> Signed-off-by: Andrei Vagin 
> ---
>  fs/namespace.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 8bd3e4d..5a44384 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3488,6 +3488,8 @@ static int mntns_install(struct nsproxy *nsproxy, 
> struct ns_common *ns)
>   return err;
>   }
>  
> + put_mnt_ns(old_mnt_ns);
> +
>   /* Update the pwd and root */
>   set_fs_pwd(fs, &root);
>   set_fs_root(fs, &root);
> -- 
> 2.9.4
> 


Re: [PATCH 03/44] dmaengine: ioat: don't use DMA_ERROR_CODE

2017-06-16 Thread Alexander Duyck
On Fri, Jun 16, 2017 at 11:10 AM, Christoph Hellwig  wrote:
> DMA_ERROR_CODE is not a public API and will go away.  Instead properly
> unwind based on the loop counter.
>
> Signed-off-by: Christoph Hellwig 
> Acked-by: Dave Jiang 
> Acked-By: Vinod Koul 
> ---
>  drivers/dma/ioat/init.c | 24 +++-
>  1 file changed, 7 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
> index 6ad4384b3fa8..ed8ed1192775 100644
> --- a/drivers/dma/ioat/init.c
> +++ b/drivers/dma/ioat/init.c
> @@ -839,8 +839,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
> *ioat_dma)
> goto free_resources;
> }
>
> -   for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
> -   dma_srcs[i] = DMA_ERROR_CODE;
> for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
> dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
>DMA_TO_DEVICE);
> @@ -910,8 +908,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
> *ioat_dma)
>
> xor_val_result = 1;
>
> -   for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
> -   dma_srcs[i] = DMA_ERROR_CODE;
> for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
> dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
>DMA_TO_DEVICE);
> @@ -965,8 +961,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
> *ioat_dma)
> op = IOAT_OP_XOR_VAL;
>
> xor_val_result = 0;
> -   for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
> -   dma_srcs[i] = DMA_ERROR_CODE;
> for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
> dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
>DMA_TO_DEVICE);
> @@ -1017,18 +1011,14 @@ static int ioat_xor_val_self_test(struct 
> ioatdma_device *ioat_dma)
> goto free_resources;
>  dma_unmap:
> if (op == IOAT_OP_XOR) {
> -   if (dest_dma != DMA_ERROR_CODE)
> -   dma_unmap_page(dev, dest_dma, PAGE_SIZE,
> -  DMA_FROM_DEVICE);
> -   for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
> -   if (dma_srcs[i] != DMA_ERROR_CODE)
> -   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
> -  DMA_TO_DEVICE);
> +   while (--i >= 0)
> +   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
> +  DMA_TO_DEVICE);
> +   dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
> } else if (op == IOAT_OP_XOR_VAL) {
> -   for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
> -   if (dma_srcs[i] != DMA_ERROR_CODE)
> -   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
> -  DMA_TO_DEVICE);
> +   while (--i >= 0)
> +   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
> +  DMA_TO_DEVICE);

Wouldn't it make more sense to pull out the while loop and just call
dma_unmap_page on dest_dma if "op == IOAT_OP_XOR"? Odds are it is what
the compiler is already generating and will save a few lines of code
so what you end up with is something like:
while (--i >= 0)
dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
if (op == IOAT_OP_XOR)
dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);

> }
>  free_resources:
> dma->device_free_chan_resources(dma_chan);
> --
> 2.11.0
>


Re: autofs multi-map regression

2017-06-16 Thread Dick Streefland
On Friday 2017-06-16 12:03, Eric W. Biederman wrote:
| Interesting...
| 
| Can you test this on a stock 4.11 kernel?
| 
| I definitely need a little bit more information to solve this.  That
| commit did not add any new error conditions so I need to understand
| what state you are getting yourself into that is affected by this
| commit.
| 
| Is there a chance you can post /proc/self/mountinfo from when this is
| happening?

I've installed the mainline 4.11 kernel from:

  http://kernel.ubuntu.com/~kernel-ppa/mainline/v4.11/

and this kernel works correctly!

So either this issue was fixed in the meantime, or it is something
specific to the Ubuntu kernel. I guess I should file a bug report
with Ubuntu then?

I've also looked at /proc/self/mountinfo before and directly after the
mount attempt. Here are the ext4 and autofs entries for the failing 4.4
kernel:

before:
23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
rw,errors=remount-ro,data=ordered
41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs systemd-1 
rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct
46 23 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 
rw,block_validity,delalloc,barrier,user_xattr,acl
202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net 
rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect

after:
23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
rw,errors=remount-ro,data=ordered
41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs systemd-1 
rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct
46 162 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 
rw,block_validity,delalloc,barrier,user_xattr,acl
202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net 
rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect
157 202 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 
rw,errors=remount-ro,data=ordered
161 157 0:47 / /net/localhost/loc rw,relatime shared:119 - autofs /etc/auto.net 
rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset
162 23 0:47 / /loc rw,relatime shared:119 - autofs /etc/auto.net 
rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset

And here is the info for the working mainline 4.11 kernel:

before:
23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
rw,errors=remount-ro,data=ordered
74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs systemd-1 
rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754
45 23 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 
rw,block_validity,delalloc,barrier,user_xattr,acl
208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net 
rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555

after:
23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 
rw,errors=remount-ro,data=ordered
74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs systemd-1 
rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754
45 175 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 
rw,block_validity,delalloc,barrier,user_xattr,acl
208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net 
rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555
162 208 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 
rw,errors=remount-ro,data=ordered
166 162 0:48 / /net/localhost/loc rw,relatime shared:122 - autofs /etc/auto.net 
rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555
167 23 0:48 / /loc rw,relatime shared:122 - autofs /etc/auto.net 
rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555
174 166 8:4 / /net/localhost/loc rw,nosuid,nodev,noatime shared:28 - ext4 
/dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl
175 167 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 
rw,block_validity,delalloc,barrier,user_xattr,acl

-- 
Dick


Re: [PATCH RESEND 0/2] Add support for ARM CCN-502 interconnect

2017-06-16 Thread Scott Branden

Arnd,

Should this patchset go through the ARM maintainers?


On 17-06-15 11:39 PM, Scott Branden wrote:

Add compatible string for ARM CCN-502 interconnect.
CCN-502 interconnect is already compatible with the
existing ARM CCN driver, which supports CCN-504.

Velibor Markovski (2):
   dt-bindings: arm-ccn: Add bindings info for CCN-502 compatible string
   bus: arm-ccn: Enable stats for CCN-502 interconnect

  Documentation/devicetree/bindings/arm/ccn.txt | 1 +
  drivers/bus/arm-ccn.c | 1 +
  2 files changed, 2 insertions(+)





Re: [RFC v2 1/2] swait: add idle variants which don't contribute to load average

2017-06-16 Thread Eric W. Biederman
"Luis R. Rodriguez"  writes:

> There are cases where folks are using an interruptible swait when
> using kthreads. This is rather confusing given you'd expect
> interruptible waits to be -- interruptible, but kthreads are not
> interruptible ! The reason for such practice though is to avoid
> having these kthreads contribute to the system load average.
>
> When systems are idle some kthreads may spend a lot of time blocking if
> using swait_event_timeout(). This would contribute to the system load
> average. On systems without preemption this would mean the load average
> of an idle system is bumped to 2 instead of 0. On systems with PREEMPT=y
> this would mean the load average of an idle system is bumped to 3
> instead of 0.
>
> This adds proper API using TASK_IDLE to make such goals explicit and
> avoid confusion.
>
> Suggested-by: "Eric W. Biederman" 
> Signed-off-by: Luis R. Rodriguez 
> ---
>  include/linux/swait.h | 25 +
>  1 file changed, 25 insertions(+)
>
> diff --git a/include/linux/swait.h b/include/linux/swait.h
> index 2c700694d50a..105c70e23286 100644
> --- a/include/linux/swait.h
> +++ b/include/linux/swait.h
> @@ -194,4 +194,29 @@ do { 
> \
>   __ret;  \
>  })
>  
> +#define __swait_event_idle(wq, condition)\
> + ___swait_event(wq, condition, TASK_IDLE, 0, schedule())
> +
> +#define swait_event_idle(wq, condition)  
> \
> +({   \
> + int __ret = 0;  \
> + if (!(condition))   \
> + __ret = __swait_event_idle(wq, condition);  \
> + __ret;  \
> +})

The wait isn't interruptible so a return code doesn't make sense here.

> +#define __swait_event_idle_timeout(wq, condition, timeout)   \
> + ___swait_event(wq, ___wait_cond_timeout(condition), \
> +TASK_IDLE, timeout,  \
> +__ret = schedule_timeout(__ret))
> +
> +#define swait_event_idle_timeout(wq, condition, timeout) \
> +({   \
> + long __ret = timeout;   \
> + if (!___wait_cond_timeout(condition))   \
> + __ret = __swait_event_idle_timeout(wq,  \
> +condition, timeout); \
> + __ret;  \
> +})
> +
>  #endif /* _LINUX_SWAIT_H */


Re: [PATCH] rtlwifi: rtl8821ae: remove unused variable

2017-06-16 Thread Larry Finger

On 06/13/2017 03:42 PM, Gustavo A. R. Silva wrote:

Remove unused variable rtlhal.

Addresses-Coverity-ID: 1248810
Signed-off-by: Gustavo A. R. Silva 
---


NACK!! That variable is used in file core.c in driver rtlwifi, which is loaded 
and used by rtl8821ae.


Please do more than blindly follow Coverity outputs, or improve that tool!

Larry


  drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c 
b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index 2bc6bac..d158e34 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -1360,7 +1360,6 @@ static bool _rtl8821ae_reset_pcie_interface_dma(struct 
ieee80211_hw *hw,
  static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw)
  {
struct rtl_priv *rtlpriv = rtl_priv(hw);
-   struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
struct rtl_ps_ctl *ppsc = rtl_psc(rtlpriv);
u8 fw_reason = 0;
struct timeval ts;
@@ -1372,8 +1371,6 @@ static void _rtl8821ae_get_wakeup_reason(struct 
ieee80211_hw *hw)
  
  	ppsc->wakeup_reason = 0;
  
-	rtlhal->last_suspend_sec = ts.tv_sec;

-
switch (fw_reason) {
case FW_WOW_V2_PTK_UPDATE_EVENT:
ppsc->wakeup_reason = WOL_REASON_PTK_UPDATE;





Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'

2017-06-16 Thread Andrew Morton
On Fri, 16 Jun 2017 16:15:23 -0400 Johannes Weiner  wrote:

> In any case, memcontrol.h doesn't/shouldn't need hardirq.h. When that
> include is removed, the below patch compiles on: x86 allno, x86_64
> allno, and my regular x86_64 config:
> 
> ---
> Subject: mm-memcontrol-per-lruvec-stats-infrastructure-fix-4

Did you try x86_64 allmodconfig?  I'm getting a mess:

In file included from ./include/linux/mm.h:1032,
 from ./include/linux/highmem.h:7,
 from ./include/linux/bio.h:21,
 from ./include/linux/writeback.h:205,
 from ./include/linux/memcontrol.h:28,
 from ./include/linux/swap.h:8,
 from ./include/linux/suspend.h:4,
 from arch/x86/kernel/asm-offsets.c:12:
./include/linux/vmstat.h: In function 'lruvec_page_state':
./include/linux/vmstat.h:362: error: implicit declaration of function 
'mem_cgroup_disabled'
./include/linux/vmstat.h:365: error: dereferencing pointer to incomplete type
./include/linux/vmstat.h:365: error: type defaults to 'int' in declaration of 
'type name'
...

Presumably because we have memcontrol.h indirectly including mm.h which
includes vmstat.h (from a stupid place) and with this patch we have
vmstat.h including memcontrol.h.



Re: [PATCH] ipmi: use rcu lock around call to intf->handlers->sender()

2017-06-16 Thread Corey Minyard

On 06/16/2017 08:11 AM, Tony Camuso wrote:

On 06/16/2017 08:15 AM, Corey Minyard wrote:

On 06/15/2017 10:54 AM, Corey Minyard wrote:

On 06/13/2017 09:54 AM, Tony Camuso wrote:
A vendor with a system having more than 128 CPUs occasionally 
encounters a
crash during shutdown. This is not an easily reproducible event, 
but the

vendor was able to provide the following analysis of the crash, which
exhibits the same footprint each time.

crash> bt
PID: 0  TASK: 88017c70ce70  CPU: 5   COMMAND: "swapper/5"
  #0 [88085c143ac8] machine_kexec at 81059c8b
  #1 [88085c143b28] __crash_kexec at 811052e2
  #2 [88085c143bf8] crash_kexec at 811053d0
  #3 [88085c143c10] oops_end at 8168ef88
  #4 [88085c143c38] no_context at 8167ebb3
  #5 [88085c143c88] __bad_area_nosemaphore at 8167ec49
  #6 [88085c143cd0] bad_area_nosemaphore at 8167edb3
  #7 [88085c143ce0] __do_page_fault at 81691d1e
  #8 [88085c143d40] do_page_fault at 81691ec5
  #9 [88085c143d70] page_fault at 8168e188
 [exception RIP: unknown or invalid address]
 RIP: a053c800  RSP: 88085c143e28  RFLAGS: 00010206
 RAX: 88017c72bfd8  RBX: 88017a8dc000  RCX: 
8810588b5ac8
 RDX: 8810588b5a00  RSI: a053c800  RDI: 
8810588b5a00
 RBP: 88085c143e58   R8: 88017c70d408   R9: 
88017a8dc000
 R10: 0002  R11: 88085c143da0  R12: 
8810588b5ac8
 R13: 0100  R14: a053c800  R15: 
8810588b5a00

 ORIG_RAX:   CS: 0010  SS: 0018
---  ---
 [exception RIP: cpuidle_enter_state+82]
 RIP: 81514192  RSP: 88017c72be50  RFLAGS: 0202
 RAX: 001e4c3c6f16  RBX: f8a0  RCX: 
0018
 RDX: 000225c17d03  RSI: 88017c72bfd8  RDI: 
001e4c3c6f16
 RBP: 88017c72be78   R8: 237e   R9: 
0018
 R10: 2494  R11: 0001  R12: 
88017c72be20
 R13: 88085c14f8e0  R14: 0082  R15: 
001e4c3bb400

 ORIG_RAX: ff10  CS: 0010  SS: 0018

This is the corresponding stack trace

It has crashed because the area pointed with RIP extracted from timer
element is already removed during a shutdown process.

The function is smi_timeout().

And we think 8810588b5a00 in RDX is a parameter struct smi_info

crash> rd 8810588b5a00 20
8810588b5a00:  8810588b6000  .`.X
8810588b5a10:  880853264400 a05417e0 .D&S..T.
8810588b5a20:  24a024a0  .$.$
8810588b5a30:    
8810588b5a40:  a053a040 a053a060 @.S.`.S.
8810588b5a50:   00010001 
8810588b5a60:   0e00 
8810588b5a70:  a053a580 a053a6e0 ..S...S.
8810588b5a80:  a053a4a0 a053a250 ..S.P.S.
8810588b5a90:  00050002  

Unfortunately the top of this area is already destroyed by someone.
But because of two reasons we think this is struct smi_info
  1) The address included in between  8810588b5a70 and 
8810588b5a80:

   are inside of ipmi_si_intf.c  see crash> module 88085779d2c0

  2) We've found the area which point this.
   It is offset 0x68 of  880859df4000

crash> rd  880859df4000 100
880859df4000:   0001 
880859df4010:  a0535290 dead0200 .RS.
880859df4020:  880859df4020 880859df4020 @.Y @.Y
880859df4030:  0002 00100010 
880859df4040:  880859df4040 880859df4040 @@.Y@@.Y
880859df4050:    
880859df4060:   8810588b5a00 .Z.X
880859df4070:  0001 880859df4078 x@.Y

  If we regard it as struct ipmi_smi in shutdown process
  it looks consistent.

The remedy for this apparent race is affixed below.


I think you are right about this problem, but in_shutdown is checked 
already
a bit before when newmsg is extracted from the list.  Wouldn't it be 
better

to add the rcu_read_lock() region starting right before the previous
in_shutdown check to after the send?  That would avoid a leak in this
case.


While lying awake unable to sleep, I realized that you can't call the
sender function while holding rcu_read_lock().  That will break RT,
because you can't claim a mutex while holding rcu_read_lock(),
and the sender function will claim normal spinlocks.

So I need to think about this a bit.

-corey



Thanks,

-corey



Would this be adequate to prevent the race?
Is the sender's mutex/spinlock sufficient to limit acc

Re: [RFC PATCH 00/13] Switchtec NTB Support

2017-06-16 Thread Serge Semin
On Fri, Jun 16, 2017 at 01:34:59PM -0600, Logan Gunthorpe  
wrote:
> 
> 
> On 16/06/17 12:38 PM, Serge Semin wrote:
> > On Fri, Jun 16, 2017 at 11:08:52AM -0600, Logan Gunthorpe 
> >  wrote:
> > It's the way the NTB API was created for, to have set of functions to access
> > NTB devices in the similar way. These aren't my beliefs, it's the way it was
> > created. I agree it can be optional, but it shouldn't be made as the basics
> > of the driver. It is called NTB "hardware" driver after all, not 
> > "emulating" or
> > "abstracting" driver.
> 
> Just more philosophy. You haven't given any good reason to remove the
> functionality. Vague references to the way things were created aren't
> compelling arguments. Better to cite code and point out actual problems.
> 

Actual problem is the design of your driver. Of course you can disagree as much 
as
you want.

> > ntb_transport could work without Scratchpads, if it's properly altered to
> > use NTB messaging. This should be the way to make things compatible, but not
> > making the hardware driver suitable for just one ntb_transport.
> 
> Ok, well when all the NTB clients no longer require using scratchpads
> and we can all abide by the rule that clients must function without
> them. Then, I'll remove the emulation. Until then, it stays.
> 
> > It's not like my whim or something, but the way it's usually done.
> > https://kernelnewbies.org/PatchPhilosophy
> 
> > Cite from there:
> > "Each patch should group changes into a logical sequence. Bug fixes must
> > come first in the patchset, then new features. This is because we need to be
> > able to backport bug fixes to older kernels, and they should not depend on
> > new features."
> 
> You should probably read that again because it doesn't actually support
> your point (in fact it's saying something quite unrelated). It is also
> probably a good idea to read the rest of the section you cite:
> 
> "The idea here is that you should break changes up in such a way that it
> will be easy to review."
> 
> "When creating a new feature patchset, you may need to break up your
> changes into multiple commits. "
> 
> "Clean up patches that are over 200 lines long are discouraged, because
> they are hard to review. Break those patches up into smaller patches. "
> 

This doesn't prove your way of splitting patchset is correct, but supports
my point. As well as the sentence about the logical sequence in addition
to the thing about easy review.

> Also, to quote Greg Kroah-Hartman from my last series[1]:
> 
> "That's one big patch to review, would you want to do that?
> 
> Can you break it up into smaller parts?"
> 
> > You grouped the patches in according to your logical view or development
> > progress (I don't know for sure), but it's not obvious for reviewers.
> > From my perspective your new Microsemi Switchtec NTB driver is just one
> > feature. I don't know who would think differently so to split the solid
> > driver up for review. Switchtec management driver alteration might be the
> > same - just one fix. It's much easier for you to have your commits squashed,
> > than for me to look at your git tree, than get back to your patchset looking
> > for a necessary piece of patch and commenting it there.
> 
> Well you're free to think that but, in my experience, your opinion
> differs significantly from the rest of the kernel community which I
> personally agree with.
> 

And your quotation doesn't prove you are right. Greg asked you to split at
least the documentation. He had point to ask it, since it's logically correct.
You weren't arguing with him, were you? But in this case you have sent the
set of incremental patches of your own code, so I don't see how it can be
easier for review, than a combined text.

> Now, if you'd like to actually review the code I'd be happy to address
> any concerns you find. I won't be responding to any more philosophical
> arguments or bike-shedding over the format of the patch.
> 

I don't want to review a patchset, which isn't properly formatted.

> Logan
> 
> [1] https://lkml.org/lkml/2017/1/31/637
> 
> -- 
> You received this message because you are subscribed to the Google Groups 
> "linux-ntb" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to linux-ntb+unsubscr...@googlegroups.com.
> To post to this group, send email to linux-...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/linux-ntb/33b6c321-c0af-7340-8e8e-e929a5c7%40deltatee.com.
> For more options, visit https://groups.google.com/d/optout.


Re: [PATCH] MIPS: Make individual platforms select ARCH_MIGHT_HAVE_PC_SERIO

2017-06-16 Thread Maciej W. Rozycki
On Fri, 16 Jun 2017, Florian Fainelli wrote:

> >  How did you determine that?  Malta for one not only has an SMSC FDC37M817 
> > Super I/O Controller featuring an 8042-compatible core, but actual PS/2 
> > keyboard and mouse connectors as well.
> 
> I was just grepping for i8042 in platform code to determine that, this
> came after having SERIO accidentally enabled on my platform
> (BMIPS_GENERIC) and seeing that it crashed badly and it annoyed the crap
> out of me that MIPS had ARCH_MIGHT_HAVE_PC_SERIO for platforms that
> don't need it.
> 
> Will come up with a v2 that includes malta, any other platforms for
> which it's not obvious?

 I don't know offhand, but in principle anything that has PCI and a 
southbridge (not all PCI platforms have one, e.g. Broadcom SWARM and 
BigSur are legacy-free) can have an 8042 wired.  Ideally probing for 8042 
hardware should be done by platform code and the driver's init code would 
not be called at all if there's no 8042 present, similarly to how e.g. RTC 
is usually registered.

  Maciej


Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'

2017-06-16 Thread Johannes Weiner
On Fri, Jun 16, 2017 at 12:14:53PM -0700, Andrew Morton wrote:
> On Fri, 16 Jun 2017 14:49:51 -0400 Johannes Weiner  wrote:
> 
> > On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote:
> > > Hi,
> > > 
> > > I see the following build error in -next when building hexagon images.
> > > 
> > >   CC  arch/hexagon/kernel/asm-offsets.s
> > > In file included from ./include/linux/memcontrol.h:30:0,
> > >  from ./include/linux/swap.h:8,
> > >  from ./arch/hexagon/include/asm/pgtable.h:27,
> > >  from ./include/linux/mm.h:70,
> > >  from arch/hexagon/kernel/asm-offsets.c:28:
> > > ./include/linux/vmstat.h: In function '__inc_zone_page_state':
> > > ./include/linux/vmstat.h:294:2: error: implicit declaration of function 
> > > 'page_zone' [-Werror=implicit-function-declaration]
> > > ./include/linux/vmstat.h:294:2: warning: passing argument 1 of 
> > > '__inc_zone_state' makes pointer from integer without a cast [enabled by 
> > > default]
> > > ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but 
> > > argument is of type 'int'
> > 
> > vmstat.h depends on definitions in mm.h, but mm.h through the above
> > chain includes vmstat.h first. It worked in my x86 test because x86
> > pgtable.h doesn't include swap.h.
> > 
> > The headers are a bit of a mess. memcontrol.h is supposed to be a
> > lower level header than mm.h and vmstat.h, yet the new accounting
> > functions depend on mm.h definitions.
> > 
> > Let's move the lruvec accounting infra to vmstat.h and shuffle
> > memcontrol.h into the stack under mm.h and vmstat.h.
> > 
> > Does the following fix the hexagon build?
> 
> This breaks x86_64 allnoconfig.
> 
> arch/x86/mm/pat.c:734: error: redefinition of 'arch_io_reserve_memtype_wc'
> ./include/linux/io.h:175: note: previous definition of 
> 'arch_io_reserve_memtype_wc' was here
> arch/x86/mm/pat.c:742: error: redefinition of 'arch_io_free_memtype_wc'
> ./include/linux/io.h:181: note: previous definition of 
> 'arch_io_free_memtype_wc' was here

wat:

/home/hannes/src/linux/linux/arch/x86/mm/pat.c:734:5: error: redefinition of 
‘arch_io_reserve_memtype_wc’
 int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
 ^~ 
 
In file included from /home/hannes/src/linux/linux/include/linux/irq.h:24:0,

 from 
/home/hannes/src/linux/linux/arch/x86/include/asm/hardirq.h:5,
 from /home/hannes/src/linux/linux/include/linux/hardirq.h:8,
 from 
/home/hannes/src/linux/linux/include/linux/memcontrol.h:24,
 from /home/hannes/src/linux/linux/include/linux/vmstat.h:9,
 from /home/hannes/src/linux/linux/include/linux/mm.h:1032,
 from /home/hannes/src/linux/linux/include/linux/pfn_t.h:3,
 from /home/hannes/src/linux/linux/arch/x86/mm/pat.c:15:
/home/hannes/src/linux/linux/include/linux/io.h:175:19: note: previous 
definition of ‘arch_io_reserve_memtype_wc’ was here
 static inline int arch_io_reserve_memtype_wc(resource_size_t base,
   ^~

In any case, memcontrol.h doesn't/shouldn't need hardirq.h. When that
include is removed, the below patch compiles on: x86 allno, x86_64
allno, and my regular x86_64 config:

---
Subject: mm-memcontrol-per-lruvec-stats-infrastructure-fix-4

On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote:
> Hi,
>
> I see the following build error in -next when building hexagon images.
>
>   CC  arch/hexagon/kernel/asm-offsets.s
> In file included from ./include/linux/memcontrol.h:30:0,
>  from ./include/linux/swap.h:8,
>  from ./arch/hexagon/include/asm/pgtable.h:27,
>  from ./include/linux/mm.h:70,
>  from arch/hexagon/kernel/asm-offsets.c:28:
> ./include/linux/vmstat.h: In function '__inc_zone_page_state':
> ./include/linux/vmstat.h:294:2: error: implicit declaration of function 
> 'page_zone' [-Werror=implicit-function-declaration]
> ./include/linux/vmstat.h:294:2: warning: passing argument 1 of 
> '__inc_zone_state' makes pointer from integer without a cast [enabled by 
> default]
> ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but argument 
> is of type 'int'

vmstat.h depends on definitions in mm.h, but mm.h through the above
chain includes vmstat.h first. It worked in my x86 test because x86
pgtable.h doesn't include swap.h.

The headers are a bit of a mess. memcontrol.h is supposed to be a
lower level header than mm.h and vmstat.h, yet the new accounting
functions depend on mm.h definitions.

Let's move the lruvec accounting infra to vmstat.h and shuffle
memcontrol.h into the stack under mm.h and vmstat.h.

Reported-by: Guenter Roeck 
Signed-off-by

[PATCH] mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages

2017-06-16 Thread Luck, Tony
From: Tony Luck 

Speculative processor accesses may reference any memory that has a
valid page table entry.  While a speculative access won't generate
a machine check, it will log the error in a machine check bank. That
could cause escalation of a subsequent error since the overflow bit
will be then set in the machine check bank status register.

Code has to be double-plus-tricky to avoid mentioning the 1:1 virtual
address of the page we want to map out otherwise we may trigger the
very problem we are trying to avoid.  We use a non-canonical address
that passes through the usual Linux table walking code to get to the
same "pte".

Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: x...@kernel.org
Cc: linux...@kvack.org
Cc: linux-kernel@vger.kernel.org
Cc: sta...@vger.kernel.org
Signed-off-by: Tony Luck 
---
Thanks to Dave Hansen for reviewing several iterations of this.

 arch/x86/include/asm/page_64.h   |  4 
 arch/x86/kernel/cpu/mcheck/mce.c | 35 +++
 include/linux/mm_inline.h|  6 ++
 mm/memory-failure.c  |  2 ++
 4 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index b4a0d43248cf..b50df06ad251 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -51,6 +51,10 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
+#ifdef CONFIG_X86_MCE
+#define arch_unmap_kpfn arch_unmap_kpfn
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5cfbaeb6529a..56563db0b2be 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "mce-internal.h"
 
@@ -1056,6 +1057,40 @@ static int do_memory_failure(struct mce *m)
return ret;
 }
 
+#ifdef CONFIG_X86_64
+
+void arch_unmap_kpfn(unsigned long pfn)
+{
+   unsigned long decoy_addr;
+
+   /*
+* Unmap this page from the kernel 1:1 mappings to make sure
+* we don't log more errors because of speculative access to
+* the page.
+* We would like to just call:
+*  set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
+* but doing that would radically increase the odds of a
+* speculative access to the poison page because we'd have
+* the virtual address of the kernel 1:1 mapping sitting
+* around in registers.
+* Instead we get tricky.  We create a non-canonical address
+* that looks just like the one we want, but has bit 63 flipped.
+* This relies on set_memory_np() not checking whether we passed
+* a legal address.
+*/
+
+#if PGDIR_SHIFT + 9 < 63 /* 9 because cpp doesn't grok ilog2(PTRS_PER_PGD) */
+   decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+#else
+#error "no unused virtual bit available"
+#endif
+
+   if (set_memory_np(decoy_addr, 1))
+   pr_warn("Could not invalidate pfn=0x%lx from 1:1 map \n", pfn);
+
+}
+#endif
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e030a68ead7e..25438b2b6f22 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -126,4 +126,10 @@ static __always_inline enum lru_list page_lru(struct page 
*page)
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 
+#ifdef arch_unmap_kpfn
+extern void arch_unmap_kpfn(unsigned long pfn);
+#else
+static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
+#endif
+
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 342fac9ba89b..9479e190dcbd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1071,6 +1071,8 @@ int memory_failure(unsigned long pfn, int trapno, int 
flags)
return 0;
}
 
+   arch_unmap_kpfn(pfn);
+
/*
 * Currently errors on hugetlbfs pages are measured in hugepage units,
 * so nr_pages should be 1 << compound_order.  OTOH when errors are on
-- 
2.11.0



[PATCH] [perf/core branch] perf coresight: Fix ARM builds caused by misplaced __printf

2017-06-16 Thread Kim Phillips
Trailing __printf attributes work for function declarations, but not
definitions.  This patch fixes arm32/64 builds by placing __printf
before the declarator.  Otherwise this happens:

arch/arm64/util/../../arm/util/cs-etm.c:586:1: error: attributes should be 
specified before the declarator in a function definition
 static int cs_device__print_file(const char *name, const char *fmt, ...) 
__printf(2, 3)
 ^~
arch/arm64/util/../../arm/util/cs-etm.c: In function ‘cs_etm_set_drv_config’:
arch/arm64/util/../../arm/util/cs-etm.c:610:8: error: implicit declaration of 
function ‘cs_device__print_file’ [-Werror=implicit-function-declaration]
  ret = cs_device__print_file(enable_sink, "%d", 1);
^
arch/arm64/util/../../arm/util/cs-etm.c:610:2: error: nested extern declaration 
of ‘cs_device__print_file’ [-Werror=nested-externs]
  ret = cs_device__print_file(enable_sink, "%d", 1);
  ^~~
At top level:
arch/arm64/util/../../arm/util/cs-etm.c:566:14: error: ‘cs_device__open_file’ 
defined but not used [-Werror=unused-function]
 static FILE *cs_device__open_file(const char *name)
  ^~~~
cc1: all warnings being treated as errors

Fixes: 2ee261d962ac "tools: Adopt __printf from kernel sources"
Cc: Arnaldo Carvalho de Melo 
Cc: Mathieu Poirier 
Signed-off-by: Kim Phillips 
---
Applies to acme's perf/core branch

 tools/perf/arch/arm/util/cs-etm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm/util/cs-etm.c 
b/tools/perf/arch/arm/util/cs-etm.c
index 90a6f42ba904..7ce3d1a25133 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -583,7 +583,7 @@ static FILE *cs_device__open_file(const char *name)
 
 }
 
-static int cs_device__print_file(const char *name, const char *fmt, ...) 
__printf(2, 3)
+static int __printf(2, 3) cs_device__print_file(const char *name, const char 
*fmt, ...)
 {
va_list args;
FILE *file;
-- 
2.11.0



Re: perf report: fix off-by-one for non-activation frames

2017-06-16 Thread Milian Wolff
On Freitag, 16. Juni 2017 13:57:44 CEST Jan Kratochvil wrote:
> On Fri, 16 Jun 2017 13:51:37 +0200, Milian Wolff wrote:
> > > perf-4.12.0-0.rc5.git0.1.fc27.x86_64
> > > 
> > >   39e32e gdb_main (/usr/libexec/gdb)
> > >   10b6fa main (/usr/libexec/gdb)
> > >
> > >0x5565f6f6 <+54>:callq  0x558f17a0
> > >:mov   
> > >0x18(%rsp),%rcx
> 
> [...]
> 
> > Excuse me, but I'm having trouble following you. The non-GDB backtraces
> > you
> > are pasting do not show srcline information. So what exactly is broken?
> 
> What is broken is that perf now reports address 10b6fa (corresponding to
> relocated address 0x5565f6fa) but there is no instruction on address
> 0x5565f6fa.  If you 'objdump -d' it you cannot find any instruction
> on address 0x5565f6fa (or on address 0x10b6fa).  There is an
> instruction on address 0x5565f6fb.
> 
> > Maybe paste the perf output you get now and highlight what you'd expect
> > instead?
> 
> Actual:
>   39e32e gdb_main (/usr/libexec/gdb)
>   10b6fa main (/usr/libexec/gdb)
> Expected:
>   39e32f gdb_main (/usr/libexec/gdb)
>   10b6fb main (/usr/libexec/gdb)
> 
> I agree perf needs to calculate with 39e32e and 10b6fa.  But it should
> display to user 39e32f and 10b6fb.

Hmmm this will require some more changes throughout the stack then. I.e. we'll 
have to remember the "isactivation" flag along with the original IP, and only 
apply the offset then when we query for inliners or srcline information. Maybe 
I can pull that off somehow in the patch series I'm working on currently, 
which refactors the whole inline/srcline/callchain logic anyways.

I don't see an easy way to fix the behavior. Does anyone else? So how do we 
deal with this situation in the interim? I'd prefer we keep the current 
"broken" state, as I consider it less broken than what we had before... I 
guess some of the core perf developers should decide how to handle this.

Thanks

-- 
Milian Wolff | milian.wo...@kdab.com | Senior Software Engineer
KDAB (Deutschland) GmbH&Co KG, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt Experts


Re: LTS testing with latest kselftests - some failures

2017-06-16 Thread Luis R. Rodriguez
On Fri, Jun 16, 2017 at 09:29:52PM +0200, Greg Kroah-Hartman wrote:
> On Fri, Jun 16, 2017 at 06:46:51PM +0200, Luis R. Rodriguez wrote:
> > Kees, please review 47e0bbb7fa98 below.
> > Brian, please review be4a1326d12c below.
> > 
> > On Thu, Jun 15, 2017 at 11:26:53PM +0530, Sumit Semwal wrote:
> > > Hello Greg, Shuah,
> > > 
> > > While testing 4.4.y and 4.9.y LTS kernels with latest kselftest,
> > 
> > To be clear it seems like you are taking the latest upstream kselftest and 
> > run
> > it against older stable kernels. Furthermore you seem to only run the shell
> > script tests but are using older kselftests drivers? Is this all correct?
> > Otherwise it is unclear how you are running into the issues below.
> > 
> > Does 0-day do the same? I thought 0-day takes just the kselftest from each 
> > tree
> > submitted. That *seemed* to me like the way it was designed. Shuah ?
> > 
> > What's the name of *this* testing effort BTW? Is this part of the overall
> > kselftest ? Or is this something Linaro does for LTS kernels ? If there
> > is a name to your effort can you document it here so that others are aware:
> 
> It's a "test LTS kernels to make sure Greg didn't break anything" type
> of testing effort that Linaro is helping out with.

OK so it's "standard" :)

> This could also be called, "it's about time someone did this..." :)

Good to know!

> > > we found a couple more test failures due to test-kernel mismatch:
> > > 
> > > 1. firmware tests: - linux 4.5 [1] and 4.10 [2] added a few updates to
> > > tests, and related updates to lib/test_firmware.c to improve the
> > > tests. Stable-4.4 misses these patches to lib/test_firmware.c. Stable
> > > 4.9 misses the second update.
> > 
> > <-- snip, skipped 2. and 3. -->
> > 
> > > For all the 3 listed above, we will try and update the tests to 
> > > gracefully exit.
> > 
> > Hmm, this actually raises a good kselftest question:
> > 
> > I *thought* kselftests were running tests on par with the kernels, so we 
> > would
> > *not* take latest upstream kselftests to test against older kernels. Is this
> > incorrect?
> 
> That is incorrect.  Your test should always degrade gracefully if the
> feature is not present in the kernel under test.

OK perfect, now I know to look for knobs in the shell tests to ensure this
doesn't happen again.

Some of the knobs however are for extending tests for
existing APIs in older kernels, the async and custom fallback one are an
example.  There are a series of test cases later added which could help
test LTS kernels. Would Linaro pick these test driver enhancements to help
increase coverage of tests? Or is it not worth it? If it's worth it then
what I was curious about was how to help make this easier for this process to
bloom.

> If the test is for a
> bug that was fixed, then that fix should also go to a stable kernel
> release.

Indeed, that was perfectly clear.

  Luis


Re: [PATCH] tools/testing/selftests/sysctl: Add pre-check to the value of writes_strict

2017-06-16 Thread Orson Zhai
On 16 June 2017 at 22:49, Sumit Semwal  wrote:
> Hi Orson,
>
> Thanks for the patch.
>
> On 16 June 2017 at 14:58, Orson Zhai  wrote:
>> Sysctl test will fail in some items if the value of /proc/sys/kernel
>> /sysctl_writes_strict is 0 as the default value in kernel older than v4.5.
>>
>> Make this test more robust and compatible with older kernels by checking and
>> updating the sysctl_writes_strict value and restoring it when the test is done.
>>
>> Signed-off-by: Orson Zhai 
>
> Please feel free to add my
> Reviewed-by: Sumit Semwal 
> Tested-by: Sumit Semwal 
>  [sumits: tested LTS-4.4 with hikey (arm64) ]

Sure. I will add them to my patch V2.

Thanks,
Orson

>
>> ---
>>  tools/testing/selftests/sysctl/common_tests | 14 ++
>>  tools/testing/selftests/sysctl/run_numerictests |  3 +++
>>  tools/testing/selftests/sysctl/run_stringtests  |  3 +++
>>  3 files changed, 20 insertions(+)
>>
>> diff --git a/tools/testing/selftests/sysctl/common_tests
>> b/tools/testing/selftests/sysctl/common_tests
>> index 17d534b1b7b4..f5c5c51d16f3 100644
>> --- a/tools/testing/selftests/sysctl/common_tests
>> +++ b/tools/testing/selftests/sysctl/common_tests
>> @@ -63,6 +63,20 @@ else
>> echo "ok"
>>  fi
>>
>> +echo -n "Checking writes strict setting ... "
>> +WRITES_STRICT="${SYSCTL}/kernel/sysctl_writes_strict"
>> +if [ ! -e ${WRITES_STRICT} ]; then
>> +   echo "FAIL, but skip in case of old kernel" >&2
>> +else
>> +   val=$(cat ${WRITES_STRICT})
>> +   if [ "$val" = "1" ]; then
>> +   echo "ok"
>> +   else
>> +   echo "FAIL, strict value is 0 but force to 1 to continue" >&2
>> +   echo "1" > ${WRITES_STRICT}
>> +   fi
>> +fi
>> +
>>  # Now that we've validated the sanity of "set_test" and "set_orig",
>>  # we can use those functions to set starting states before running
>>  # specific behavioral tests.
>> diff --git a/tools/testing/selftests/sysctl/run_numerictests
>> b/tools/testing/selftests/sysctl/run_numerictests
>> index 8510f93f2d14..c0a98fd82c5c 100755
>> --- a/tools/testing/selftests/sysctl/run_numerictests
>> +++ b/tools/testing/selftests/sysctl/run_numerictests
>> @@ -7,4 +7,7 @@ TEST_STR=$(( $ORIG + 1 ))
>>
>>  . ./common_tests
>>
>> +if [ ! -z ${val} ]; then
>> +   echo ${val} > ${WRITES_STRICT}
>> +fi
>>  exit $rc
>> diff --git a/tools/testing/selftests/sysctl/run_stringtests
>> b/tools/testing/selftests/sysctl/run_stringtests
>> index 90a9293d520c..ae98d66a9ec6 100755
>> --- a/tools/testing/selftests/sysctl/run_stringtests
>> +++ b/tools/testing/selftests/sysctl/run_stringtests
>> @@ -74,4 +74,7 @@ else
>> echo "ok"
>>  fi
>>
>> +if [ ! -z ${val} ]; then
>> +   echo ${val} > ${WRITES_STRICT}
>> +fi
>>  exit $rc
>> --
>> 2.12.2
>
>
> Best,
> Sumit.


Re: [PATCH v2 3/3] dax: use common 4k zero page for dax mmap reads

2017-06-16 Thread Ross Zwisler
On Thu, Jun 15, 2017 at 04:58:56PM +0200, Jan Kara wrote:
> On Wed 14-06-17 11:22:11, Ross Zwisler wrote:
> > @@ -216,17 +217,6 @@ static void dax_unlock_mapping_entry(struct 
> > address_space *mapping,
> > dax_wake_mapping_entry_waiter(mapping, index, entry, false);
> >  }
> >  
> > -static void put_locked_mapping_entry(struct address_space *mapping,
> > -pgoff_t index, void *entry)
> > -{
> > -   if (!radix_tree_exceptional_entry(entry)) {
> > -   unlock_page(entry);
> > -   put_page(entry);
> > -   } else {
> > -   dax_unlock_mapping_entry(mapping, index);
> > -   }
> > -}
> > -
> 
> The naming becomes asymmetric with this. So I'd prefer keeping
> put_locked_mapping_entry() as a trivial wrapper around
> dax_unlock_mapping_entry() unless we can craft more sensible naming / API
> for entry grabbing (and that would be a separate patch anyway).

Sure, that works for me.  I'll fix for v3.

> > -static int dax_load_hole(struct address_space *mapping, void **entry,
> > +static int dax_load_hole(struct address_space *mapping, void *entry,
> >  struct vm_fault *vmf)
> >  {
> > struct inode *inode = mapping->host;
> > -   struct page *page;
> > -   int ret;
> > -
> > -   /* Hole page already exists? Return it...  */
> > -   if (!radix_tree_exceptional_entry(*entry)) {
> > -   page = *entry;
> > -   goto finish_fault;
> > -   }
> > +   unsigned long vaddr = vmf->address;
> > +   int ret = VM_FAULT_NOPAGE;
> > +   struct page *zero_page;
> > +   void *entry2;
> >  
> > -   /* This will replace locked radix tree entry with a hole page */
> > -   page = find_or_create_page(mapping, vmf->pgoff,
> > -  vmf->gfp_mask | __GFP_ZERO);
> 
> With this gone, you can also remove the special DAX handling from
> mm/filemap.c: page_cache_tree_insert() and remove from dax.h
> dax_wake_mapping_entry_waiter(), dax_radix_locked_entry() and RADIX_DAX
> definitions. Yay! As a separate patch please.

Oh, yay!  :)  Sure, I'll have this patch for v3.


Re: [PATCH] PCI / PM: Restore the status of PCI devices across hibernation

2017-06-16 Thread Bjorn Helgaas
On Thu, May 25, 2017 at 04:49:07PM +0800, Chen Yu wrote:
> Currently we saw a lot of "No irq handler" errors during hibernation,
> which caused the system hang finally:
> 
> [  710.141581] ata4.00: qc timeout (cmd 0xec)
> [  710.147135] ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4)
> [  710.154593] ata4.00: revalidation failed (errno=-5)
> [  710.468124] ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
> [  710.477746] do_IRQ: 31.151 No irq handler for vector
> 
> According to above logs, there is an interrupt triggered and it is
> dispatched to CPU31 with a vector number 151, but there is no handler
> for it, thus this irq will not get acked and caused irq flood which kill
> the system. To be more specific, the 31.151 is an interrupt from the ahci
> host controller.
> 
> After some investigation, the reason why this issue is triggered is
> because the thaw_noirq() function does not restore the MSI/MSIX settings
> across hibernation.
> 
> The scenario is illustrated below:
> 
> 1. Before the hibernation starts, the irq 34 is the handler for the ahci 
> device,
>which is bound to cpu31.
> 2. Hibernation starts, the ahci device is put into low power state.
> 3. All the nonboot CPUs are put offline, so the irq 34 has to be migrated to
>the last alive one - CPU0.
> 4. After the snapshot has been created, all the nonboot CPUs are brought up 
> again,
>the CPU affinity for IRQ 34 remains to be 0.
> 5. ahci device is put into D0.
> 6. The snapshot is written to the disk.
> 
> The issue is triggered in step 6, in theory the ahci interrupt should be
> delivered to CPU0, however the actual result is that this interrupt is
> delivered to the original CPU31 instead, which causes the "No irq handler" 
> issue.
> 
> Ying Huang has provided a clue that, in step 3 it is possible that the 
> writing
> to the register might not take effect as the PCI devices have been put 
> suspended.
> Actually it is true:
> In step 3, the irq 34 affinity is supposed to be modified from 31 to 0,
> but actually it did not. In __pci_write_msi_msg(), if the device is already
> in low power state, the low level msi message entry will not be updated
> but cached. So in theory during the device restore process, the cached msi
> modification information should be written back to the hardware, and this
> is what pci_restore_msi_state() does during normal suspend-resume.
> But this is not the case for hibernation, pci_restore_msi_state() is not
> invoked currently, to be more specific, pci_restore_state() is not invoked
> in pci_pm_thaw_noirq(), although pci_save_state() has saved the necessary
> pci cached information in pci_pm_freeze_noirq().
> 
> This patch tries to restore the pci status for the device during hibernation,
> otherwise the status might be lost across hibernation(for example, the 
> MSI/MSIX
> message settings), which might cause problems during hibernation.
> 
> Suggested-by: Ying Huang 
> Suggested-by: Rafael J. Wysocki 
> Cc: Rafael J. Wysocki 
> Cc: Bjorn Helgaas 
> Cc: Len Brown 
> Cc: Dan Williams 
> Cc: Rui Zhang 
> Cc: Ying Huang 
> Cc: linux-...@vger.kernel.org
> Cc: linux...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Chen Yu 

Added a stable tag and applied with Rafael's reviewed-by to pci/pm for
v4.13, thanks!

pci_restore_state() restores a lot of stuff besides MSI/MSI-X: PCIe
device, link, slot control, ATS, VC, BARs, ACS, IOV.  I guess I'm a
little surprised that we haven't noticed more issues if all these
things were broken.

> ---
>  drivers/pci/pci-driver.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 192e7b6..b399fa3 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -964,6 +964,7 @@ static int pci_pm_thaw_noirq(struct device *dev)
>   return pci_legacy_resume_early(dev);
>  
>   pci_update_current_state(pci_dev, PCI_D0);
> + pci_restore_state(pci_dev);
>  
>   if (drv && drv->pm && drv->pm->thaw_noirq)
>   error = drv->pm->thaw_noirq(dev);
> -- 
> 2.7.4
> 


Re: [PATCH v2 1/3] mm: add vm_insert_mixed_mkwrite()

2017-06-16 Thread Ross Zwisler
On Thu, Jun 15, 2017 at 04:42:04PM +0200, Jan Kara wrote:
> On Wed 14-06-17 11:22:09, Ross Zwisler wrote:
> > To be able to use the common 4k zero page in DAX we need to have our PTE
> > fault path look more like our PMD fault path where a PTE entry can be
> > marked as dirty and writeable as it is first inserted, rather than waiting
> > for a follow-up dax_pfn_mkwrite() => finish_mkwrite_fault() call.
> > 
> > Right now we can rely on having a dax_pfn_mkwrite() call because we can
> > distinguish between these two cases in do_wp_page():
> > 
> > case 1: 4k zero page => writable DAX storage
> > case 2: read-only DAX storage => writeable DAX storage
> > 
> > This distinction is made via vm_normal_page().  vm_normal_page() returns
> > false for the common 4k zero page, though, just as it does for DAX ptes.
> > Instead of special casing the DAX + 4k zero page case, we will simplify our
> > DAX PTE page fault sequence so that it matches our DAX PMD sequence, and
> > get rid of dax_pfn_mkwrite() completely.
> > 
> > This means that insert_pfn() needs to follow the lead of insert_pfn_pmd()
> > and allow us to pass in a 'mkwrite' flag.  If 'mkwrite' is set insert_pfn()
> > will do the work that was previously done by wp_page_reuse() as part of the
> > dax_pfn_mkwrite() call path.
> > 
> > Signed-off-by: Ross Zwisler 
> 
> So I agree that getting rid of dax_pfn_mkwrite() and using fault handler in
> that case is a way to go. However I somewhat dislike the
> vm_insert_mixed_mkwrite() thing - it looks like a hack - and I'm aware that
> we have a similar thing for PMD which is ugly as well. Besides being ugly
> I'm also concerned that when 'mkwrite' is set, we just silently overwrite
> whatever PTE was installed at that position. Not that I'd see how that
> could screw us for DAX but still a concern that e.g. some PTE flag could
> get discarded by this is there... In fact, for !HAVE_PTE_SPECIAL
> architectures, you will leak zero page references by just overwriting the
> PTE - for those archs you really need to unmap zero page before replacing
> PTE (and the same for PMD I suppose).
> 
> So how about some vmf_insert_pfn(vmf, pe_size, pfn) helper that would
> properly detect PTE / PMD case, read / write case etc., check that PTE did
> not change from orig_pte, and handle all the nasty details instead of
> messing with insert_pfn?
> 
>   Honza

Sounds good, I'll figure this out for v3.

Thanks for the review!


Re: [PATCH RESEND 03/13] mfd: cros_ec: add debugfs, console log file

2017-06-16 Thread Benson Leung
Hi Enric,

I have gotten around to reviewing this series, and hope to get
this in ASAP.

I found an issue with this commit, but I'll go ahead and fix it
myself as I'm creating the immutable branch. No need to respin the series.

On Tue, May 16, 2017 at 06:13:09PM +0200, Enric Balletbo i Serra wrote:
> +static int ec_read_version_supported(struct cros_ec_dev *ec)
> +{
> + struct ec_params_get_cmd_versions_v1 *params;
> + struct ec_response_get_cmd_versions *response;
> + int ret;
> +
> + struct cros_ec_command *msg;
> +
> + msg = kzalloc(sizeof(*msg) + max(sizeof(params), sizeof(response)),
> + GFP_KERNEL);
> + if (!msg)
> + return 0;
> +
> + msg->command = EC_CMD_GET_CMD_VERSIONS + ec->cmd_offset;
> + msg->outsize = sizeof(*params);
> + msg->insize = sizeof(*response);

By my diff, the above two lines were changed from the original CHROMIUM
commit, based on Doug's comment here: https://lkml.org/lkml/2017/2/22/630

However, this is an incomplete fix. Instead, we should pick this:
https://chromium-review.googlesource.com/#/c/444085/

I'll go ahead and do that. Thanks!

Benson

-- 
Benson Leung
Staff Software Engineer
Chrome OS Kernel
Google Inc.
ble...@google.com
Chromium OS Project
ble...@chromium.org


signature.asc
Description: Digital signature


[tip:perf/urgent] perf evsel: Fix probing of precise_ip level for default cycles event

2017-06-16 Thread tip-bot for Arnaldo Carvalho de Melo
Commit-ID:  7a1ac110c22eb726684c837544a2d42c33e07be7
Gitweb: http://git.kernel.org/tip/7a1ac110c22eb726684c837544a2d42c33e07be7
Author: Arnaldo Carvalho de Melo 
AuthorDate: Fri, 9 Jun 2017 16:54:28 -0300
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Wed, 14 Jun 2017 15:44:29 -0300

perf evsel: Fix probing of precise_ip level for default cycles event

Since commit 18e7a45af91a ("perf/x86: Reject non sampling events with
precise_ip") returns -EINVAL for sys_perf_event_open() with an attribute
with (attr.precise_ip > 0 && attr.sample_period == 0), just like is done
in the routine used to probe the max precise level when no events were
passed to 'perf record' or 'perf top', i.e.:

perf_evsel__new_cycles()
perf_event_attr__set_max_precise_ip()

The x86 code, in x86_pmu_hw_config(), which is called all the way from
sys_perf_event_open() did, starting with the aforementioned commit:

/* There's no sense in having PEBS for non sampling events: */
if (!is_sampling_event(event))
return -EINVAL;

Which makes it fail for cycles:ppp, cycles:pp and cycles:p, always using
just the non precise cycles variant.

To make sure that this is the case, I tested it, before this patch,
with:

  # perf probe -L x86_pmu_hw_config
  
0  int x86_pmu_hw_config(struct perf_event *event)
1  {
2 if (event->attr.precise_ip) {

   17 if (event->attr.precise_ip > precise)
   18 return -EOPNOTSUPP;

  /* There's no sense in having PEBS for non sampling 
events: */
   21 if (!is_sampling_event(event))
   22 return -EINVAL;
  }

  # perf probe x86_pmu_hw_config:22
  Added new events:
probe:x86_pmu_hw_config (on x86_pmu_hw_config:22)
probe:x86_pmu_hw_config_1 (on x86_pmu_hw_config:22)

  You can now use it in all perf tools, such as:

perf record -e probe:x86_pmu_hw_config_1 -aR sleep 1

  # perf trace -e perf_event_open,probe:x86_pmu_hwconfig*/max-stack=16/ perf 
record usleep 1
 0.000 ( 0.015 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, 
cpu: -1, group_fd: -1  ) ...
 0.015 ( ): probe:x86_pmu_hw_config:(9c0065e1))
   x86_pmu_hw_config ([kernel.kallsyms])
   hsw_hw_config ([kernel.kallsyms])
   x86_pmu_event_init ([kernel.kallsyms])
   perf_try_init_event ([kernel.kallsyms])
   perf_event_alloc ([kernel.kallsyms])
   SYSC_perf_event_open ([kernel.kallsyms])
   sys_perf_event_open ([kernel.kallsyms])
   do_syscall_64 ([kernel.kallsyms])
   return_from_SYSCALL_64 
([kernel.kallsyms])
   syscall (/usr/lib64/libc-2.24.so)
   perf_event_attr__set_max_precise_ip 
(/home/acme/bin/perf)
   perf_evsel__new_cycles 
(/home/acme/bin/perf)
   perf_evlist__add_default 
(/home/acme/bin/perf)
   cmd_record (/home/acme/bin/perf)
   run_builtin (/home/acme/bin/perf)
   handle_internal_command 
(/home/acme/bin/perf)
 0.000 ( 0.021 ms): perf/4150  ... [continued]: perf_event_open()) = -1 
EINVAL Invalid argument
 0.023 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, 
cpu: -1, group_fd: -1  ) ...
 0.025 ( ): probe:x86_pmu_hw_config:(9c0065e1))
   x86_pmu_hw_config ([kernel.kallsyms])
   hsw_hw_config ([kernel.kallsyms])
   x86_pmu_event_init ([kernel.kallsyms])
   perf_try_init_event ([kernel.kallsyms])
   perf_event_alloc ([kernel.kallsyms])
   SYSC_perf_event_open ([kernel.kallsyms])
   sys_perf_event_open ([kernel.kallsyms])
   do_syscall_64 ([kernel.kallsyms])
   return_from_SYSCALL_64 
([kernel.kallsyms])
   syscall (/usr/lib64/libc-2.24.so)
   perf_event_attr__set_max_precise_ip 
(/home/acme/bin/perf)
   perf_evsel__new_cycles 
(/home/acme/bin/perf)
   perf_evlist__add_default 
(/home/acme/bin/perf)
   cmd_record (/home/acme/bin/perf)
 

[tip:perf/urgent] perf tools: Fix build with ARCH=x86_64

2017-06-16 Thread tip-bot for Jiada Wang
Commit-ID:  7a759cd8e8272ee18922838ee711219c7c796a31
Gitweb: http://git.kernel.org/tip/7a759cd8e8272ee18922838ee711219c7c796a31
Author: Jiada Wang 
AuthorDate: Sun, 9 Apr 2017 20:02:37 -0700
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Wed, 14 Jun 2017 15:44:29 -0300

perf tools: Fix build with ARCH=x86_64

With commit: 0a943cb10ce78 (tools build: Add HOSTARCH Makefile variable)
when building for ARCH=x86_64, ARCH=x86_64 is passed to perf instead of
ARCH=x86, so the perf build process searches header files from
tools/arch/x86_64/include, which doesn't exist.

The following build failure is seen:

  In file included from util/event.c:2:0:
tools/include/uapi/linux/mman.h:4:27: fatal error: uapi/asm/mman.h: No such 
file or directory
compilation terminated.

Fix this issue by using SRCARCH instead of ARCH in perf, just like the
main kernel Makefile and tools/objtool's.

Signed-off-by: Jiada Wang 
Tested-by: Arnaldo Carvalho de Melo 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Eugeniu Rosca 
Cc: Jan Stancek 
Cc: Masami Hiramatsu 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Cc: Rui Teng 
Cc: Sukadev Bhattiprolu 
Cc: Wang Nan 
Fixes: 0a943cb10ce7 ("tools build: Add HOSTARCH Makefile variable")
Link: 
http://lkml.kernel.org/r/1491793357-14977-2-git-send-email-jiada_w...@mentor.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.config  | 38 +++---
 tools/perf/Makefile.perf|  2 +-
 tools/perf/arch/Build   |  2 +-
 tools/perf/pmu-events/Build |  4 ++--
 tools/perf/tests/Build  |  2 +-
 tools/perf/util/header.c|  2 +-
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8354d04..1f4fbc9 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
 
 include $(srctree)/tools/scripts/Makefile.arch
 
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
 
 NO_PERF_REGS := 1
 
 # Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
 
 # Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
 CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE 
-I$(OUTPUT)arch/x86/include/generated
@@ -43,12 +43,12 @@ ifeq ($(ARCH),x86)
   NO_PERF_REGS := 0
 endif
 
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-arm
 endif
 
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
@@ -61,7 +61,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
@@ -115,9 +115,9 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) 
-lbabeltrace-ctf
 
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
 # include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
 
 ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@@ -228,12 +228,12 @@ ifeq ($(DEBUG),0)
 endif
 
 INC_FLAGS += -I$(src-perf)/util/include
-INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
 INC_FLAGS += -I$(srctree)/tools/include/uapi
 INC_FLAGS += -I$(srctree)/tools/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
 
 # $(obj-perf)  for generated common-cmds.h
 # $(obj-perf)/util for generated bison/flex headers
@@ -355,7 +355,7 @@ ifndef NO_LIBELF
 
   ifndef NO_DWARF
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-  msg := $(warning DWARF register mappings have not been defined for 
architecture $(ARCH), DWARF support disabled);
+  msg := $(warning DWARF register mappings have not been defined for 
architecture $(SRCARCH), DWARF support disabled);
   NO_DWARF := 1
 else
   CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -380,7 +380,7 @@ ifndef NO_LIBELF
 CFLAGS += -DHAVE_BPF_PROLOGUE
 $(call detected,CONFIG_BPF_PR

[tip:perf/urgent] perf unwind: Report module before querying isactivation in dwfl unwind

2017-06-16 Thread tip-bot for Milian Wolff
Commit-ID:  9126cbbacecb8917bd0418809ef1d26616b2061e
Gitweb: http://git.kernel.org/tip/9126cbbacecb8917bd0418809ef1d26616b2061e
Author: Milian Wolff 
AuthorDate: Fri, 2 Jun 2017 16:37:53 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Fri, 16 Jun 2017 14:37:30 -0300

perf unwind: Report module before querying isactivation in dwfl unwind

The PC returned by dwfl_frame_pc() may map into a not-yet-reported
module. We have to report it before we continue unwinding. But when we
query for the isactivation flag in dwfl_frame_pc, libdw will actually do
one more unwinding step internally which can then break and lead to
missed frames or broken stacks.

With libunwind we get e.g.:

~
  heaptrack_gui  2228 135073.400474: 613969 cycles:
  108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0)
  109fbf QLocalePrivate::updateSystemPrivate 
(/usr/lib/libQt5Core.so.5.8.0)
  10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0)
   92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0)
  2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  297c53 QCoreApplicationPrivate::init 
(/usr/lib/libQt5Core.so.5.8.0)
   f7cde QGuiApplicationPrivate::init 
(/usr/lib/libQt5Gui.so.5.8.0)
  1589e8 QApplicationPrivate::init 
(/usr/lib/libQt5Widgets.so.5.8.0)
   78622 main 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)
   20439 __libc_start_main (/usr/lib/libc-2.25.so)
   78299 _start 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)

  heaptrack_gui  2228 135073.401156: 569521 cycles:
  131633 QString::endsWith (/usr/lib/libQt5Core.so.5.8.0)
  1a0701 QDir::cleanPath (/usr/lib/libQt5Core.so.5.8.0)
  21b82d [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1b3727 QFileInfo::canonicalFilePath 
(/usr/lib/libQt5Core.so.5.8.0)
  2780c7 QFactoryLoader::update (/usr/lib/libQt5Core.so.5.8.0)
  279525 QFactoryLoader::QFactoryLoader 
(/usr/lib/libQt5Core.so.5.8.0)
   e5bd0 QPlatformIntegrationFactory::create 
(/usr/lib/libQt5Gui.so.5.8.0)
   f5a1c QGuiApplicationPrivate::createPlatformIntegration 
(/usr/lib/libQt5Gui.so.5.8.0)
   f650c QGuiApplicationPrivate::createEventDispatcher 
(/usr/lib/libQt5Gui.so.5.8.0)
  298524 QCoreApplicationPrivate::init 
(/usr/lib/libQt5Core.so.5.8.0)
   f7cde QGuiApplicationPrivate::init 
(/usr/lib/libQt5Gui.so.5.8.0)
  1589e8 QApplicationPrivate::init 
(/usr/lib/libQt5Widgets.so.5.8.0)
   78622 main 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)
   20439 __libc_start_main (/usr/lib/libc-2.25.so)
   78299 _start 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)
~

Note the two frames 1589e8 and 78622 in the first sample. These are
missing when unwinding with libdw. The second sample's breakage is
more obvious:

~
  heaptrack_gui  2228 135073.400474: 613969 cycles:
  108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0)
  109fbf QLocalePrivate::updateSystemPrivate 
(/usr/lib/libQt5Core.so.5.8.0)
  10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0)
   92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0)
  2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  297c53 QCoreApplicationPrivate::init 
(/usr/lib/l

Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.

2017-06-16 Thread Ram Pai
On Fri, Jun 16, 2017 at 09:18:29PM +1000, Michael Ellerman wrote:
> Ram Pai  writes:
> > diff --git a/arch/powerpc/include/uapi/asm/ptrace.h 
> > b/arch/powerpc/include/uapi/asm/ptrace.h
> > index 8036b38..109d0c2 100644
> > --- a/arch/powerpc/include/uapi/asm/ptrace.h
> > +++ b/arch/powerpc/include/uapi/asm/ptrace.h
> > @@ -49,6 +49,8 @@ struct pt_regs {
> > unsigned long dar;  /* Fault registers */
> > unsigned long dsisr;/* on 4xx/Book-E used for ESR */
> > unsigned long result;   /* Result of a system call */
> > +   unsigned long dscr; /* contents of the DSCR register */
> > +   unsigned long amr;  /* contents of AMR register */
> >  };
> 
> You can't change pt_regs, it's ABI.
> 
> > @@ -109,7 +111,8 @@ struct pt_regs {
> >  #define PT_DSISR 42
> >  #define PT_RESULT 43
> >  #define PT_DSCR 44
> > -#define PT_REGS_COUNT 44
> > +#define PT_AMR 45
> > +#define PT_REGS_COUNT 45
> 
> You can add PT_AMR, but it has to be synthetic like DSCR, ie. not
> actually in pt_regs but available via ptrace.

ok.

> 
> But do we want to do that? How does the x86 code export the key(s) of a
> process? Or doesn't it?

The semantics defined on x86 is, 

signal handler has to have a way of knowing the contents of the
PKRU; (the x86 equivalent of AMR).  Also the signal handler
has to have the ability to modify the PKRU before it returns
from the signal handler. This modified information will be
used by the kernel to program the CPU's PKRU register.

if the signal handler does not have the ability to do so, then
when the signal handler returns and the user code restarts executing
where it had left, it will continue to access the same protected 
address and fault again, which will again invoke the signal handler
and this will continue infinitely.

We have to provide the same semantics on powerpc. The way I intend to
do it is to use one of the unused field in the gp_regs and fill that 
with the contents of the AMR register. PT_AMR, at offset 45 in gp_regs
is not used currently. offset 45, 46, and 47 are available AFAICT.


Dave: Why is it not ok to reprogram the PKRU from the signal handler,
instead of telling the kernel to do so on its behalf? Or
have I got my understanding of the semantics wrong?


> 
> cheers

-- 
Ram Pai



Re: [GIT PULL 0/3] perf/urgent fixes

2017-06-16 Thread Ingo Molnar

* Arnaldo Carvalho de Melo  wrote:

> Hi Ingo,
> 
>   Please consider pulling,
> 
> - Arnaldo
> 
> Test results at the end of this message, as usual.
> 
> The following changes since commit 63f700aab4c11d46626de3cd051dae56cf7e9056:
> 
>   Merge tag 'xtensa-20170612' of git://github.com/jcmvbkbc/linux-xtensa 
> (2017-06-13 15:09:10 +0900)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
> tags/perf-urgent-for-mingo-4.12-20170616
> 
> for you to fetch changes up to 9126cbbacecb8917bd0418809ef1d26616b2061e:
> 
>   perf unwind: Report module before querying isactivation in dwfl unwind 
> (2017-06-16 14:37:30 -0300)
> 
> 
> perf/urgent fixes:
> 
> - Fix probing of precise_ip level for default cycles event, that
>   got broken recently on x86_64 when its arch code started
>   considering invalid requesting precise samples when not sampling
>   (i.e. when attr.sample_period == 0).
> 
>   This also fixes another problem in s/390 where the precision
>   probing with sample_period == 0 returned precise_ip > 0, that
>   then, when setting up the real cycles event (not probing) would
>   return EOPNOTSUPP for precise_ip > 0 (as determined previously
>   by probing) and sample_period > 0.
> 
>   These problems resulted in attr_precise not being set to the
>   highest precision available on x86_64 when no event was specified,
>   i.e. the canonical:
> 
>   perf record ./workload
> 
>   would end up using attr.precise_ip = 0. As a workaround this would
>   need to be done:
> 
>   perf record -e cycles:P ./workload
> 
>   And on s/390 it would plain not work, requiring using:
> 
> perf record -e cycles ./workload
> 
>   as a workaround.  (Arnaldo Carvalho de Melo)
> 
> - Fix perf build with ARCH=x86_64, when ARCH should be transformed
>   into ARCH=x86, just like with the main kernel Makefile and
>   tools/objtool's, i.e. use SRCARCH. (Jiada Wang)
> 
> - Avoid accessing uninitialized data structures when unwinding with
>   elfutils's libdw, making it more closely mimic libunwind's unwinder.
>   (Milian Wolff)
> 
> Signed-off-by: Arnaldo Carvalho de Melo 
> 
> 
> Arnaldo Carvalho de Melo (1):
>   perf evsel: Fix probing of precise_ip level for default cycles event
> 
> Jiada Wang (1):
>   perf tools: Fix build with ARCH=x86_64
> 
> Milian Wolff (1):
>   perf unwind: Report module before querying isactivation in dwfl unwind
> 
>  tools/perf/Makefile.config | 38 +++---
>  tools/perf/Makefile.perf   |  2 +-
>  tools/perf/arch/Build  |  2 +-
>  tools/perf/pmu-events/Build|  4 ++--
>  tools/perf/tests/Build |  2 +-
>  tools/perf/tests/task-exit.c   |  2 +-
>  tools/perf/util/evsel.c| 12 
>  tools/perf/util/header.c   |  2 +-
>  tools/perf/util/unwind-libdw.c |  8 
>  9 files changed, 46 insertions(+), 26 deletions(-)

Pulled, thanks a lot Arnaldo!

Ingo


Re: [RFC PATCH 00/13] Switchtec NTB Support

2017-06-16 Thread Logan Gunthorpe


On 16/06/17 12:38 PM, Serge Semin wrote:
> On Fri, Jun 16, 2017 at 11:08:52AM -0600, Logan Gunthorpe 
>  wrote:
> It's the way the NTB API was created for, to have set of functions to access
> NTB devices in the similar way. These aren't my beliefs, it's the way it was
> created. I agree it can be optional, but it shouldn't be made as the basics
> of the driver. It is called NTB "hardware" driver after all, not "emulating" 
> or
> "abstracting" driver.

Just more philosophy. You haven't given any good reason to remove the
functionality. Vague references to the way things were created aren't
compelling arguments. Better to cite code and point out actual problems.

> ntb_transport could work without Scratchpads, if it's properly altered to
> use NTB messaging. This should be the way to make things compatible, but not
> making the hardware driver suitable for just one ntb_transport.

Ok, well when all the NTB clients no longer require using scratchpads
and we can all abide by the rule that clients must function without
them. Then, I'll remove the emulation. Until then, it stays.

> It's not like my whim or something, but the way it's usually done.
> https://kernelnewbies.org/PatchPhilosophy

> Cite from there:
> "Each patch should group changes into a logical sequence. Bug fixes must
> come first in the patchset, then new features. This is because we need to be
> able to backport bug fixes to older kernels, and they should not depend on
> new features."

You should probably read that again because it doesn't actually support
your point (in fact it's saying something quite unrelated). It is also
probably a good idea to read the rest of the section you cite:

"The idea here is that you should break changes up in such a way that it
will be easy to review."

"When creating a new feature patchset, you may need to break up your
changes into multiple commits. "

"Clean up patches that are over 200 lines long are discouraged, because
they are hard to review. Break those patches up into smaller patches. "

Also, to quote Greg Kroah-Hartman from my last series[1]:

"That's one big patch to review, would you want to do that?

Can you break it up into smaller parts?"

> You grouped the patches in according to your logical view or development
> progress (I don't know for sure), but it's not obvious for reviewers.
> From my perspective your new Microsemi Switchtec NTB driver is just one
> feature. I don't know who would think differently so to split the solid
> driver up for review. Switchtec management driver alteration might be the
> same - just one fix. It's much easier for you to have your commits squashed,
> than for me to look at your git tree, then get back to your patchset looking
> for a necessary piece of patch and commenting it there.

Well you're free to think that but, in my experience, your opinion
differs significantly from the rest of the kernel community which I
personally agree with.

Now, if you'd like to actually review the code I'd be happy to address
any concerns you find. I won't be responding to any more philosophical
arguments or bike-shedding over the format of the patch.

Logan

[1] https://lkml.org/lkml/2017/1/31/637



Re: LTS testing with latest kselftests - some failures

2017-06-16 Thread Greg Kroah-Hartman
On Fri, Jun 16, 2017 at 06:46:51PM +0200, Luis R. Rodriguez wrote:
> Kees, please review 47e0bbb7fa98 below.
> Brian, please review be4a1326d12c below.
> 
> On Thu, Jun 15, 2017 at 11:26:53PM +0530, Sumit Semwal wrote:
> > Hello Greg, Shuah,
> > 
> > While testing 4.4.y and 4.9.y LTS kernels with latest kselftest,
> 
> To be clear it seems like you are taking the latest upstream kselftest and run
> it against older stable kernels. Furthermore you seem to only run the shell
> script tests but are using older kselftests drivers? Is this all correct?
> Otherwise it is unclear how you are running into the issues below.
> 
> Does 0-day do the same? I thought 0-day takes just the kselftest from each 
> tree
> submitted. That *seemed* to me like the way it was designed. Shuah ?
> 
> What's the name of *this* testing effort BTW? Is this part of the overall
> kselftest ? Or is this something Linaro does for LTS kernels ? If there
> is a name to your effort can you document it here so that others are aware:

It's a "test LTS kernels to make sure Greg didn't break anything" type
of testing effort that Linaro is helping out with.

This could also be called, "it's about time someone did this..." :)

> > we found a couple more test failures due to test-kernel mismatch:
> > 
> > 1. firmware tests: - linux 4.5 [1] and 4.10 [2] added a few updates to
> > tests, and related updates to lib/test_firmware.c to improve the
> > tests. Stable-4.4 misses these patches to lib/test_firmware.c. Stable
> > 4.9 misses the second update.
> 
> <-- snip, skipped 2. and 3. -->
> 
> > For all the 3 listed above, we will try and update the tests to gracefully 
> > exit.
> 
> Hmm, this actually raises a good kselftest question:
> 
> I *thought* kselftests were running tests on par with the kernels, so we would
> *not* take latest upstream kselftests to test against older kernels. Is this
> incorrect?

That is incorrect.  Your test should always degrade gracefully if the
feature is not present in the kernel under test.  If the test is for a
bug that was fixed, then that fix should also go to a stable kernel
release.

thanks,

greg k-h


[PATCH 1/6] rtc: s3c: Jump to central exit point on getting src clock error

2017-06-16 Thread Krzysztof Kozlowski
In other error paths in probe, centralized exit point was used so make
this consistent.

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/rtc/rtc-s3c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index d44fb34df8fe..c5aa7a35d07f 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -510,8 +510,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
else
dev_dbg(&pdev->dev,
"probe deferred due to missing rtc src 
clk\n");
-   clk_disable_unprepare(info->rtc_clk);
-   return ret;
+   goto err_src_clk;
}
clk_prepare_enable(info->rtc_src_clk);
}
@@ -575,6 +574,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
if (info->data->needs_src_clk)
clk_disable_unprepare(info->rtc_src_clk);
+err_src_clk:
clk_disable_unprepare(info->rtc_clk);
 
return ret;
-- 
2.9.3



[PATCH 5/6] rtc: s3c: Handle clock prepare failures in probe

2017-06-16 Thread Krzysztof Kozlowski
clk_prepare_enable() can fail so handle such case.

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/rtc/rtc-s3c.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index c666b95fb8d7..0cb2f27a30b4 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -498,7 +498,9 @@ static int s3c_rtc_probe(struct platform_device *pdev)
dev_dbg(&pdev->dev, "probe deferred due to missing rtc 
clk\n");
return ret;
}
-   clk_prepare_enable(info->rtc_clk);
+   ret = clk_prepare_enable(info->rtc_clk);
+   if (ret)
+   return ret;
 
if (info->data->needs_src_clk) {
info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
@@ -512,7 +514,9 @@ static int s3c_rtc_probe(struct platform_device *pdev)
"probe deferred due to missing rtc src 
clk\n");
goto err_src_clk;
}
-   clk_prepare_enable(info->rtc_src_clk);
+   ret = clk_prepare_enable(info->rtc_src_clk);
+   if (ret)
+   goto err_src_clk;
}
 
/* check to see if everything is setup correctly */
-- 
2.9.3



[PATCH 2/6] rtc: s3c: Minor white-space cleanups

2017-06-16 Thread Krzysztof Kozlowski
Minor cleanups to make the code easier to read. No functional changes.
1. Remove one space before labels as this is nowadays mostly preferred.
2. Fix indentation of arguments in function calls.
3. Split structure member declaration.

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/rtc/rtc-s3c.c | 47 +++
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index c5aa7a35d07f..2b503dab7957 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -49,7 +49,8 @@ struct s3c_rtc {
spinlock_t pie_lock;
spinlock_t alarm_clk_lock;
 
-   int ticnt_save, ticnt_en_save;
+   int ticnt_save;
+   int ticnt_en_save;
bool wake_en;
 };
 
@@ -169,7 +170,7 @@ static int s3c_rtc_gettime(struct device *dev, struct 
rtc_time *rtc_tm)
 
s3c_rtc_enable_clk(info);
 
- retry_get_time:
+retry_get_time:
rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
rtc_tm->tm_hour = readb(info->base + S3C2410_RTCHOUR);
rtc_tm->tm_mday = readb(info->base + S3C2410_RTCDATE);
@@ -199,8 +200,8 @@ static int s3c_rtc_gettime(struct device *dev, struct 
rtc_time *rtc_tm)
rtc_tm->tm_year += 100;
 
dev_dbg(dev, "read time %04d.%02d.%02d %02d:%02d:%02d\n",
-1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+   1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+   rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
rtc_tm->tm_mon -= 1;
 
@@ -213,8 +214,8 @@ static int s3c_rtc_settime(struct device *dev, struct 
rtc_time *tm)
int year = tm->tm_year - 100;
 
dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n",
-1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
-tm->tm_hour, tm->tm_min, tm->tm_sec);
+   1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+   tm->tm_hour, tm->tm_min, tm->tm_sec);
 
/* we get around y2k by simply not supporting it */
 
@@ -259,9 +260,9 @@ static int s3c_rtc_getalarm(struct device *dev, struct 
rtc_wkalrm *alrm)
alrm->enabled = (alm_en & S3C2410_RTCALM_ALMEN) ? 1 : 0;
 
dev_dbg(dev, "read alarm %d, %04d.%02d.%02d %02d:%02d:%02d\n",
-alm_en,
-1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
-alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);
+   alm_en,
+   1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
+   alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);
 
/* decode the alarm enable field */
if (alm_en & S3C2410_RTCALM_SECEN)
@@ -295,9 +296,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct 
rtc_wkalrm *alrm)
int year = tm->tm_year - 100;
 
dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
-alrm->enabled,
-1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
-tm->tm_hour, tm->tm_min, tm->tm_sec);
+   alrm->enabled,
+   1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+   tm->tm_hour, tm->tm_min, tm->tm_sec);
 
s3c_rtc_enable_clk(info);
 
@@ -378,8 +379,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
dev_info(info->dev, "rtc disabled, re-enabling\n");
 
tmp = readw(info->base + S3C2410_RTCCON);
-   writew(tmp | S3C2410_RTCCON_RTCEN,
-   info->base + S3C2410_RTCCON);
+   writew(tmp | S3C2410_RTCCON_RTCEN, info->base + S3C2410_RTCCON);
}
 
if (con & S3C2410_RTCCON_CNTSEL) {
@@ -387,7 +387,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 
tmp = readw(info->base + S3C2410_RTCCON);
writew(tmp & ~S3C2410_RTCCON_CNTSEL,
-   info->base + S3C2410_RTCCON);
+  info->base + S3C2410_RTCCON);
}
 
if (con & S3C2410_RTCCON_CLKRST) {
@@ -395,7 +395,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 
tmp = readw(info->base + S3C2410_RTCCON);
writew(tmp & ~S3C2410_RTCCON_CLKRST,
-   info->base + S3C2410_RTCCON);
+  info->base + S3C2410_RTCCON);
}
 }
 
@@ -481,7 +481,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
}
 
dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n",
-info->irq_tick, info->irq_alarm);
+   info->irq_tick, info->irq_alarm);
 
/* get the memory region */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -520,7 +520,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
info->data->enable(info);
 
dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n",
-readw(info->base +

[PATCH 6/6] rtc: s3c: Handle clock enable failures

2017-06-16 Thread Krzysztof Kozlowski
clk_enable() can fail so handle such case.

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/rtc/rtc-s3c.c | 72 ---
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 0cb2f27a30b4..a8992c227f61 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -68,18 +68,32 @@ struct s3c_rtc_data {
void (*disable) (struct s3c_rtc *info);
 };
 
-static void s3c_rtc_enable_clk(struct s3c_rtc *info)
+static int s3c_rtc_enable_clk(struct s3c_rtc *info)
 {
unsigned long irq_flags;
+   int ret = 0;
 
spin_lock_irqsave(&info->alarm_clk_lock, irq_flags);
+
if (info->clk_disabled) {
-   clk_enable(info->rtc_clk);
-   if (info->data->needs_src_clk)
-   clk_enable(info->rtc_src_clk);
+   ret = clk_enable(info->rtc_clk);
+   if (ret)
+   goto out;
+
+   if (info->data->needs_src_clk) {
+   ret = clk_enable(info->rtc_src_clk);
+   if (ret) {
+   clk_disable(info->rtc_clk);
+   goto out;
+   }
+   }
info->clk_disabled = false;
}
+
+out:
spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags);
+
+   return ret;
 }
 
 static void s3c_rtc_disable_clk(struct s3c_rtc *info)
@@ -122,10 +136,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned 
int enabled)
 {
struct s3c_rtc *info = dev_get_drvdata(dev);
unsigned int tmp;
+   int ret;
 
dev_dbg(info->dev, "%s: aie=%d\n", __func__, enabled);
 
-   s3c_rtc_enable_clk(info);
+   ret = s3c_rtc_enable_clk(info);
+   if (ret)
+   return ret;
 
tmp = readb(info->base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
 
@@ -136,10 +153,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned 
int enabled)
 
s3c_rtc_disable_clk(info);
 
-   if (enabled)
-   s3c_rtc_enable_clk(info);
-   else
+   if (enabled) {
+   ret = s3c_rtc_enable_clk(info);
+   if (ret)
+   return ret;
+   } else {
s3c_rtc_disable_clk(info);
+   }
 
return 0;
 }
@@ -147,10 +167,14 @@ static int s3c_rtc_setaie(struct device *dev, unsigned 
int enabled)
 /* Set RTC frequency */
 static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq)
 {
+   int ret;
+
if (!is_power_of_2(freq))
return -EINVAL;
 
-   s3c_rtc_enable_clk(info);
+   ret = s3c_rtc_enable_clk(info);
+   if (ret)
+   return ret;
spin_lock_irq(&info->pie_lock);
 
if (info->data->set_freq)
@@ -167,8 +191,11 @@ static int s3c_rtc_gettime(struct device *dev, struct 
rtc_time *rtc_tm)
 {
struct s3c_rtc *info = dev_get_drvdata(dev);
unsigned int have_retried = 0;
+   int ret;
 
-   s3c_rtc_enable_clk(info);
+   ret = s3c_rtc_enable_clk(info);
+   if (ret)
+   return ret;
 
 retry_get_time:
rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
@@ -212,6 +239,7 @@ static int s3c_rtc_settime(struct device *dev, struct 
rtc_time *tm)
 {
struct s3c_rtc *info = dev_get_drvdata(dev);
int year = tm->tm_year - 100;
+   int ret;
 
dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n",
1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
@@ -224,7 +252,9 @@ static int s3c_rtc_settime(struct device *dev, struct 
rtc_time *tm)
return -EINVAL;
}
 
-   s3c_rtc_enable_clk(info);
+   ret = s3c_rtc_enable_clk(info);
+   if (ret)
+   return ret;
 
writeb(bin2bcd(tm->tm_sec),  info->base + S3C2410_RTCSEC);
writeb(bin2bcd(tm->tm_min),  info->base + S3C2410_RTCMIN);
@@ -243,8 +273,11 @@ static int s3c_rtc_getalarm(struct device *dev, struct 
rtc_wkalrm *alrm)
struct s3c_rtc *info = dev_get_drvdata(dev);
struct rtc_time *alm_tm = &alrm->time;
unsigned int alm_en;
+   int ret;
 
-   s3c_rtc_enable_clk(info);
+   ret = s3c_rtc_enable_clk(info);
+   if (ret)
+   return ret;
 
alm_tm->tm_sec  = readb(info->base + S3C2410_ALMSEC);
alm_tm->tm_min  = readb(info->base + S3C2410_ALMMIN);
@@ -293,6 +326,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct 
rtc_wkalrm *alrm)
struct s3c_rtc *info = dev_get_drvdata(dev);
struct rtc_time *tm = &alrm->time;
unsigned int alrm_en;
+   int ret;
int year = tm->tm_year - 100;
 
dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
@@ -300,7 +334,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct 
rtc_wkalrm *alrm)
1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, 

[PATCH 3/6] rtc: s3c: Drop unneeded cast to void pointer

2017-06-16 Thread Krzysztof Kozlowski
There is no need for casting to void pointer for of_device_id data.

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/rtc/rtc-s3c.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 2b503dab7957..bfc8660ff1e7 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -801,19 +801,19 @@ static struct s3c_rtc_data const s3c6410_rtc_data = {
 static const struct of_device_id s3c_rtc_dt_match[] = {
{
.compatible = "samsung,s3c2410-rtc",
-   .data = (void *)&s3c2410_rtc_data,
+   .data = &s3c2410_rtc_data,
}, {
.compatible = "samsung,s3c2416-rtc",
-   .data = (void *)&s3c2416_rtc_data,
+   .data = &s3c2416_rtc_data,
}, {
.compatible = "samsung,s3c2443-rtc",
-   .data = (void *)&s3c2443_rtc_data,
+   .data = &s3c2443_rtc_data,
}, {
.compatible = "samsung,s3c6410-rtc",
-   .data = (void *)&s3c6410_rtc_data,
+   .data = &s3c6410_rtc_data,
}, {
.compatible = "samsung,exynos3250-rtc",
-   .data = (void *)&s3c6410_rtc_data,
+   .data = &s3c6410_rtc_data,
},
{ /* sentinel */ },
 };
-- 
2.9.3



[PATCH 4/6] rtc: s3c: Do not remove const from rodata memory

2017-06-16 Thread Krzysztof Kozlowski
All instances of struct s3c_rtc_data are in fact static const thus
put in rodata so we should not drop the const while getting the pointer
to them.

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/rtc/rtc-s3c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index bfc8660ff1e7..c666b95fb8d7 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -41,7 +41,7 @@ struct s3c_rtc {
struct clk *rtc_src_clk;
bool clk_disabled;
 
-   struct s3c_rtc_data *data;
+   const struct s3c_rtc_data *data;
 
int irq_alarm;
int irq_tick;
@@ -437,12 +437,12 @@ static int s3c_rtc_remove(struct platform_device *pdev)
 
 static const struct of_device_id s3c_rtc_dt_match[];
 
-static struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev)
+static const struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device 
*pdev)
 {
const struct of_device_id *match;
 
match = of_match_node(s3c_rtc_dt_match, pdev->dev.of_node);
-   return (struct s3c_rtc_data *)match->data;
+   return match->data;
 }
 
 static int s3c_rtc_probe(struct platform_device *pdev)
-- 
2.9.3



Re: LTS testing with latest kselftests - some failures

2017-06-16 Thread Alexander Alemayhu
On Fri, Jun 16, 2017 at 01:08:04PM +0530, Sumit Semwal wrote:
> 
> Thanks, this was quite helpful, and so now bpf tests build on x86_64
> with current mainline for me. Perhaps we should document these
> somewhere, as dependencies?
>
There is already some documentation available[0], but something in the kernel
tree would be nice. Please send the patch(es) to netdev.

Thanks.

[0]: http://docs.cilium.io/en/latest/bpf/#development-environment

-- 
Mit freundlichen Grüßen

Alexander Alemayhu


[PATCH 1/3] perf evsel: Fix probing of precise_ip level for default cycles event

2017-06-16 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

Since commit 18e7a45af91a ("perf/x86: Reject non sampling events with
precise_ip"), the kernel returns -EINVAL for sys_perf_event_open() with an
attribute with (attr.precise_ip > 0 && attr.sample_period == 0), which is
exactly what is done in the routine used to probe the max precise level when
no events were passed to 'perf record' or 'perf top', i.e.:

perf_evsel__new_cycles()
perf_event_attr__set_max_precise_ip()

The x86 code, in x86_pmu_hw_config(), which is called all the way from
sys_perf_event_open() did, starting with the aforementioned commit:

/* There's no sense in having PEBS for non sampling events: */
if (!is_sampling_event(event))
return -EINVAL;

Which makes it fail for cycles:ppp, cycles:pp and cycles:p, always using
just the non precise cycles variant.

To make sure that this is the case, I tested it, before this patch,
with:

  # perf probe -L x86_pmu_hw_config
  
0  int x86_pmu_hw_config(struct perf_event *event)
1  {
2 if (event->attr.precise_ip) {

   17 if (event->attr.precise_ip > precise)
   18 return -EOPNOTSUPP;

  /* There's no sense in having PEBS for non sampling 
events: */
   21 if (!is_sampling_event(event))
   22 return -EINVAL;
  }

  # perf probe x86_pmu_hw_config:22
  Added new events:
probe:x86_pmu_hw_config (on x86_pmu_hw_config:22)
probe:x86_pmu_hw_config_1 (on x86_pmu_hw_config:22)

  You can now use it in all perf tools, such as:

perf record -e probe:x86_pmu_hw_config_1 -aR sleep 1

  # perf trace -e perf_event_open,probe:x86_pmu_hwconfig*/max-stack=16/ perf 
record usleep 1
 0.000 ( 0.015 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, 
cpu: -1, group_fd: -1  ) ...
 0.015 ( ): probe:x86_pmu_hw_config:(9c0065e1))
   x86_pmu_hw_config ([kernel.kallsyms])
   hsw_hw_config ([kernel.kallsyms])
   x86_pmu_event_init ([kernel.kallsyms])
   perf_try_init_event ([kernel.kallsyms])
   perf_event_alloc ([kernel.kallsyms])
   SYSC_perf_event_open ([kernel.kallsyms])
   sys_perf_event_open ([kernel.kallsyms])
   do_syscall_64 ([kernel.kallsyms])
   return_from_SYSCALL_64 
([kernel.kallsyms])
   syscall (/usr/lib64/libc-2.24.so)
   perf_event_attr__set_max_precise_ip 
(/home/acme/bin/perf)
   perf_evsel__new_cycles 
(/home/acme/bin/perf)
   perf_evlist__add_default 
(/home/acme/bin/perf)
   cmd_record (/home/acme/bin/perf)
   run_builtin (/home/acme/bin/perf)
   handle_internal_command 
(/home/acme/bin/perf)
 0.000 ( 0.021 ms): perf/4150  ... [continued]: perf_event_open()) = -1 
EINVAL Invalid argument
 0.023 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, 
cpu: -1, group_fd: -1  ) ...
 0.025 ( ): probe:x86_pmu_hw_config:(9c0065e1))
   x86_pmu_hw_config ([kernel.kallsyms])
   hsw_hw_config ([kernel.kallsyms])
   x86_pmu_event_init ([kernel.kallsyms])
   perf_try_init_event ([kernel.kallsyms])
   perf_event_alloc ([kernel.kallsyms])
   SYSC_perf_event_open ([kernel.kallsyms])
   sys_perf_event_open ([kernel.kallsyms])
   do_syscall_64 ([kernel.kallsyms])
   return_from_SYSCALL_64 
([kernel.kallsyms])
   syscall (/usr/lib64/libc-2.24.so)
   perf_event_attr__set_max_precise_ip 
(/home/acme/bin/perf)
   perf_evsel__new_cycles 
(/home/acme/bin/perf)
   perf_evlist__add_default 
(/home/acme/bin/perf)
   cmd_record (/home/acme/bin/perf)
   run_builtin (/home/acme/bin/perf)
   handle_internal_command 
(/home/acme/bin/perf)
 0.023 ( 0.004 ms): perf/4150  ... [continued]: perf_event_open()) = -1 
EINVAL Invalid argument
 0.028 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, 

[PATCH 3/3] perf unwind: Report module before querying isactivation in dwfl unwind

2017-06-16 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The PC returned by dwfl_frame_pc() may map into a not-yet-reported
module. We have to report it before we continue unwinding. But when we
query for the isactivation flag in dwfl_frame_pc, libdw will actually do
one more unwinding step internally which can then break and lead to
missed frames or broken stacks.

With libunwind we get e.g.:

~
  heaptrack_gui  2228 135073.400474: 613969 cycles:
  108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0)
  109fbf QLocalePrivate::updateSystemPrivate 
(/usr/lib/libQt5Core.so.5.8.0)
  10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0)
   92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0)
  2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  297c53 QCoreApplicationPrivate::init 
(/usr/lib/libQt5Core.so.5.8.0)
   f7cde QGuiApplicationPrivate::init 
(/usr/lib/libQt5Gui.so.5.8.0)
  1589e8 QApplicationPrivate::init 
(/usr/lib/libQt5Widgets.so.5.8.0)
   78622 main 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)
   20439 __libc_start_main (/usr/lib/libc-2.25.so)
   78299 _start 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)

  heaptrack_gui  2228 135073.401156: 569521 cycles:
  131633 QString::endsWith (/usr/lib/libQt5Core.so.5.8.0)
  1a0701 QDir::cleanPath (/usr/lib/libQt5Core.so.5.8.0)
  21b82d [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1b3727 QFileInfo::canonicalFilePath 
(/usr/lib/libQt5Core.so.5.8.0)
  2780c7 QFactoryLoader::update (/usr/lib/libQt5Core.so.5.8.0)
  279525 QFactoryLoader::QFactoryLoader 
(/usr/lib/libQt5Core.so.5.8.0)
   e5bd0 QPlatformIntegrationFactory::create 
(/usr/lib/libQt5Gui.so.5.8.0)
   f5a1c QGuiApplicationPrivate::createPlatformIntegration 
(/usr/lib/libQt5Gui.so.5.8.0)
   f650c QGuiApplicationPrivate::createEventDispatcher 
(/usr/lib/libQt5Gui.so.5.8.0)
  298524 QCoreApplicationPrivate::init 
(/usr/lib/libQt5Core.so.5.8.0)
   f7cde QGuiApplicationPrivate::init 
(/usr/lib/libQt5Gui.so.5.8.0)
  1589e8 QApplicationPrivate::init 
(/usr/lib/libQt5Widgets.so.5.8.0)
   78622 main 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)
   20439 __libc_start_main (/usr/lib/libc-2.25.so)
   78299 _start 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)
~

Note the two frames 1589e8 and 78622 in the first sample. These are
missing when unwinding with libdw. The second sample's breakage is
more obvious:

~
  heaptrack_gui  2228 135073.400474: 613969 cycles:
  108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0)
  109fbf QLocalePrivate::updateSystemPrivate 
(/usr/lib/libQt5Core.so.5.8.0)
  10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0)
  1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0)
   92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0)
   93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0)
  2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0)
  297c53 QCoreApplicationPrivate::init 
(/usr/lib/libQt5Core.so.5.8.0)
   f7cde QGuiApplicationPrivate::init 
(/usr/lib/libQt5Gui.so.5.8.0)
   20439 __libc_start_main (/usr/lib/libc-2.25.so)
   78299 _start 
(/home/milian/projects/compiled/other/bin/heaptrack_gui)

heaptrack_gui  2228 135073.401156: 569521 cycles:
  

[PATCH 2/3] perf tools: Fix build with ARCH=x86_64

2017-06-16 Thread Arnaldo Carvalho de Melo
From: Jiada Wang 

With commit: 0a943cb10ce78 (tools build: Add HOSTARCH Makefile variable)
when building for ARCH=x86_64, ARCH=x86_64 is passed to perf instead of
ARCH=x86, so the perf build process searches for header files in
tools/arch/x86_64/include, which doesn't exist.

The following build failure is seen:

  In file included from util/event.c:2:0:
tools/include/uapi/linux/mman.h:4:27: fatal error: uapi/asm/mman.h: No such 
file or directory
compilation terminated.

Fix this issue by using SRCARCH instead of ARCH in perf, just like the
main kernel Makefile and tools/objtool's.

Signed-off-by: Jiada Wang 
Tested-by: Arnaldo Carvalho de Melo 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Eugeniu Rosca 
Cc: Jan Stancek 
Cc: Masami Hiramatsu 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Cc: Rui Teng 
Cc: Sukadev Bhattiprolu 
Cc: Wang Nan 
Fixes: 0a943cb10ce7 ("tools build: Add HOSTARCH Makefile variable")
Link: 
http://lkml.kernel.org/r/1491793357-14977-2-git-send-email-jiada_w...@mentor.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.config  | 38 +++---
 tools/perf/Makefile.perf|  2 +-
 tools/perf/arch/Build   |  2 +-
 tools/perf/pmu-events/Build |  4 ++--
 tools/perf/tests/Build  |  2 +-
 tools/perf/util/header.c|  2 +-
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8354d04b392f..1f4fbc9a3292 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
 
 include $(srctree)/tools/scripts/Makefile.arch
 
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
 
 NO_PERF_REGS := 1
 
 # Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
 
 # Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
 CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE 
-I$(OUTPUT)arch/x86/include/generated
@@ -43,12 +43,12 @@ ifeq ($(ARCH),x86)
   NO_PERF_REGS := 0
 endif
 
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-arm
 endif
 
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
@@ -61,7 +61,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
@@ -115,9 +115,9 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) 
-lbabeltrace-ctf
 
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
 # include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
 
 ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@@ -228,12 +228,12 @@ ifeq ($(DEBUG),0)
 endif
 
 INC_FLAGS += -I$(src-perf)/util/include
-INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
 INC_FLAGS += -I$(srctree)/tools/include/uapi
 INC_FLAGS += -I$(srctree)/tools/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
 
 # $(obj-perf)  for generated common-cmds.h
 # $(obj-perf)/util for generated bison/flex headers
@@ -355,7 +355,7 @@ ifndef NO_LIBELF
 
   ifndef NO_DWARF
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-  msg := $(warning DWARF register mappings have not been defined for 
architecture $(ARCH), DWARF support disabled);
+  msg := $(warning DWARF register mappings have not been defined for 
architecture $(SRCARCH), DWARF support disabled);
   NO_DWARF := 1
 else
   CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -380,7 +380,7 @@ ifndef NO_LIBELF
 CFLAGS += -DHAVE_BPF_PROLOGUE
 $(call detected,CONFIG_BPF_PROLOGUE)
   else
-msg := $(warning BPF prologue is not supported by architecture 
$(ARCH), missing regs_query_register_offset());
+msg := $(warning BPF prologue is not supported by architecture 
$(SRCARCH), missing regs_query_register_offset());
   endif
 else
 

[GIT PULL 0/3] perf/urgent fixes

2017-06-16 Thread Arnaldo Carvalho de Melo
Hi Ingo,

Please consider pulling,

- Arnaldo

Test results at the end of this message, as usual.

The following changes since commit 63f700aab4c11d46626de3cd051dae56cf7e9056:

  Merge tag 'xtensa-20170612' of git://github.com/jcmvbkbc/linux-xtensa 
(2017-06-13 15:09:10 +0900)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
tags/perf-urgent-for-mingo-4.12-20170616

for you to fetch changes up to 9126cbbacecb8917bd0418809ef1d26616b2061e:

  perf unwind: Report module before querying isactivation in dwfl unwind 
(2017-06-16 14:37:30 -0300)


perf/urgent fixes:

- Fix probing of precise_ip level for default cycles event, that
  got broken recently on x86_64 when its arch code started
  considering invalid requesting precise samples when not sampling
  (i.e. when attr.sample_period == 0).

  This also fixes another problem in s/390 where the precision
  probing with sample_period == 0 returned precise_ip > 0, that
  then, when setting up the real cycles event (not probing) would
  return EOPNOTSUPP for precise_ip > 0 (as determined previously
  by probing) and sample_period > 0.

  These problems resulted in attr_precise not being set to the
  highest precision available on x86_64 when no event was specified,
  i.e. the canonical:

perf record ./workload

  would end up using attr.precise_ip = 0. As a workaround this would
  need to be done:

perf record -e cycles:P ./workload

  And on s/390 it would plain not work, requiring using:

perf record -e cycles ./workload

  as a workaround.  (Arnaldo Carvalho de Melo)

- Fix perf build with ARCH=x86_64, when ARCH should be transformed
  into ARCH=x86, just like with the main kernel Makefile and
  tools/objtool's, i.e. use SRCARCH. (Jiada Wang)

- Avoid accessing uninitialized data structures when unwinding with
  elfutils's libdw, making it more closely mimic libunwind's unwinder.
  (Milian Wolff)

Signed-off-by: Arnaldo Carvalho de Melo 


Arnaldo Carvalho de Melo (1):
  perf evsel: Fix probing of precise_ip level for default cycles event

Jiada Wang (1):
  perf tools: Fix build with ARCH=x86_64

Milian Wolff (1):
  perf unwind: Report module before querying isactivation in dwfl unwind

 tools/perf/Makefile.config | 38 +++---
 tools/perf/Makefile.perf   |  2 +-
 tools/perf/arch/Build  |  2 +-
 tools/perf/pmu-events/Build|  4 ++--
 tools/perf/tests/Build |  2 +-
 tools/perf/tests/task-exit.c   |  2 +-
 tools/perf/util/evsel.c| 12 
 tools/perf/util/header.c   |  2 +-
 tools/perf/util/unwind-libdw.c |  8 
 9 files changed, 46 insertions(+), 26 deletions(-)

Test results:

The first ones are container (docker) based builds of tools/perf with and
without libelf support, objtool where it is supported and samples/bpf/, ditto.
Where clang is available, it is also used to build perf with/without libelf.

Several are cross builds, the ones with -x-ARCH, and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.

The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
with a variety of command line event specifications to then intercept the
sys_perf_event syscall to check that the perf_event_attr fields are set up as
expected, among a variety of other unit tests.

Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/
with a variety of feature sets, exercising the build with an incomplete set of
features as well as with a complete one. It is planned to have it run on each
of the containers mentioned above, using some container orchestration
infrastructure. Get in contact if interested in helping having this in place.

  # dm
   1 alpine:3.4: Ok
   2 alpine:3.5: Ok
   3 alpine:3.6: Ok
   4 alpine:edge: Ok
   5 android-ndk:r12b-arm: Ok
   6 archlinux:latest: Ok
   7 centos:5: Ok
   8 centos:6: Ok
   9 centos:7: Ok
  10 debian:7: Ok
  11 debian:8: Ok
  12 debian:9: Ok
  13 debian:experimental: Ok
  14 debian:experimental-x-arm64: Ok
  15 debian:experimental-x-mips: Ok
  16 debian:experimental-x-mips64: Ok
  17 debian:experimental-x-mipsel: Ok
  18 fedora:20: Ok
  19 fedora:21: Ok
  20 fedora:22: Ok
  21 fedora:23: Ok
  22 fedora:24: Ok
  23 fedora:24-x-ARC-uClibc: Ok
  24 fedora:25: Ok
  25 fedora:rawhide: Ok
  26 mageia:5: Ok
  27 opensuse:13.2: Ok
  28 opensuse:42.1: Ok
  29 opensuse:tumbleweed: Ok
  30 ubuntu:12.04.5: Ok
  31 ubuntu:14.04.4: Ok
  32 ubuntu:14.04.4-x-linaro-arm64: Ok
  33 ubuntu:15.10: Ok
  34 ubuntu:16.04: Ok
  35 ubuntu:16.04-x-arm: Ok
  3

Re: [PATCH net-next] net: dsa: add cross-chip multicast support

2017-06-16 Thread David Miller
From: Vivien Didelot 
Date: Thu, 15 Jun 2017 16:14:48 -0400

> Similarly to how cross-chip VLAN works, define a bitmap of multicast
> group members for a switch, now including its DSA ports, so that
> multicast traffic can be sent to all switches of the fabric.
> 
> A switch may drop the frames if no user port is a member.
> 
> This brings support for multicast in a multi-chip environment.
> As of now, all switches of the fabric must support the multicast
> operations in order to program a single fabric port.
> 
> Reported-by: Jason Cobham 
> Signed-off-by: Vivien Didelot 

Applied, thanks Vivien.


Re: [RFC PATCH 00/13] Switchtec NTB Support

2017-06-16 Thread Logan Gunthorpe


On 16/06/17 12:08 PM, Allen Hubbe wrote:
> Alright.  I'll leave it to you to find and reconcile common functionalities 
> of the drivers.  What about making spad emulation optional?

Ok.

I don't see the point of making spad emulation optional. Who would want
to disable it and what would be the benefit?

Logan


Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.

2017-06-16 Thread Ram Pai
On Fri, Jun 16, 2017 at 08:33:01PM +1000, Benjamin Herrenschmidt wrote:
> On Fri, 2017-06-16 at 14:50 +0530, Anshuman Khandual wrote:
> > On 06/06/2017 06:35 AM, Ram Pai wrote:
> > > The value of the AMR register at the time of the exception
> > > is made available in gp_regs[PT_AMR] of the siginfo.
> > 
> > But its already available there in uctxt->uc_mcontext.regs->amr
> > while inside the signal delivery context in the user space. The
> > pt_regs already got updated with new AMR register. Then why we
> > need gp_regs to also contain AMR as well ?
> 
> Also changing gp_regs layout/size is a major ABI issue...

Ben,

gp_regs size is not changed, nor is the layout. An unused field in
gp_regs is used to hold the AMR contents. Old binaries do not
know about this unused field, and hence should not break.

New binaries can leverage this already existing but newly defined
field to read the contents of AMR.

Is it still a concern?
RP

> 
> Ben.

-- 
Ram Pai



Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'

2017-06-16 Thread Andrew Morton
On Fri, 16 Jun 2017 14:49:51 -0400 Johannes Weiner  wrote:

> On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote:
> > Hi,
> > 
> > I see the following build error in -next when building hexagon images.
> > 
> >   CC  arch/hexagon/kernel/asm-offsets.s
> > In file included from ./include/linux/memcontrol.h:30:0,
> >  from ./include/linux/swap.h:8,
> >  from ./arch/hexagon/include/asm/pgtable.h:27,
> >  from ./include/linux/mm.h:70,
> >  from arch/hexagon/kernel/asm-offsets.c:28:
> > ./include/linux/vmstat.h: In function '__inc_zone_page_state':
> > ./include/linux/vmstat.h:294:2: error: implicit declaration of function 
> > 'page_zone' [-Werror=implicit-function-declaration]
> > ./include/linux/vmstat.h:294:2: warning: passing argument 1 of 
> > '__inc_zone_state' makes pointer from integer without a cast [enabled by 
> > default]
> > ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but 
> > argument is of type 'int'
> 
> vmstat.h depends on definitions in mm.h, but mm.h through the above
> chain includes vmstat.h first. It worked in my x86 test because x86
> pgtable.h doesn't include swap.h.
> 
> The headers are a bit of a mess. memcontrol.h is supposed to be a
> lower level header than mm.h and vmstat.h, yet the new accounting
> functions depend on mm.h definitions.
> 
> Let's move the lruvec accounting infra to vmstat.h and shuffle
> memcontrol.h into the stack under mm.h and vmstat.h.
> 
> Does the following fix the hexagon build?

This breaks x86_64 allnoconfig.

arch/x86/mm/pat.c:734: error: redefinition of 'arch_io_reserve_memtype_wc'
./include/linux/io.h:175: note: previous definition of 
'arch_io_reserve_memtype_wc' was here
arch/x86/mm/pat.c:742: error: redefinition of 'arch_io_free_memtype_wc'
./include/linux/io.h:181: note: previous definition of 
'arch_io_free_memtype_wc' was here



Re: [PATCH 0/5] perf: add support for capturing skid IP

2017-06-16 Thread Stephane Eranian
On Fri, Jun 16, 2017 at 10:50 AM, Andi Kleen  wrote:
>> > Yeah, I think it is easier and more portable, especially on hardware with a
>> > PEBS-like mechanism but no branch buffer (like LBR). FYI, I did do a test
>> > implementation yesterday to evaluate the difficulty.
>> >
>> A more generalized usage of the feature is to evaluate the amount of skid
>> for any precise event.
>
> It should be always the same (one instruction), except for the control flow
> change case.
>
That's on Intel X86. What about the other arch?

> -Andi


Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.

2017-06-16 Thread Ram Pai
On Fri, Jun 16, 2017 at 02:50:13PM +0530, Anshuman Khandual wrote:
> On 06/06/2017 06:35 AM, Ram Pai wrote:
> > The value of the AMR register at the time of the exception
> > is made available in gp_regs[PT_AMR] of the siginfo.
> 
> But its already available there in uctxt->uc_mcontext.regs->amr
> while inside the signal delivery context in the user space. The
> pt_regs already got updated with new AMR register. Then why we
> need gp_regs to also contain AMR as well ?

It should not be available in uctxt->uc_mcontext.regs->amr.
In fact that field itself should not be there.

The idea was to use one of the unused fields in gp_regs, without
extending gp_regs, to provide the contents of AMR. The
PT_AMR offset in gp_regs is currently not used, so I am using it
in this patch.

However, this patch needs to be modified so that it does not extend
pt_regs or uctxt->uc_mcontext.regs.

Thanks for raising this concern.
RP

-- 
Ram Pai



[PATCH v7 00/36] x86: Secure Memory Encryption (AMD)

2017-06-16 Thread Tom Lendacky
This patch series provides support for AMD's new Secure Memory Encryption (SME)
feature.

SME can be used to mark individual pages of memory as encrypted through the
page tables. A page of memory that is marked encrypted will be automatically
decrypted when read from DRAM and will be automatically encrypted when
written to DRAM. Details on SME can be found in the links below.

The SME feature is identified through a CPUID function and enabled through
the SYSCFG MSR. Once enabled, page table entries will determine how the
memory is accessed. If a page table entry has the memory encryption mask set,
then that memory will be accessed as encrypted memory. The memory encryption
mask (as well as other related information) is determined from settings
returned through the same CPUID function that identifies the presence of the
feature.

The approach that this patch series takes is to encrypt everything possible
starting early in the boot where the kernel is encrypted. Using the page
table macros the encryption mask can be incorporated into all page table
entries and page allocations. By updating the protection map, userspace
allocations are also marked encrypted. Certain data must be accounted for
as having been placed in memory before SME was enabled (EFI, initrd, etc.)
and accessed accordingly.

This patch series is a precursor to another AMD processor feature called
Secure Encrypted Virtualization (SEV). The support for SEV will build upon
the SME support and will be submitted later. Details on SEV can be found
in the links below.

The following links provide additional detail:

AMD Memory Encryption whitepaper:
   
http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf

AMD64 Architecture Programmer's Manual:
   http://support.amd.com/TechDocs/24593.pdf
   SME is section 7.10
   SEV is section 15.34

---

This patch series is based off of the master branch of tip:
  https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master

  Commit 3d55328fd1f8 ("Merge branch 'WIP.x86/fpu'")

Source code is also available at https://github.com/codomania/tip/tree/sme-v7


Still to do:
- Kdump support, including using memremap() instead of ioremap_cache()

Changes since v6:
- Fixed the asm include file issue that caused build errors on other archs
- Rebased the CR3 register changes on top of Andy Lutomirski's patch
- Added a patch to clear the SME cpu feature if running as a PV guest under
  Xen
- Added a patch to obtain the AMD microcode level earlier in the boot
  instead of directly reading the MSR
- Refactor patch #8 ("x86/mm: Add support to enable SME in early boot
  processing") because the 5-level paging support moved the code into the
  new C-function __startup_64()
- Removed need to decrypt trampoline area in-place (set memory attributes
  before copying the trampoline code)
- General code cleanup based on feedback

Changes since v5:
- Added support for 5-level paging
- Added IOMMU support
- Created a generic asm/mem_encrypt.h in order to remove a bunch of
  #ifndef/#define entries
- Removed changes to the __va() macro and defined a function to return
  the true physical address in cr3
- Removed sysfs support as it was determined not to be needed
- General code cleanup based on feedback
- General cleanup of patch subjects and descriptions

Changes since v4:
- Re-worked mapping of setup data to not use a fixed list. Rather, check
  dynamically whether the requested early_memremap()/memremap() call
  needs to be mapped decrypted.
- Moved SME cpu feature into scattered features
- Moved some declarations into header files
- Cleared the encryption mask from the __PHYSICAL_MASK so that users
  of macros such as pmd_pfn_mask() don't have to worry/know about the
  encryption mask
- Updated some return types and values related to EFI and e820 functions
  so that an error could be returned
- During cpu shutdown, removed cache disabling and added a check for kexec
  in progress to use wbinvd followed immediately by halt in order to avoid
  any memory corruption
- Update how persistent memory is identified
- Added a function to find command line arguments and their values
- Added sysfs support
- General code cleanup based on feedback
- General cleanup of patch subjects and descriptions


Changes since v3:
- Broke out some of the patches into smaller individual patches
- Updated Documentation
- Added a message to indicate why the IOMMU was disabled
- Updated CPU feature support for SME by taking into account whether
  BIOS has enabled SME
- Eliminated redundant functions
- Added some warning messages for DMA usage of bounce buffers when SME
  is active
- Added support for persistent memory
- Added support to determine when setup data is being mapped and be sure
  to map it un-encrypted
- Added CONFIG support to set the default action of whether to activate
  SME if it is supported/enabled
- Added support for (re)booting with kexec

Changes since v2:
- Updated Documentati

[PATCH v7 03/36] x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap for RAM mappings

2017-06-16 Thread Tom Lendacky
The ioremap() function is intended for mapping MMIO. For RAM, the
memremap() function should be used. Convert calls from ioremap() to
memremap() when re-mapping RAM.

This will be used later by SME to control how the encryption mask is
applied to memory mappings, with certain memory locations being mapped
decrypted vs encrypted.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/dmi.h   |8 
 arch/x86/kernel/acpi/boot.c  |6 +++---
 arch/x86/kernel/kdebugfs.c   |   34 +++---
 arch/x86/kernel/ksysfs.c |   28 ++--
 arch/x86/kernel/mpparse.c|   10 +-
 arch/x86/pci/common.c|4 ++--
 drivers/firmware/dmi-sysfs.c |5 +++--
 drivers/firmware/pcdp.c  |4 ++--
 drivers/sfi/sfi_core.c   |   22 +++---
 9 files changed, 55 insertions(+), 66 deletions(-)

diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 3c69fed..a8e15b0 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -13,9 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)
 }
 
 /* Use early IO mappings for DMI because it's initialized early */
-#define dmi_early_remapearly_ioremap
-#define dmi_early_unmapearly_iounmap
-#define dmi_remap  ioremap_cache
-#define dmi_unmap  iounmap
+#define dmi_early_remapearly_memremap
+#define dmi_early_unmapearly_memunmap
+#define dmi_remap(_x, _l)  memremap(_x, _l, MEMREMAP_WB)
+#define dmi_unmap(_x)  memunmap(_x)
 
 #endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6bb6806..850160a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -115,7 +115,7 @@
 #defineACPI_INVALID_GSIINT_MIN
 
 /*
- * This is just a simple wrapper around early_ioremap(),
+ * This is just a simple wrapper around early_memremap(),
  * with sanity checks for phys == 0 and size == 0.
  */
 char *__init __acpi_map_table(unsigned long phys, unsigned long size)
@@ -124,7 +124,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned 
long size)
if (!phys || !size)
return NULL;
 
-   return early_ioremap(phys, size);
+   return early_memremap(phys, size);
 }
 
 void __init __acpi_unmap_table(char *map, unsigned long size)
@@ -132,7 +132,7 @@ void __init __acpi_unmap_table(char *map, unsigned long 
size)
if (!map || !size)
return;
 
-   early_iounmap(map, size);
+   early_memunmap(map, size);
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 38b6458..fd6f8fb 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -33,7 +33,6 @@ static ssize_t setup_data_read(struct file *file, char __user 
*user_buf,
struct setup_data_node *node = file->private_data;
unsigned long remain;
loff_t pos = *ppos;
-   struct page *pg;
void *p;
u64 pa;
 
@@ -47,18 +46,13 @@ static ssize_t setup_data_read(struct file *file, char 
__user *user_buf,
count = node->len - pos;
 
pa = node->paddr + sizeof(struct setup_data) + pos;
-   pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
-   if (PageHighMem(pg)) {
-   p = ioremap_cache(pa, count);
-   if (!p)
-   return -ENXIO;
-   } else
-   p = __va(pa);
+   p = memremap(pa, count, MEMREMAP_WB);
+   if (!p)
+   return -ENOMEM;
 
remain = copy_to_user(user_buf, p, count);
 
-   if (PageHighMem(pg))
-   iounmap(p);
+   memunmap(p);
 
if (remain)
return -EFAULT;
@@ -109,7 +103,6 @@ static int __init create_setup_data_nodes(struct dentry 
*parent)
struct setup_data *data;
int error;
struct dentry *d;
-   struct page *pg;
u64 pa_data;
int no = 0;
 
@@ -126,16 +119,12 @@ static int __init create_setup_data_nodes(struct dentry 
*parent)
goto err_dir;
}
 
-   pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
-   if (PageHighMem(pg)) {
-   data = ioremap_cache(pa_data, sizeof(*data));
-   if (!data) {
-   kfree(node);
-   error = -ENXIO;
-   goto err_dir;
-   }
-   } else
-   data = __va(pa_data);
+   data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
+   if (!data) {
+   kfree(node);
+   error = -ENOMEM;
+   goto err_dir;
+   }
 
node->paddr = pa_data;
node->type = data->type;
@@ -143,8 +132,7 @

[PATCH v7 05/36] x86/CPU/AMD: Handle SME reduction in physical address size

2017-06-16 Thread Tom Lendacky
When Secure Memory Encryption (SME) is enabled, the physical address
space is reduced. Adjust the x86_phys_bits value to reflect this
reduction.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/cpu/amd.c |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c47ceee..5bdcbd4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,15 +613,19 @@ static void early_init_amd(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_E400);
 
/*
-* BIOS support is required for SME. If BIOS has not enabled SME
-* then don't advertise the feature (set in scattered.c)
+* BIOS support is required for SME. If BIOS has enabled SME then
+* adjust x86_phys_bits by the SME physical address space reduction
+* value. If BIOS has not enabled SME then don't advertise the
+* feature (set in scattered.c).
 */
if (cpu_has(c, X86_FEATURE_SME)) {
u64 msr;
 
/* Check if SME is enabled */
rdmsrl(MSR_K8_SYSCFG, msr);
-   if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+   if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT)
+   c->x86_phys_bits -= (cpuid_ebx(0x801f) >> 6) & 0x3f;
+   else
clear_cpu_cap(c, X86_FEATURE_SME);
}
 }



[PATCH v7 10/36] x86/mm: Provide general kernel support for memory encryption

2017-06-16 Thread Tom Lendacky
Changes to the existing page table macros will allow the SME support to
be enabled in a simple fashion with minimal changes to files that use these
macros.  Since the memory encryption mask will now be part of the regular
pagetable macros, we introduce two new macros (_PAGE_TABLE_NOENC and
_KERNPG_TABLE_NOENC) to allow for early pagetable creation/initialization
without the encryption mask before SME becomes active.  Two new pgprot()
macros are defined to allow setting or clearing the page encryption mask.

The FIXMAP_PAGE_NOCACHE define is introduced for use with MMIO.  SME does
not support encryption for MMIO areas so this define removes the encryption
mask from the page attribute.

Two new macros are introduced (__sme_pa() / __sme_pa_nodebug()) to allow
creating a physical address with the encryption mask.  These are used when
working with the cr3 register so that the PGD can be encrypted. The current
__va() macro is updated so that the virtual address is generated based off
of the physical address without the encryption mask thus allowing the same
virtual address to be generated regardless of whether encryption is enabled
for that physical location or not.

Also, an early initialization function is added for SME.  If SME is active,
this function:
 - Updates the early_pmd_flags so that early page faults create mappings
   with the encryption mask.
 - Updates the __supported_pte_mask to include the encryption mask.
 - Updates the protection_map entries to include the encryption mask so
   that user-space allocations will automatically have the encryption mask
   applied.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/boot/compressed/pagetable.c |7 +
 arch/x86/include/asm/fixmap.h|7 +
 arch/x86/include/asm/mem_encrypt.h   |   13 ++
 arch/x86/include/asm/page_types.h|3 ++
 arch/x86/include/asm/pgtable.h   |9 +++
 arch/x86/include/asm/pgtable_types.h |   45 ++
 arch/x86/include/asm/processor.h |3 ++
 arch/x86/kernel/espfix_64.c  |2 +-
 arch/x86/kernel/head64.c |   11 +++-
 arch/x86/kernel/head_64.S|   20 ---
 arch/x86/mm/kasan_init_64.c  |4 ++-
 arch/x86/mm/mem_encrypt.c|   18 ++
 arch/x86/mm/pageattr.c   |3 ++
 include/asm-generic/pgtable.h|8 ++
 include/linux/mem_encrypt.h  |8 ++
 15 files changed, 128 insertions(+), 33 deletions(-)

diff --git a/arch/x86/boot/compressed/pagetable.c 
b/arch/x86/boot/compressed/pagetable.c
index 8e69df9..246bf29 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -15,6 +15,13 @@
 #define __pa(x)  ((unsigned long)(x))
 #define __va(x)  ((void *)((unsigned long)(x)))
 
+/*
+ * The pgtable.h and mm/ident_map.c includes make use of the SME related
+ * information which is not used in the compressed image support. Un-define
+ * the SME support to avoid any compile and link errors.
+ */
+#undef CONFIG_AMD_MEM_ENCRYPT
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b65155c..d9ff226 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -157,6 +157,13 @@ static inline void __set_fixmap(enum fixed_addresses idx,
 }
 #endif
 
+/*
+ * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not
+ * supported for MMIO addresses, so make sure that the memory encryption
+ * mask is not part of the page attributes.
+ */
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE
+
 #include 
 
 #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index 988b336..faae4e1 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,18 +21,31 @@
 
 extern unsigned long sme_me_mask;
 
+void __init sme_early_init(void);
+
 void __init sme_enable(void);
 
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask0UL
 
+static inline void __init sme_early_init(void) { }
+
 static inline void __init sme_enable(void) { }
 
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
 unsigned long sme_get_me_mask(void);
 
+/*
+ * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
+ * writing to or comparing values from the cr3 register.  Having the
+ * encryption mask set in cr3 enables the PGD entry to be encrypted and
+ * avoid special case handling of PGD allocations.
+ */
+#define __sme_pa(x)(__pa(x) | sme_me_mask)
+#define __sme_pa_nodebug(x)(__pa_nodebug(x) | sme_me_mask)
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/page_types.h 
b/arch/x86/include/asm/page_types.h
index 7bd0099..b98ed9d 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/

[PATCH v7 19/36] x86/mm: Add support to access boot related data in the clear

2017-06-16 Thread Tom Lendacky
Boot data (such as EFI related data) is not encrypted when the system is
booted because UEFI/BIOS does not run with SME active. In order to access
this data properly it needs to be mapped decrypted.

Update early_memremap() to provide an arch specific routine to modify the
pagetable protection attributes before they are applied to the new
mapping. This is used to remove the encryption mask for boot related data.

Update memremap() to provide an arch specific routine to determine if RAM
remapping is allowed.  RAM remapping will cause an encrypted mapping to be
generated. By preventing RAM remapping, ioremap_cache() will be used
instead, which will provide a decrypted mapping of the boot related data.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/io.h |5 +
 arch/x86/mm/ioremap.c |  179 +
 include/linux/io.h|2 +
 kernel/memremap.c |   20 -
 mm/early_ioremap.c|   18 -
 5 files changed, 217 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7afb0e2..09c5557 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -381,4 +381,9 @@ extern int __must_check arch_phys_wc_add(unsigned long base,
 #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
 #endif
 
+extern bool arch_memremap_can_ram_remap(resource_size_t offset,
+   unsigned long size,
+   unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4feda83..f3fa007 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -22,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "physaddr.h"
 
@@ -422,6 +424,183 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
 }
 
+/*
+ * Examine the physical address to determine if it is an area of memory
+ * that should be mapped decrypted.  If the memory is not part of the
+ * kernel usable area it was accessed and created decrypted, so these
+ * areas should be mapped decrypted.
+ */
+static bool memremap_should_map_decrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+   /* Check if the address is outside kernel usable area */
+   switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
+   case E820_TYPE_RESERVED:
+   case E820_TYPE_ACPI:
+   case E820_TYPE_NVS:
+   case E820_TYPE_UNUSABLE:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+/*
+ * Examine the physical address to determine if it is EFI data. Check
+ * it against the boot params structure and EFI tables and memory types.
+ */
+static bool memremap_is_efi_data(resource_size_t phys_addr,
+unsigned long size)
+{
+   u64 paddr;
+
+   /* Check if the address is part of EFI boot/runtime data */
+   if (!efi_enabled(EFI_BOOT))
+   return false;
+
+   paddr = boot_params.efi_info.efi_memmap_hi;
+   paddr <<= 32;
+   paddr |= boot_params.efi_info.efi_memmap;
+   if (phys_addr == paddr)
+   return true;
+
+   paddr = boot_params.efi_info.efi_systab_hi;
+   paddr <<= 32;
+   paddr |= boot_params.efi_info.efi_systab;
+   if (phys_addr == paddr)
+   return true;
+
+   if (efi_is_table_address(phys_addr))
+   return true;
+
+   switch (efi_mem_type(phys_addr)) {
+   case EFI_BOOT_SERVICES_DATA:
+   case EFI_RUNTIME_SERVICES_DATA:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain.
+ */
+static bool memremap_is_setup_data(resource_size_t phys_addr,
+  unsigned long size)
+{
+   struct setup_data *data;
+   u64 paddr, paddr_next;
+
+   paddr = boot_params.hdr.setup_data;
+   while (paddr) {
+   unsigned int len;
+
+   if (phys_addr == paddr)
+   return true;
+
+   data = memremap(paddr, sizeof(*data),
+   MEMREMAP_WB | MEMREMAP_DEC);
+
+   paddr_next = data->next;
+   len = data->len;
+
+   memunmap(data);
+
+   if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+   return true;
+
+   paddr = paddr_next;
+   }
+
+   return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params se

[PATCH v7 20/36] x86, mpparse: Use memremap to map the mpf and mpc data

2017-06-16 Thread Tom Lendacky
The SMP MP-table is built by UEFI and placed in memory in a decrypted
state. These tables are accessed using a mix of early_memremap(),
early_memunmap(), phys_to_virt() and virt_to_phys(). Change all accesses
to use early_memremap()/early_memunmap(). This allows for proper setting
of the encryption mask so that the data can be successfully accessed when
SME is active.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/mpparse.c |   98 -
 1 file changed, 70 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index fd37f39..5cbb317 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -429,7 +429,7 @@ static inline void __init construct_default_ISA_mptable(int 
mpc_default_type)
}
 }
 
-static struct mpf_intel *mpf_found;
+static unsigned long mpf_base;
 
 static unsigned long __init get_mpc_size(unsigned long physptr)
 {
@@ -451,6 +451,7 @@ static int __init check_physptr(struct mpf_intel *mpf, 
unsigned int early)
 
size = get_mpc_size(mpf->physptr);
mpc = early_memremap(mpf->physptr, size);
+
/*
 * Read the physical hardware table.  Anything here will
 * override the defaults.
@@ -497,12 +498,12 @@ static int __init check_physptr(struct mpf_intel *mpf, 
unsigned int early)
  */
 void __init default_get_smp_config(unsigned int early)
 {
-   struct mpf_intel *mpf = mpf_found;
+   struct mpf_intel *mpf;
 
if (!smp_found_config)
return;
 
-   if (!mpf)
+   if (!mpf_base)
return;
 
if (acpi_lapic && early)
@@ -515,6 +516,12 @@ void __init default_get_smp_config(unsigned int early)
if (acpi_lapic && acpi_ioapic)
return;
 
+   mpf = early_memremap(mpf_base, sizeof(*mpf));
+   if (!mpf) {
+   pr_err("MPTABLE: error mapping MP table\n");
+   return;
+   }
+
pr_info("Intel MultiProcessor Specification v1.%d\n",
mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
@@ -529,7 +536,7 @@ void __init default_get_smp_config(unsigned int early)
/*
 * Now see if we need to read further.
 */
-   if (mpf->feature1 != 0) {
+   if (mpf->feature1) {
if (early) {
/*
 * local APIC has default address
@@ -542,8 +549,10 @@ void __init default_get_smp_config(unsigned int early)
construct_default_ISA_mptable(mpf->feature1);
 
} else if (mpf->physptr) {
-   if (check_physptr(mpf, early))
+   if (check_physptr(mpf, early)) {
+   early_memunmap(mpf, sizeof(*mpf));
return;
+   }
} else
BUG();
 
@@ -552,6 +561,8 @@ void __init default_get_smp_config(unsigned int early)
/*
 * Only use the first configuration found.
 */
+
+   early_memunmap(mpf, sizeof(*mpf));
 }
 
 static void __init smp_reserve_memory(struct mpf_intel *mpf)
@@ -561,15 +572,16 @@ static void __init smp_reserve_memory(struct mpf_intel 
*mpf)
 
 static int __init smp_scan_config(unsigned long base, unsigned long length)
 {
-   unsigned int *bp = phys_to_virt(base);
+   unsigned int *bp;
struct mpf_intel *mpf;
-   unsigned long mem;
+   int ret = 0;
 
apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
base, base + length - 1);
BUILD_BUG_ON(sizeof(*mpf) != 16);
 
while (length > 0) {
+   bp = early_memremap(base, length);
mpf = (struct mpf_intel *)bp;
if ((*bp == SMP_MAGIC_IDENT) &&
(mpf->length == 1) &&
@@ -579,24 +591,26 @@ static int __init smp_scan_config(unsigned long base, 
unsigned long length)
 #ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
 #endif
-   mpf_found = mpf;
+   mpf_base = base;
 
-   pr_info("found SMP MP-table at [mem %#010llx-%#010llx] 
mapped at [%p]\n",
-   (unsigned long long) virt_to_phys(mpf),
-   (unsigned long long) virt_to_phys(mpf) +
-   sizeof(*mpf) - 1, mpf);
+   pr_info("found SMP MP-table at [mem %#010lx-%#010lx] 
mapped at [%p]\n",
+   base, base + sizeof(*mpf) - 1, mpf);
 
-   mem = virt_to_phys(mpf);
-   memblock_reserve(mem, sizeof(*mpf));
+   memblock_reserve(base, sizeof(*mpf));
if (mpf->physptr)
smp_reserve_memory(mpf);
 
-   return 1;
+   ret = 1;
}
-   bp += 4;
+   early_memunmap(bp, length);
+
+  

[PATCH v7 23/36] x86, realmode: Decrypt trampoline area if memory encryption is active

2017-06-16 Thread Tom Lendacky
When Secure Memory Encryption is enabled, the trampoline area must not
be encrypted. A CPU running in real mode will not be able to decrypt
memory that has been encrypted because it will not be able to use addresses
with the memory encryption mask.

Signed-off-by: Tom Lendacky 
---
 arch/x86/realmode/init.c |8 
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cd4be19..d6ddc7e 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,6 +1,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -59,6 +60,13 @@ static void __init setup_real_mode(void)
 
base = (unsigned char *)real_mode_header;
 
+   /*
+* If SME is active, the trampoline area will need to be in
+* decrypted memory in order to bring up other processors
+* successfully.
+*/
+   set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
+
memcpy(base, real_mode_blob, size);
 
phys_base = __pa(base);



[PATCH v7 30/36] kvm: x86: svm: Support Secure Memory Encryption within KVM

2017-06-16 Thread Tom Lendacky
Update the KVM support to work with SME. The VMCB has a number of fields
where physical addresses are used and these addresses must contain the
memory encryption mask in order to properly access the encrypted memory.
Also, use the memory encryption mask when creating and using the nested
page tables.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |2 +-
 arch/x86/kvm/mmu.c  |   12 
 arch/x86/kvm/mmu.h  |2 +-
 arch/x86/kvm/svm.c  |   35 ++-
 arch/x86/kvm/vmx.c  |3 ++-
 arch/x86/kvm/x86.c  |3 ++-
 6 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 695605e..6d1267f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1069,7 +1069,7 @@ struct kvm_arch_async_pf {
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
-   u64 acc_track_mask);
+   u64 acc_track_mask, u64 me_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cb82259..e85888c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -107,7 +107,7 @@ enum {
(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
 
 
-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_BASE_ADDR_MASK __sme_clr1ULL << 52) - 1) & 
~(u64)(PAGE_SIZE-1)))
 #define PT64_DIR_BASE_ADDR_MASK \
(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
 #define PT64_LVL_ADDR_MASK(level) \
@@ -125,7 +125,7 @@ enum {
* PT32_LEVEL_BITS))) - 1))
 
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
-   | shadow_x_mask | shadow_nx_mask)
+   | shadow_x_mask | shadow_nx_mask | shadow_me_mask)
 
 #define ACC_EXEC_MASK1
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
@@ -184,6 +184,7 @@ struct kvm_shadow_walk_iterator {
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
 static u64 __read_mostly shadow_present_mask;
+static u64 __read_mostly shadow_me_mask;
 
 /*
  * The mask/value to distinguish a PTE that has been marked not-present for
@@ -317,7 +318,7 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
-   u64 acc_track_mask)
+   u64 acc_track_mask, u64 me_mask)
 {
if (acc_track_mask != 0)
acc_track_mask |= SPTE_SPECIAL_MASK;
@@ -330,6 +331,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
shadow_present_mask = p_mask;
shadow_acc_track_mask = acc_track_mask;
WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
+   shadow_me_mask = me_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -2398,7 +2400,8 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 
*sptep,
BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
-  shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+  shadow_user_mask | shadow_x_mask | shadow_accessed_mask |
+  shadow_me_mask;
 
mmu_spte_set(sptep, spte);
 
@@ -2700,6 +2703,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
pte_access &= ~ACC_WRITE_MASK;
 
spte |= (u64)pfn << PAGE_SHIFT;
+   spte |= shadow_me_mask;
 
if (pte_access & ACC_WRITE_MASK) {
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 330bf3a..08b779d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,7 +48,7 @@
 
 static inline u64 rsvd_bits(int s, int e)
 {
-   return ((1ULL << (e - s + 1)) - 1) << s;
+   return __sme_clr(((1ULL << (e - s + 1)) - 1) << s);
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ba9891a..d2e9fca 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1138,9 +1138,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
 {
struct vmcb *vmcb = svm->vmcb;
struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
-   phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
-   phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
-   phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
+   phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
+   phys_addr_t lpa = 
__sme_set(page_to_phys(vm_data->avic

[PATCH v7 36/36] x86/mm: Add support to make use of Secure Memory Encryption

2017-06-16 Thread Tom Lendacky
Add support to check if SME has been enabled and if memory encryption
should be activated (checking of command line option based on the
configuration of the default state).  If memory encryption is to be
activated, then the encryption mask is set and the kernel is encrypted
"in place."

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |6 ++-
 arch/x86/kernel/head64.c   |4 +-
 arch/x86/mm/mem_encrypt.c  |   86 +++-
 3 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index 7da6de3..aac9ed9 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -17,6 +17,8 @@
 
 #include 
 
+#include 
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
 extern unsigned long sme_me_mask;
@@ -37,7 +39,7 @@ void __init sme_early_decrypt(resource_size_t paddr,
 
 void __init sme_early_init(void);
 
-void __init sme_enable(void);
+void __init sme_enable(struct boot_params *bp);
 
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
@@ -58,7 +60,7 @@ static inline void __init sme_unmap_bootdata(char 
*real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
 
-static inline void __init sme_enable(void) { }
+static inline void __init sme_enable(struct boot_params *bp) { }
 
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 9e94ed2..1ff2e98 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -43,7 +43,7 @@ static void __init *fixup_pointer(void *ptr, unsigned long 
physaddr)
return ptr - (void *)_text + (void *)physaddr;
 }
 
-void __init __startup_64(unsigned long physaddr)
+void __init __startup_64(unsigned long physaddr, struct boot_params *bp)
 {
unsigned long load_delta, *p;
unsigned long pgtable_flags;
@@ -68,7 +68,7 @@ void __init __startup_64(unsigned long physaddr)
for (;;);
 
/* Activate Secure Memory Encryption (SME) if supported and enabled */
-   sme_enable();
+   sme_enable(bp);
 
/* Include the SME encryption mask in the fixup value */
load_delta += sme_get_me_mask();
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 6e87662..13f780e 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -13,19 +13,34 @@
 #include 
 #include 
 
+#include 
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+
+/*
+ * Some SME functions run very early causing issues with the stack-protector
+ * support. Provide a way to turn off this support on a per-function basis.
+ */
+#define SME_NOSTACKP __attribute__((__optimize__("no-stack-protector")))
+
+static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+static char sme_cmdline_on[]  __initdata = "on";
+static char sme_cmdline_off[] __initdata = "off";
 
 /*
  * Since SME related variables are set early in the boot process they must
@@ -200,6 +215,8 @@ void __init mem_encrypt_init(void)
 
/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
swiotlb_update_mem_attributes();
+
+   pr_info("AMD Secure Memory Encryption (SME) active\n");
 }
 
 void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
@@ -527,8 +544,73 @@ void __init sme_encrypt_kernel(void)
native_write_cr3(__native_read_cr3());
 }
 
-void __init sme_enable(void)
+void __init SME_NOSTACKP sme_enable(struct boot_params *bp)
 {
+   const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+   unsigned int eax, ebx, ecx, edx;
+   bool active_by_default;
+   unsigned long me_mask;
+   char buffer[16];
+   u64 msr;
+
+   /* Check for the SME support leaf */
+   eax = 0x80000000;
+   ecx = 0;
+   native_cpuid(&eax, &ebx, &ecx, &edx);
+   if (eax < 0x8000001f)
+   return;
+
+   /*
+* Check for the SME feature:
+*   CPUID Fn8000_001F[EAX] - Bit 0
+* Secure Memory Encryption support
+*   CPUID Fn8000_001F[EBX] - Bits 5:0
+* Pagetable bit position used to indicate encryption
+*/
+   eax = 0x8000001f;
+   ecx = 0;
+   native_cpuid(&eax, &ebx, &ecx, &edx);
+   if (!(eax & 1))
+   return;
+
+   me_mask = 1UL << (ebx & 0x3f);
+
+   /* Check if SME is enabled */
+   msr = __rdmsr(MSR_K8_SYSCFG);
+   if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+   return;
+
+   /*
+* Fixups have not been applied to phys_base yet and we're running
+* identity mapped, so we must obtain the address to the SME command
+* line argument data using rip-relative addressing.
+*/
+   asm ("lea sme_cmdline_arg(%%rip), %0"
+: "=r" (cmdline_arg)
+  

[PATCH v7 34/36] x86/mm: Add support to encrypt the kernel in-place

2017-06-16 Thread Tom Lendacky
Add the support to encrypt the kernel in-place. This is done by creating
new page mappings for the kernel - a decrypted write-protected mapping
and an encrypted mapping. The kernel is encrypted by copying it through
a temporary buffer.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |6 +
 arch/x86/mm/Makefile   |2 
 arch/x86/mm/mem_encrypt.c  |  314 
 arch/x86/mm/mem_encrypt_boot.S |  150 +
 4 files changed, 472 insertions(+)
 create mode 100644 arch/x86/mm/mem_encrypt_boot.S

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index af835cf..7da6de3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,6 +21,12 @@
 
 extern unsigned long sme_me_mask;
 
+void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
+unsigned long decrypted_kernel_vaddr,
+unsigned long kernel_len,
+unsigned long encryption_wa,
+unsigned long encryption_pgd);
+
 void __init sme_early_encrypt(resource_size_t paddr,
  unsigned long size);
 void __init sme_early_decrypt(resource_size_t paddr,
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9e13841..0633142 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -38,3 +38,5 @@ obj-$(CONFIG_NUMA_EMU)+= numa_emulation.o
 obj-$(CONFIG_X86_INTEL_MPX)+= mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+
+obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt_boot.o
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 842c8a6..6e87662 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -24,6 +24,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /*
  * Since SME related variables are set early in the boot process they must
@@ -209,8 +211,320 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned 
long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
+static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
+unsigned long end)
+{
+   unsigned long pgd_start, pgd_end, pgd_size;
+   pgd_t *pgd_p;
+
+   pgd_start = start & PGDIR_MASK;
+   pgd_end = end & PGDIR_MASK;
+
+   pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
+   pgd_size *= sizeof(pgd_t);
+
+   pgd_p = pgd_base + pgd_index(start);
+
+   memset(pgd_p, 0, pgd_size);
+}
+
+#ifndef CONFIG_X86_5LEVEL
+#define native_make_p4d(_x)(p4d_t) { .pgd = native_make_pgd(_x) }
+#endif
+
+#define PGD_FLAGS  _KERNPG_TABLE_NOENC
+#define P4D_FLAGS  _KERNPG_TABLE_NOENC
+#define PUD_FLAGS  _KERNPG_TABLE_NOENC
+#define PMD_FLAGS  (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
+unsigned long vaddr, pmdval_t pmd_val)
+{
+   pgd_t *pgd_p;
+   p4d_t *p4d_p;
+   pud_t *pud_p;
+   pmd_t *pmd_p;
+
+   pgd_p = pgd_base + pgd_index(vaddr);
+   if (native_pgd_val(*pgd_p)) {
+   if (IS_ENABLED(CONFIG_X86_5LEVEL))
+   p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & 
~PTE_FLAGS_MASK);
+   else
+   pud_p = (pud_t *)(native_pgd_val(*pgd_p) & 
~PTE_FLAGS_MASK);
+   } else {
+   pgd_t pgd;
+
+   if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+   p4d_p = pgtable_area;
+   memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+   pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+
+   pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
+   } else {
+   pud_p = pgtable_area;
+   memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+   pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+   pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
+   }
+   native_set_pgd(pgd_p, pgd);
+   }
+
+   if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+   p4d_p += p4d_index(vaddr);
+   if (native_p4d_val(*p4d_p)) {
+   pud_p = (pud_t *)(native_p4d_val(*p4d_p) & 
~PTE_FLAGS_MASK);
+   } else {
+   p4d_t p4d;
+
+   pud_p = pgtable_area;
+   memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+   pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+   p4d = native_make_p4d((p4dval_t)pud_p + P4D_FLAGS);
+   native_set_p4d(p4d_p, p4d);
+   }
+   }
+
+   pud_p += pud_index(vaddr);
+   if (native_pud_val(*pud_p)) {
+   if (n

<    1   2   3   4   5   6   7   8   >