Re: [PATCH 3/5] intel_pstate: remove intel_pstate.get()
On Wednesday, June 07, 2017 07:39:14 PM Len Brown wrote: > From: Len Brown > > The x86 cpufreq core now uses aperfmperf_khz_on_cpu() > to supply /sys/.../cpufreq/scaling_cur_freq > on all x86 systems supporting APERF/MPERF. > > That includes 100% of systems supported by intel_pstate, > and so intel_pstate.get() is now a NOP -- remove it. > > Invoke aperfmperf_khz_on_cpu() directly, > if legacy-mode p-state tracing is enabled. > > Signed-off-by: Len Brown > --- > drivers/cpufreq/intel_pstate.c | 16 +--- > 1 file changed, 1 insertion(+), 15 deletions(-) > > diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c > index b7de5bd..5d67780 100644 > --- a/drivers/cpufreq/intel_pstate.c > +++ b/drivers/cpufreq/intel_pstate.c > @@ -1597,12 +1597,6 @@ static inline bool intel_pstate_sample(struct cpudata > *cpu, u64 time) > return false; > } > > -static inline int32_t get_avg_frequency(struct cpudata *cpu) > -{ > - return mul_ext_fp(cpu->sample.core_avg_perf, > - cpu->pstate.max_pstate_physical * > cpu->pstate.scaling); > -} > - > static inline int32_t get_avg_pstate(struct cpudata *cpu) > { > return mul_ext_fp(cpu->pstate.max_pstate_physical, > @@ -1728,7 +1722,7 @@ static void intel_pstate_adjust_pstate(struct cpudata > *cpu, int target_pstate) > sample->mperf, > sample->aperf, > sample->tsc, > - get_avg_frequency(cpu), > + aperfmperf_khz_on_cpu(cpu->cpu), > fp_toint(cpu->iowait_boost * 100)); > } > > @@ -1922,13 +1916,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) > return 0; > } > > -static unsigned int intel_pstate_get(unsigned int cpu_num) > -{ > - struct cpudata *cpu = all_cpu_data[cpu_num]; > - > - return cpu ? 
get_avg_frequency(cpu) : 0; > -} > - > static void intel_pstate_set_update_util_hook(unsigned int cpu_num) > { > struct cpudata *cpu = all_cpu_data[cpu_num]; > @@ -2157,7 +2144,6 @@ static struct cpufreq_driver intel_pstate = { > .setpolicy = intel_pstate_set_policy, > .suspend= intel_pstate_hwp_save_state, > .resume = intel_pstate_resume, > - .get= intel_pstate_get, > .init = intel_pstate_cpu_init, > .exit = intel_pstate_cpu_exit, > .stop_cpu = intel_pstate_stop_cpu, > This change will cause cpufreq_quick_get() to work differently and it is called by KVM among other things. Will that still work? Thanks, Rafael
Re: LTS testing with latest kselftests - some failures
On Fri, Jun 16, 2017 at 06:46:51PM +0200, Luis R. Rodriguez wrote: Kees, please review 47e0bbb7fa98 below. Brian, please review be4a1326d12c below. On Thu, Jun 15, 2017 at 11:26:53PM +0530, Sumit Semwal wrote: Hello Greg, Shuah, While testing 4.4.y and 4.9.y LTS kernels with latest kselftest, To be clear it seems like you are taking the latest upstream kselftest and running it against older stable kernels. Furthermore you seem to only run the shell script tests but are using older kselftests drivers? Is this all correct? Otherwise it is unclear how you are running into the issues below. Does 0-day do the same? I thought 0-day takes just the kselftest from each tree submitted. That *seemed* to me like the way it was designed. Shuah ? Yes in 0-day, we run the kselftest code corresponding to the current kernel. Thanks, Fengguang
[PATCH 3/3] block: order /proc/devices by major number
Presently, the order of the block devices listed in /proc/devices is not entirely sequential. If a block device has a major number greater than BLKDEV_MAJOR_HASH_SIZE (255), it will be ordered as if its major were modulo 255. For example, 511 appears after 1. This patch cleans that up and prints each major number in the correct order, regardless of where they are stored in the hash table. In order to do this, we introduce BLKDEV_MAJOR_MAX as an artificial limit (chosen to be 512). It will then print all devices in major number order from 0 to the maximum. Signed-off-by: Logan Gunthorpe Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: Jeff Layton Cc: "J. Bruce Fields" --- This is a patch Greg requested after I proposed[1] the same to char devs. It is based on the chardev patch I sent so it should be merged after it (to avoid conflicts). If there are any changes requested I'll resend the entire set as a whole. [1] https://patchwork.kernel.org/patch/9790093/ block/genhd.c | 18 +- fs/proc/devices.c | 4 ++-- include/linux/fs.h | 4 ++-- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index d252d29fe837..1fc734b1a0e4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -242,6 +242,7 @@ EXPORT_SYMBOL_GPL(disk_map_sector_rcu); * Can be deleted altogether. Later. 
* */ +#define BLKDEV_MAJOR_HASH_SIZE 255 static struct blk_major_name { struct blk_major_name *next; int major; @@ -259,12 +260,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset) { struct blk_major_name *dp; - if (offset < BLKDEV_MAJOR_HASH_SIZE) { - mutex_lock(&block_class_lock); - for (dp = major_names[offset]; dp; dp = dp->next) + mutex_lock(&block_class_lock); + for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) + if (dp->major == offset) seq_printf(seqf, "%3d %s\n", dp->major, dp->name); - mutex_unlock(&block_class_lock); - } + mutex_unlock(&block_class_lock); } #endif /* CONFIG_PROC_FS */ @@ -309,6 +309,14 @@ int register_blkdev(unsigned int major, const char *name) ret = major; } + if (major >= BLKDEV_MAJOR_MAX) { + pr_err("register_blkdev: major requested (%d) is greater than the maximum (%d) for %s\n", + major, BLKDEV_MAJOR_MAX, name); + + ret = -EINVAL; + goto out; + } + p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); if (p == NULL) { ret = -ENOMEM; diff --git a/fs/proc/devices.c b/fs/proc/devices.c index d196e22c4f1c..e5709343feb7 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -25,7 +25,7 @@ static int devinfo_show(struct seq_file *f, void *v) static void *devinfo_start(struct seq_file *f, loff_t *pos) { - if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_MAX)) + if (*pos < (BLKDEV_MAJOR_MAX + CHRDEV_MAJOR_MAX)) return pos; return NULL; } @@ -33,7 +33,7 @@ static void *devinfo_start(struct seq_file *f, loff_t *pos) static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) { (*pos)++; - if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_MAX)) + if (*pos >= (BLKDEV_MAJOR_MAX + CHRDEV_MAJOR_MAX)) return NULL; return pos; } diff --git a/include/linux/fs.h b/include/linux/fs.h index f1347c2ca3e9..8cc651807ea4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2478,14 +2478,14 @@ static inline void unregister_chrdev(unsigned int major, const char *name) #define BDEVT_SIZE 10 /* Largest string for 
MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_HASH_SIZE 255 +#define BLKDEV_MAJOR_MAX 512 extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern void blkdev_show(struct seq_file *,off_t); #else -#define BLKDEV_MAJOR_HASH_SIZE 0 +#define BLKDEV_MAJOR_MAX 0 #endif extern void init_special_inode(struct inode *, umode_t, dev_t); -- 2.11.0
Re: [PATCH 1/2] platform/x86: silead_dmi: Add touchscreen info for PoV mobii wintab p800w
On Fri, Jun 16, 2017 at 03:22:45PM +0200, Hans de Goede wrote: > Hi, > > On 16-06-17 14:44, Andy Shevchenko wrote: > > On Thu, Jun 15, 2017 at 7:53 PM, Darren Hart wrote: > > > On Thu, Jun 15, 2017 at 08:48:31AM +0200, Hans de Goede wrote: > > > > > > + /* Point of View mobii wintab p800w */ > > > > + .driver_data = (void *)&pov_mobii_wintab_p800w_data, > > > > + .matches = { > > > > + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), > > > > + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), > > > > + DMI_MATCH(DMI_BIOS_VERSION, "3BAIR1013"), > > > > + /* Above matches are too generic, add bios-date > > > > match */ > > > > + DMI_MATCH(DMI_BIOS_DATE, "08/22/2014"), > > > > > > This is the first time I've seen a BIOS date match used to determine > > > hardware > > > features. DMI matching is a (necessary) hack to begin with (the vendors > > > should > > > be providing this data via ACPI _DSD anyway) but a date match means we > > > would > > > need a kernel patch every time one of these tablets gets a BIOS update... > > > > > > With words like "Aptio CRB" it's clear the vendor isn't doing their job > > > and just > > > using unmodified reference code. The problem with this of course is that > > > the > > > vendor is not providing a way to identify this hardware. > > > > > > Andy, I'd appreciate your thoughts on this... I'm leaning towards not > > > accepting > > > bios date (or indeed, BIOS version) as a way to identify a platform. > > > > The question is what is the anticipated amount of affected devices > > with BIOS date included and otherwise? > > I expect there to be collisions (false positive matches) without the > BIOS_DATE check, a quick web-search finds other devices with a > 3BAIR1013 bios version. Those don't necessarily also use a Silead > touchscreen (which is needed for a collision to happen), but given > the popularity of Silead touchscreens on cheap devices a collision > is not unlikely. 
> > With the bios-date check added, I expect this match to be unique, > for it to not be unique we would need to be really unlucky. > > > If Hans believes that there will be no update for some devices, > > Yeah I'm pretty sure this specific device will not see any > BIOS updates ever. > > > while there are devices with the same DMI strings, but different date and > > _fixed_ issue, I think we have no other choice for now. > > Also can we use some other strings to distinguish group of devices > > which are affected? > > bios_date: 08/22/2014 > bios_vendor: American Megatrends Inc. > bios_version: 3BAIR1013 > board_asset_tag: To be filled by O.E.M. > board_name: Aptio CRB > board_serial: T80091A4C11B0848 > board_vendor: AMI Corporation > board_version: To be filled by O.E.M. > chassis_asset_tag: To Be Filled By O.E.M. > chassis_serial: To Be Filled By O.E.M. > chassis_type: 3 > chassis_vendor: To Be Filled By O.E.M. > chassis_version: To Be Filled By O.E.M. > product_name: To be filled by O.E.M. > product_serial: To be filled by O.E.M. > product_uuid: 03000200-0400-0500-0006-000700080009 > product_version: To be filled by O.E.M. > sys_vendor: To be filled by O.E.M. > > The product-uuid is a known example uuid, so is > no good. The board_serial might be useful, but > only if it is unique for the model and not per > tablet. Unfortunately I only have 1 of these > tablets, so I cannot tell. Do we have any indication that this BIOS Date isn't just the default value provided by AMI? Does it offer any more information than the BIOS Version? I suppose we may be able to do some kind of a partial match on the Board Serial if even that is platform specific (I suspect it is with the T800 at the beginning. The sloppy handling of this firmware really irks me. That's obviously not Hans' fault, so we'll take the patch. 
If we see a conflict in the future, we'll just have to compare the other DMI strings for a match and see what we can do. I'm even tempted to insert a printk on this match, dumping the DMI values and requesting the user to copy/paste them into an email to this list. I think we've already spent too much time on this patch based on this review: https://www.notebookcheck.net/Point-of-View-Mobii-WinTab-800W-Tablet-Review.129561.0.html Nice... -- Darren Hart VMware Open Source Technology Center
[PATCHv3 1/3] firmware_class: move NO_CACHE from private to driver_data_req_params
From: Yi Li This adds DRIVER_DATA_REQ_NO_CACHE flag with .req flag under struct driver_data_req_params. When this flag is set, the driver_data driver will not cache the firmware during PM cycle, which is expensive. It will be used by streaming case and other drivers which implement their own cache thing. Also added the debugfs interface to selftest. Signed-off-by: Yi Li --- drivers/base/firmware_class-dbg.c | 108 ++ drivers/base/firmware_class.c | 26 + include/linux/driver_data.h | 4 ++ 3 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 drivers/base/firmware_class-dbg.c diff --git a/drivers/base/firmware_class-dbg.c b/drivers/base/firmware_class-dbg.c new file mode 100644 index 000..102a4cd --- /dev/null +++ b/drivers/base/firmware_class-dbg.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017 by Yi Li + * + */ +/* This is part of firmware_class.c for testing firmware cache */ + +#ifndef CONFIG_TEST_DRIVER_DATA +static inline void create_debug_files(struct firmware_cache *cache) { } +static inline void remove_debug_files(struct firmware_cache *cache) { } +#else +#include +#include + +static int debug_cache_show(struct seq_file *s, void *v) +{ + struct firmware_cache *cache = s->private; + unsigned long flags; + struct fw_cache_entry *cache_entry; + + spin_lock_irqsave(&cache->lock, flags); + + list_for_each_entry(cache_entry, &cache->fw_names, list) + seq_printf(s, "cached %s\n", cache_entry->name); + + spin_unlock_irqrestore(&cache->lock, flags); + + return 0; +} + +static int debug_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, debug_cache_show, inode->i_private); +} + +#define MAX_LEN16 +/** + * test_cache - set value in the 'cache' control file + * + * The relevant values are: + * + * 1: Test the suspend and start the cache + * 0: Test the resume and clear the cache. 
+ **/ +static ssize_t test_cache(struct file *fp, const char __user *user_buffer, + size_t size, loff_t *ppos) +{ + char buf[MAX_LEN]; + size_t len; + long cmd; + + len = min(size, (size_t)(MAX_LEN - 1)); + if (copy_from_user(buf, user_buffer, len)) + return -EFAULT; + buf[len] = 0; + if (kstrtol(buf, 10, &cmd)) + return -EFAULT; + +#ifdef CONFIG_PM_SLEEP + switch (cmd) { + /* Simulate PM suspend prepare and start to cache */ + case 1: + kill_pending_fw_fallback_reqs(true); + device_cache_fw_images(); + disable_firmware(); + break; + /* Simulate PM resume and un-cache */ + case 0: + mutex_lock(&fw_lock); + fw_cache.state = FW_LOADER_NO_CACHE; + mutex_unlock(&fw_lock); + enable_firmware(); + device_uncache_fw_images_delay(10); + break; + default: + pr_err("unexpected cmd\n"); + } +#endif + return size; +} + +static const struct file_operations debug_cache_fops = { + .open = debug_cache_open, + .read = seq_read, + .write = test_cache, + .llseek = seq_lseek, + .release = single_release, +}; + +static void create_debug_files(struct firmware_cache *cache) +{ + cache->debug = debugfs_create_dir("firmware", NULL); + if (!cache->debug) + return; + if (!debugfs_create_file("cache", 0644, cache->debug, +cache, &debug_cache_fops)) + goto failed_create; + return; + +failed_create: + debugfs_remove_recursive(cache->debug); +} + +static void remove_debug_files(struct firmware_cache *cache) +{ + debugfs_remove_recursive(cache->debug); + cache->debug = NULL; +} +#endif diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 7af430a..a70a2a7 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -72,14 +72,10 @@ enum driver_data_mode { * issue a uevent to userspace. Userspace in turn is expected to be * monitoring for uevents for the firmware_class and will use the * exposted sysfs interface to upload the driver data for the caller. 
- * @DRIVER_DATA_PRIV_REQ_NO_CACHE: indicates that the driver data request - * should not set up and use the internal caching mechanism to assist - * drivers from fetching driver data at resume time after suspend. */ enum driver_data_priv_reqs { DRIVER_DATA_PRIV_REQ_FALLBACK = 1 << 0, DRIVER_DATA_PRIV_REQ_FALLBACK_UEVENT= 1 << 1, - DRIVER_DATA_PRIV_REQ_NO_CACHE = 1 << 2, }; /** @@ -151,10 +147,12 @@ struct driver_data_params { } #define __DATA_REQ_FIRMWARE_BUF(buf, size) \ + .req_params = {
[PATCHv3 2/3] iwlwifi: use DRIVER_DATA_REQ_NO_CACHE for driver_data
From: Yi Li Set DRIVER_DATA_REQ_NO_CACHE flag to disable driver_data driver caching mechanism, iwlwifi has its own firmware cache management. Signed-off-by: Yi Li --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 028854d3..db4d6fc 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -229,6 +229,8 @@ static int iwl_request_firmware(struct iwl_drv *drv) const struct driver_data_req_params req_params = { DRIVER_DATA_API_CB(iwl_req_fw_callback, drv), DRIVER_DATA_API(cfg->ucode_api_min, cfg->ucode_api_max, ".ucode"), + .reqs = DRIVER_DATA_REQ_NO_CACHE | + DRIVER_DATA_REQ_USE_API_VERSIONING, }; return driver_data_request_async(name_pre, -- 2.7.4
[PATCHv3 0/3] Enable no_cache in driver_data
From: Yi Li Changes in v3: - Add a platform device to enable PM support in test driver - Add a debugfs device to enable driver data cache testing - Fix a logic error so driver_data is able to cache - Add two test cases for caching and non-caching - Rebase to Luis R. Rodriguez's 20170605-driver-data branch Changes in v2: - Rebase to Luis R. Rodriguez's 20170501-driver-data-try2 branch - Expose DRIVER_DATA_REQ_NO_CACHE flag to public driver_data_req_params structure, so upper drivers can ask driver_data driver to bypass the internal caching mechanism. This will be used for streaming and other drivers that maintain their own caching, like iwlwifi. - Add self test cases. Yi Li (3): firmware_class: move NO_CACHE from private to driver_data_req_params iwlwifi: use DRIVER_DATA_REQ_NO_CACHE for driver_data test: add no_cache to driver_data load tester drivers/base/firmware_class-dbg.c | 108 drivers/base/firmware_class.c | 26 +++--- drivers/net/wireless/intel/iwlwifi/iwl-drv.c| 2 + include/linux/driver_data.h | 4 + lib/test_driver_data.c | 97 ++--- tools/testing/selftests/firmware/driver_data.sh | 69 +++ 6 files changed, 285 insertions(+), 21 deletions(-) create mode 100644 drivers/base/firmware_class-dbg.c -- 2.7.4
[PATCHv3 3/3] test: add no_cache to driver_data load tester
From: Yi Li Add a platform device to enable PM cache and add cache/no_cache test. Signed-off-by: Yi Li --- lib/test_driver_data.c | 97 ++--- tools/testing/selftests/firmware/driver_data.sh | 67 + 2 files changed, 154 insertions(+), 10 deletions(-) diff --git a/lib/test_driver_data.c b/lib/test_driver_data.c index c176527..cadd122 100644 --- a/lib/test_driver_data.c +++ b/lib/test_driver_data.c @@ -44,6 +44,8 @@ #include #include #include +#include +#include /* Used for the fallback default to test against */ #define TEST_DRIVER_DATA "test-driver_data.bin" @@ -73,6 +75,10 @@ int num_test_devs; * struct driver_data_reg_params @optional field for more information. * @keep: whether or not we wish to free the driver_data on our own, refer to * the struct driver_data_req_params @keep field for more information. + * @no_cache: whether or not we wish to use the internal caching mechanism + * to assist drivers from fetching driver data at resume time after + * suspend, refer to the struct driver_data_req_params .req + * DRIVER_DATA_REQ_NO_CACHE for more information. * @enable_opt_cb: whether or not the optional callback should be set * on a trigger. There is no equivalent setting on the struct * driver_data_req_params as this is implementation specific, and in @@ -121,6 +127,7 @@ struct test_config { bool async; bool optional; bool keep; + bool no_cache; bool enable_opt_cb; bool use_api_versioning; u8 api_min; @@ -163,6 +170,7 @@ struct test_driver_data_private { * a driver might typically use to stuff firmware / driver_data. * @misc_dev: we use a misc device under the hood * @dev: pointer to misc_dev's own struct device + * @pdev: pointer to platform device's struct device * @api_found_calls: number of calls a fetch for a driver was found. We use * for internal use on the api callback. 
* @driver_data_mutex: for access into the @driver_data, the fake storage @@ -181,6 +189,7 @@ struct driver_data_test_device { struct test_driver_data_private test_driver_data; struct miscdevice misc_dev; struct device *dev; + struct device *pdev; u8 api_found_calls; @@ -346,6 +355,9 @@ static ssize_t config_show(struct device *dev, len += snprintf(buf+len, PAGE_SIZE, "keep:\t\t%s\n", config->keep ? "true" : "false"); + len += snprintf(buf + len, PAGE_SIZE, + "no_cache:\t\t%s\n", + config->no_cache ? "true" : "false"); mutex_unlock(&test_dev->config_mutex); @@ -399,9 +411,9 @@ static int config_req_default(struct driver_data_test_device *test_dev) config->default_name); ret = driver_data_request_sync(config->default_name, - &req_params, test_dev->dev); + &req_params, test_dev->pdev); if (ret) - dev_info(test_dev->dev, + dev_info(test_dev->pdev, "load of default '%s' failed: %d\n", config->default_name, ret); @@ -456,14 +468,17 @@ static int trigger_config_sync(struct driver_data_test_device *test_dev) (config->optional ? DRIVER_DATA_REQ_OPTIONAL : 0) | (config->keep ? - DRIVER_DATA_REQ_KEEP : 0)), + DRIVER_DATA_REQ_KEEP : 0) | + (config->no_cache ? + DRIVER_DATA_REQ_NO_CACHE : 0)), }; const struct driver_data_req_params req_params_opt_cb = { DRIVER_DATA_DEFAULT_SYNC(config_sync_req_cb, test_dev), DRIVER_DATA_SYNC_OPT_CB(config_sync_req_default_cb, test_dev), .reqs = (config->optional ? DRIVER_DATA_REQ_OPTIONAL : 0) | - (config->keep ? DRIVER_DATA_REQ_KEEP : 0), + (config->keep ? DRIVER_DATA_REQ_KEEP : 0) | + (config->no_cache ? 
DRIVER_DATA_REQ_NO_CACHE : 0), }; const struct driver_data_req_params *req_params; @@ -472,9 +487,10 @@ static int trigger_config_sync(struct driver_data_test_device *test_dev) else req_params = &req_params_default; - ret = driver_data_request_sync(config->name, req_params, test_dev->dev); + ret = driver_data_request_sync(config->name, req_params, + test_dev->pdev); if (ret) - dev_err(test_dev->dev, "sync load of '%s' failed: %d\n", + dev_err(test_dev->
Re: [PATCH v9 2/4] arm64: dts: hisi: add kirin pcie node
On Sat, Jun 17, 2017 at 06:31:59AM +0800, Guodong Xu wrote: > Hi, Bjorn > > On Sat, Jun 17, 2017 at 5:11 AM, Bjorn Helgaas wrote: > > On Tue, Jun 06, 2017 at 07:19:53PM +0800, Guodong Xu wrote: > >> Hi, Arnd > >> > >> On Tue, Jun 6, 2017 at 5:23 PM, Arnd Bergmann wrote: > >> > On Sun, Jun 4, 2017 at 2:03 AM, kbuild test robot wrote: > >> >> Hi Xiaowei, > >> >> > >> >> [auto build test ERROR on pci/next] > >> >> [also build test ERROR on v4.12-rc3 next-20170602] > >> >> [if your patch is applied to the wrong git tree, please drop us a note > >> >> to help improve the system] > >> >> > >> >> url: > >> >> https://github.com/0day-ci/linux/commits/Xiaowei-Song/add-PCIe-driver-for-Kirin-PCIe/20170531-182118 > >> >> base: https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git > >> >> next > >> >> config: arm64-allnoconfig (attached as .config) > >> >> compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705 > >> >> reproduce: > >> >> wget > >> >> https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross > >> >> -O ~/bin/make.cross > >> >> chmod +x ~/bin/make.cross > >> >> # save the attached .config to linux build tree > >> >> make.cross ARCH=arm64 > >> >> > >> >> All errors (new ones prefixed by >>): > >> >> > >> Error: arch/arm64/boot/dts/hisilicon/hi3660.dtsi:180.24-25 syntax > >> error > >> FATAL ERROR: Unable to parse input tree > >> > > >> > We keep getting the build errors for patch submissions. Obviously the > >> > patch is > >> > still broken and can't be merged as-is. What is the plan for merging the > >> > series? > >> > > >> > >> This dts patch can be applied to dts series [1]. For upstream review > >> purpose, hi3660-hikey960 dts patches, which don't have a related > >> driver changes, are sent in [1]. Other patches, which need driver > >> changes, like this one, are sent together with driver. > >> > >> Patchset [1] is now at its v2 review. Rob Herring already gave his ACK > >> for some of them in v1. 
Hopefully I can get more ACK for remaining > >> ones, and make them ready for v4.13 merging window. > >> > >> [1], http://www.spinics.net/lists/devicetree/msg178303.html > > > > I don't know how you want to deal with the DTS build failure. > > DTS part of this is also included in a broader Hi3660 dts patchset [1], and > was ACK'ed [2] today by HiSilicon SoC maintainer Xu Wei. Hopefully > they can land in next merge window. > > [1] https://www.spinics.net/lists/arm-kernel/msg588232.html > [2] https://www.spinics.net/lists/arm-kernel/msg588686.html This sounds good, but doesn't help me make progress. I don't want to apply [PATCH v9 2/4] because it didn't build. I haven't seen an updated series that *does* build. And it probably doesn't make sense for me to apply the arch/arm64 changes anyway because they aren't really in the PCI purview. If you want me to apply something, post patches 1 and 3 by themselves with the trivial updates I included. Those are really only PCI and should build without error. > > From a > > PCI perspective, I think I could apply patches 1 and 3 pretty easily > > by themselves. > > > > If/when you post these again, please incorporate the following > > incremental diff to clean up various whitespace and capitalization > > nits (these are spread across several of your patches). 
> > > > > > diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt > > b/Documentation/devicetree/bindings/pci/kirin-pcie.txt > > index 68ffa0fbcd73..20357d840af1 100644 > > --- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt > > +++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt > > @@ -24,8 +24,8 @@ Example based on kirin960: > > > > pcie@f400 { > > compatible = "hisilicon,kirin-pcie"; > > - reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 > > 0x1000>, > > - <0x0 0xf3f2 0x0 0x4>, <0x0 0xF400 0 > > 0x2000>; > > + reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 > > 0x1000>, > > + <0x0 0xf3f2 0x0 0x4>, <0x0 0xf400 0x0 > > 0x2000>; > > reg-names = "dbi","apb","phy", "config"; > > bus-range = <0x0 0x1>; > > #address-cells = <3>; > > @@ -46,5 +46,5 @@ Example based on kirin960: > > <&crg_ctrl HI3660_ACLK_GATE_PCIE>; > > clock-names = "pcie_phy_ref", "pcie_aux", > > "pcie_apb_phy", "pcie_apb_sys", "pcie_aclk"; > > - reset-gpios = <&gpio11 1 0 >; > > + reset-gpios = <&gpio11 1 0>; > > }; > > diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > > b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > > index e8feb2fb4d53..7bc89baa40ba 100644 > > --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > > +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > > @@ -159,12 +159,12 @@ > > > > pcie@f400
Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.
On Fri, 2017-06-16 at 12:15 -0700, Ram Pai wrote: > gp_regs size is not changed, nor is the layout. An unused field in > the gp_regs is used to fill in the AMR contents. Old binaries will not > be knowing about this unused field, and hence should not break. > > New binaries can leverage this already existing but newly defined > field; to read the contents of AMR. > > Is it still a concern? Calls to sys_swapcontext with a made-up context will end up with a crap AMR if done by code that didn't know about that register. Ben.
Re: [PATCH] PCI: Move test of INTx masking to pci_setup_device
On Fri, May 26, 2017 at 10:02:25PM +0100, Piotr Gregor wrote: > The test for INTx masking via config space command performed > in pci_intx_mask_supported() should be performed before PCI device > can be used. This is to avoid reading/writing of PCI_COMMAND_INTX_DISABLE > register which may collide with MSI/MSI-X interrupts. > > This patch moves test performed in pci_intx_mask_supported() to > > static void pci_test_intx_masking(struct pci_dev *dev) > > defined in drivers/pci/probe.c. > > This function is called from pci_setup_device(). It skips the test > if the device has been already marked to have broken INTx masking > feature. Otherwise the test is executed and broken_intx_masking > field of struct pci_dev is set accordingly. broken_intx_masking > meaning is: if it is true then the test has been either skipped > because the device has been already known to have broken INTx > masking support, or the test's been done and it has detected INTx > masking support to be broken. > The test result can be queried at any time later from the pci_dev > using same interface as before (though with changed implementation) > > static inline bool pci_intx_mask_supported(struct pci_dev *pdev) > { > /* > * INTx masking is supported if device has not been marked > * to have this feature broken and it has passed > * pci_test_intx_masking() test. > */ > return !pdev->broken_intx_masking; > } > > so current users of pci_intx_mask_supported: uio and vfio, keep > their code unchanged. 
> > Signed-off-by: Piotr Gregor > --- > drivers/pci/pci.c | 42 +- > drivers/pci/probe.c | 44 > include/linux/pci.h | 13 +++-- > 3 files changed, 56 insertions(+), 43 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index b01bd5b..7c4e1aa 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -3708,46 +3708,6 @@ void pci_intx(struct pci_dev *pdev, int enable) > } > EXPORT_SYMBOL_GPL(pci_intx); > > -/** > - * pci_intx_mask_supported - probe for INTx masking support > - * @dev: the PCI device to operate on > - * > - * Check if the device dev support INTx masking via the config space > - * command word. > - */ > -bool pci_intx_mask_supported(struct pci_dev *dev) > -{ > - bool mask_supported = false; > - u16 orig, new; > - > - if (dev->broken_intx_masking) > - return false; > - > - pci_cfg_access_lock(dev); > - > - pci_read_config_word(dev, PCI_COMMAND, &orig); > - pci_write_config_word(dev, PCI_COMMAND, > - orig ^ PCI_COMMAND_INTX_DISABLE); > - pci_read_config_word(dev, PCI_COMMAND, &new); > - > - /* > - * There's no way to protect against hardware bugs or detect them > - * reliably, but as long as we know what the value should be, let's > - * go ahead and check it. > - */ > - if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { > - dev_err(&dev->dev, "Command register changed from 0x%x to 0x%x: > driver or hardware bug?\n", > - orig, new); > - } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) { > - mask_supported = true; > - pci_write_config_word(dev, PCI_COMMAND, orig); > - } > - > - pci_cfg_access_unlock(dev); > - return mask_supported; > -} > -EXPORT_SYMBOL_GPL(pci_intx_mask_supported); > - > static bool pci_check_and_set_intx_mask(struct pci_dev *dev, bool mask) > { > struct pci_bus *bus = dev->bus; > @@ -3798,7 +3758,7 @@ static bool pci_check_and_set_intx_mask(struct pci_dev > *dev, bool mask) > * @dev: the PCI device to operate on > * > * Check if the device dev has its INTx line asserted, mask it and > - * return true in that case. 
False is returned if not interrupt was > + * return true in that case. False is returned if no interrupt was > * pending. > */ > bool pci_check_and_mask_intx(struct pci_dev *dev) > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c > index 19c8950..ee6b55c 100644 > --- a/drivers/pci/probe.c > +++ b/drivers/pci/probe.c > @@ -1330,6 +1330,48 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev) > } > > /** > + * pci_test_intx_masking - probe for INTx masking support > + * @dev: the PCI device to operate on > + * > + * Check if the @dev supports INTx masking via the config space > + * command word. Executed when PCI device is setup. Result is saved > + * in broken_intx_masking field of struct pci_dev and can be checked > + * with pci_intx_mask_supported at any time later, after the PCI device > + * has been setup (this avoids testing of PCI_COMMAND_INTX_DISABLE > + * register at runtime). > + */ > +static void pci_test_intx_masking(struct pci_dev *dev) > +{ > + u16 orig, toggle, new; > + > + /* > + * If device doesn't support this feature though it could pass the test. > + */ > + if (dev->broken_intx_masking) > +
Re: [RFC PATCH 1/3] atmel-hlcdc: add support for 8-bit color lookup table mode
Hm, it's probably too late to do it here. Planes have already been enabled and the engine may have started to fetch data and do the composition. You could do that in ->update_plane() [1], and make it a per-plane thing. I'm not sure, but I think you can get the new crtc_state from plane->crtc->state in this context (state have already been swapped, and new state is being applied, which means relevant locks are held). >>> >>> Ok, I can move it there. My plan is to just copy the default .update_plane >>> function and insert >>> >>> if (crtc->state->color_mgmt_changed && crtc->state->gamma_lut) { >>> ... >>> } >>> >>> just before the drm_atomic_commit(state) call. Sounds ok? >> >> Why would you copy the default ->update_plane() when we already have >> our own ->atomic_update_plane() implementation [1]? Just put it there >> (before the atmel_hlcdc_layer_update_commit() call) and we should be >> good. > > Ahh, but you said ->update_plane() and I took that as .update_plane in > layer_plane_funcs, not ->atomic_update() in > atmel_hlcdc_layer_plane_helper_funcs. > > Makes sense now, and much neater too. No, it doesn't make sense. There's no atmel_hlcdc_layer_update_commit call anywhere, and no such function. You seem to have some further changes that are not even in -next. Where am I getting those changes and why are they not upstream yet? There's a mention of the missing function here [1], but that's some 18 months ago. What's going on? [1] https://patchwork.kernel.org/patch/7965721/ Cheers, peda
endian bitshift defects [ was: staging: fusb302: don't bitshift __le16 type ]
On Fri, 2017-06-16 at 19:45 +0200, Frans Klaver wrote: > The header field in struct pd_message is declared as an __le16 type. The > data in the message is supposed to be little endian. This means we don't > have to go and shift the individual bytes into position when we're > filling the buffer, we can just copy the contents right away. As an > added benefit we don't get fishy results on big endian systems anymore. Thanks for pointing this out. There are several instances of this class of error. Here's a cocci script to find them. This is best used with cocci's --all-includes option like: $ spatch --all-includes --very-quiet --sp-file lebe_bitshifts.cocci . [ many defects...] $ cat lebe_bitshifts.cocci @@ typedef __le16, __le32, __le64, __be16, __be32, __be64; { __le16, __le32, __le64, __be16, __be32, __be64 } a; expression b; @@ * a << b @@ { __le16, __le32, __le64, __be16, __be32, __be64 } a; expression b; @@ * a <<= b @@ { __le16, __le32, __le64, __be16, __be32, __be64 } a; expression b; @@ * a >> b @@ { __le16, __le32, __le64, __be16, __be32, __be64 } a; expression b; @@ * a >>= b $
Re: [PATCH v9 2/4] arm64: dts: hisi: add kirin pcie node
Hi, Bjorn On Sat, Jun 17, 2017 at 5:11 AM, Bjorn Helgaas wrote: > On Tue, Jun 06, 2017 at 07:19:53PM +0800, Guodong Xu wrote: >> Hi, Arnd >> >> On Tue, Jun 6, 2017 at 5:23 PM, Arnd Bergmann wrote: >> > On Sun, Jun 4, 2017 at 2:03 AM, kbuild test robot wrote: >> >> Hi Xiaowei, >> >> >> >> [auto build test ERROR on pci/next] >> >> [also build test ERROR on v4.12-rc3 next-20170602] >> >> [if your patch is applied to the wrong git tree, please drop us a note to >> >> help improve the system] >> >> >> >> url: >> >> https://github.com/0day-ci/linux/commits/Xiaowei-Song/add-PCIe-driver-for-Kirin-PCIe/20170531-182118 >> >> base: https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git >> >> next >> >> config: arm64-allnoconfig (attached as .config) >> >> compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705 >> >> reproduce: >> >> wget >> >> https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross >> >> -O ~/bin/make.cross >> >> chmod +x ~/bin/make.cross >> >> # save the attached .config to linux build tree >> >> make.cross ARCH=arm64 >> >> >> >> All errors (new ones prefixed by >>): >> >> >> Error: arch/arm64/boot/dts/hisilicon/hi3660.dtsi:180.24-25 syntax error >> FATAL ERROR: Unable to parse input tree >> > >> > We keep getting the build errors for patch submissions. Obviously the >> > patch is >> > still broken and can't be merged as-is. What is the plan for merging the >> > series? >> > >> >> This dts patch can be applied to dts series [1]. For upstream review >> purpose, hi3660-hikey960 dts patches, which don't have a related >> driver changes, are sent in [1]. Other patches, which need driver >> changes, like this one, are sent together with driver. >> >> Patchset [1] is now at its v2 review. Rob Herring already gave his ACK >> for some of them in v1. Hopefully I can get more ACK for remaining >> ones, and make them ready for v4.13 merging window. 
>> >> [1], http://www.spinics.net/lists/devicetree/msg178303.html > > I don't know how you want to deal with the DTS build failure. DTS part of this is also included in a broader Hi3660 dts patchset [1], and was ACK'ed [2] today by HiSilicon SoC maintainer Xu Wei. Hopefully they can land in next merge window. [1] https://www.spinics.net/lists/arm-kernel/msg588232.html [2] https://www.spinics.net/lists/arm-kernel/msg588686.html -Guodong > From a > PCI perspective, I think I could apply patches 1 and 3 pretty easily > by themselves. > > If/when you post these again, please incorporate the following > incremental diff to clean up various whitespace and capitalization > nits (these are spread across several of your patches). > > > diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt > b/Documentation/devicetree/bindings/pci/kirin-pcie.txt > index 68ffa0fbcd73..20357d840af1 100644 > --- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt > +++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt > @@ -24,8 +24,8 @@ Example based on kirin960: > > pcie@f400 { > compatible = "hisilicon,kirin-pcie"; > - reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 > 0x1000>, > - <0x0 0xf3f2 0x0 0x4>, <0x0 0xF400 0 0x2000>; > + reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 > 0x1000>, > + <0x0 0xf3f2 0x0 0x4>, <0x0 0xf400 0x0 > 0x2000>; > reg-names = "dbi","apb","phy", "config"; > bus-range = <0x0 0x1>; > #address-cells = <3>; > @@ -46,5 +46,5 @@ Example based on kirin960: > <&crg_ctrl HI3660_ACLK_GATE_PCIE>; > clock-names = "pcie_phy_ref", "pcie_aux", > "pcie_apb_phy", "pcie_apb_sys", "pcie_aclk"; > - reset-gpios = <&gpio11 1 0 >; > + reset-gpios = <&gpio11 1 0>; > }; > diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > index e8feb2fb4d53..7bc89baa40ba 100644 > --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi > @@ -159,12 +159,12 @@ > > pcie@f400 { > compatible = 
"hisilicon,kirin960-pcie"; > - reg = <0x0 0xf400 0x0 0x1000>, > - <0x0 0xff3fe000 0x0 0x1000>, > + reg = <0x0 0xf400 0x0 0x1000>, > + <0x0 0xff3fe000 0x0 0x1000>, > <0x0 0xf3f2 0x0 0x4>, > - <0x0 0xF500 0x0 0x2000>; > + <0x0 0xf500 0x0 0x2000>; > reg-names = "dbi", "apb", "phy", "config"; > - bus-range = <0x0 0x1>; > + bus-range = <0x0 0x1>; > #address-cells = <3>; >
Re: [RFC PATCH 1/3] atmel-hlcdc: add support for 8-bit color lookup table mode
On 2017-06-16 23:12, Peter Rosin wrote: > On 2017-06-16 18:15, Boris Brezillon wrote: >> To be very clear, I'd like you to test it through DRM ioctls, not only >> through the fbdev emulation layer. > > ...so yeah, right, I couldn't agree more. Any pointers to code w/o a bunch > of complex library dependencies that I can test with? I have now built libdrm-2.4.81, and get this: $ modetest -M atmel-hlcdc -s 27@39:1024x768 setting mode 1024x768-60Hz@XR24 on connectors 27, crtc 39 $ modetest -M atmel-hlcdc -s 27@39:1024x768@RG16 setting mode 1024x768-60Hz@RG16 on connectors 27, crtc 39 $ modetest -M atmel-hlcdc -s 27@39:1024x768@C8 unknown format C8 (output on the lcd looks sane for the first two, not that I really know exactly what to expect) Looking at the libdrm code, I only find YUV and RGB modes in tests/util/format.c which make me less confident that I will find something sane to test with. So, pointers to code to test with desperately needed... Cheers, peda
Re: [PATCH] PCI / PM: Restore the status of PCI devices across hibernation
On Fri, Jun 16, 2017 at 9:45 PM, Bjorn Helgaas wrote: > On Thu, May 25, 2017 at 04:49:07PM +0800, Chen Yu wrote: >> Currently we saw a lot of "No irq handler" errors during hibernation, >> which caused the system hang finally: >> >> [ 710.141581] ata4.00: qc timeout (cmd 0xec) >> [ 710.147135] ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4) >> [ 710.154593] ata4.00: revalidation failed (errno=-5) >> [ 710.468124] ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 300) >> [ 710.477746] do_IRQ: 31.151 No irq handler for vector >> >> According to above logs, there is an interrupt triggered and it is >> dispatched to CPU31 with a vector number 151, but there is no handler >> for it, thus this irq will not get acked and caused irq flood which kill >> the system. To be more specific, the 31.151 is an interrupt from the ahci >> host controller. >> >> After some investigation, the reason why this issue is triggered is >> because the thaw_noirq() function does not restore the MSI/MSIX settings >> across hibernation. >> >> The scenario is illustrated below: >> >> 1. Before the hibernation starts, the irq 34 is the handler for the ahci >> device, >>which is binded on cpu31. >> 2. Hibernation starts, the ahci device is put into low power state. >> 3. All the nonboot CPUs are put offline, so the irq 34 has to be migrated to >>the last alive one - CPU0. >> 4. After the snapshot has been created, all the nonboot CPUs are brought up >> again, >>the CPU affinity for IRQ 34 remains to be 0. >> 5. ahci device are put into D0. >> 6. The snapshot is written to the disk. >> >> The issue is triggered in step 6, in theory the ahci interrupt should be >> delivered to CPU0, however the actually result is that this interrupt is >> delivered to the original CPU31 instead, which cause the "No irq handler" >> issue. 
>> >> Ying Huang has has provided a clue that, in step 3 it is possible that the >> writing >> to the register might not take effect as the PCI devices have been put >> suspended. >> Actually it is true: >> In step 3, the irq 34 affinity is supposed to be modified from 31 to 0, >> but actually it did not. In __pci_write_msi_msg(), if the device is already >> in low power state, the low level msi message entry will not be updated >> but cached. So in theory during the device restore process, the cached msi >> modification information should be written back to the hardware, and this >> is what pci_restore_msi_state() do during normal suspend-resume. >> But this is not the case for hibernation, pci_restore_msi_state() is not >> invoked currently, to be more specific, pci_restore_state() is not invoked >> in pci_pm_thaw_noirq(), although pci_save_state() has saved the necessary >> pci cached information in pci_pm_freeze_noirq(). >> >> This patch tries to restore the pci status for the device during hibernation, >> otherwise the status might be lost across hibernation(for example, the >> MSI/MSIX >> message settings), which might cause problems during hibernation. >> >> Suggested-by: Ying Huang >> Suggested-by: Rafael J. Wysocki >> Cc: Rafael J. Wysocki >> Cc: Bjorn Helgaas >> Cc: Len Brown >> Cc: Dan Williams >> Cc: Rui Zhang >> Cc: Ying Huang >> Cc: linux-...@vger.kernel.org >> Cc: linux...@vger.kernel.org >> Cc: linux-kernel@vger.kernel.org >> Signed-off-by: Chen Yu > > Added a stable tag and applied with Rafael's reviewed-by to pci/pm for > v4.13, thanks! > > pci_restore_state() restores a lot of stuff besides MSI/MSI-X: PCIe > device, link, slot control, ATS, VC, BARs, ACS, IOV. I guess I'm a > little surprised that we haven't noticed more issues if all these > things were broken. That's because they weren't broken. 
:-) None of them is expected to change over the image creation, which is why pci_pm_thaw_noirq() didn't call pci_restore_state(), but we overlooked the fact that taking nonboot CPUs offline changed the configuration of interrupts that needed to be restored afterward. So this one is really exceptional. Thanks, Rafael
Re: [PATCH v4] Introduce v3 namespaced file capabilities
On 06/14/2017 11:05 PM, Serge E. Hallyn wrote: On Wed, Jun 14, 2017 at 08:27:40AM -0400, Stefan Berger wrote: On 06/13/2017 07:55 PM, Serge E. Hallyn wrote: Quoting Stefan Berger (stef...@linux.vnet.ibm.com): If all extended attributes were to support this model, maybe the 'uid' could be associated with the 'name' of the xattr rather than its 'value' (not sure whether that's possible). Right, I missed that in your original email when I saw it this morning. It's not what my patch does, but it's an interesting idea. Do you have a patch to that effect? We might even be able to generalize that to No, I don't have a patch. It may not be possible to implement it. The xattr_handler's take the name of the xattr as input to get(). That may be ok though. Assume the host created a container with 10 as the uid for root, which created a container with 13 as uid for root. If root in the nested container tries to read the xattr, the kernel can check for security.foo[13] first, then security.foo[10], then security.foo. Or, it can do a listxattr and look for those. Am I overlooking one? So that sounds like a child would 'inherit' the value of an xattr from the closest parent if it doesn't have one itself. I guess it would depend on the xattr whether that should apply? And removing an xattr becomes difficult then if the parent container's xattr always shines through... So one could try to encode the mapped uid in the name. However, that I thought that's exactly what you were suggesting in your original email? "security.capability[uid=2000]" could lead to problems with stale xattrs in a shared filesystem over time unless one could limit the number of xattrs with the same prefix, e.g., security.capability*. So I doubt that it would work. Hm. Yeah. But really how many setups are there like that? I.e. if you launch a regular docker or lxd container, the image doesn't do a bind mount of a shared image, it layers something above it or does a copy. 
What setups do you know of where multiple containers in different user namespaces mount the same filesystem shared and writeable? So you think it's a good idea? I am not sure when I would get to it, though... Stefan Otherwise it would be good if the value was wrapped in a data structure use by all xattrs, but that doesn't seem to be the case, either. So I guess we have to go into each type of value structure and add a uid field there. namespace any security.* xattrs. Wouldn't be automatically enabled for anything but ima and capabilities, but we could make the infrastructure generic and re-usable.
Re: [kernel-hardening] Re: [PATCH v4 06/13] iscsi: ensure RNG is seeded before use
On 06/08/2017 05:09 AM, Jason A. Donenfeld wrote: > On Thu, Jun 8, 2017 at 4:43 AM, Theodore Ts'o wrote: >> What was the testing that was done for commit? It looks safe, but I'm >> unfamiliar enough with how the iSCSI authentication works that I'd >> prefer getting an ack'ed by from the iSCSI maintainers or >> alternatively, information about how to kick off some kind of automated >> test suite ala xfstests for file systems. > > Only very basic testing from my end. > > I'm thus adding the iSCSI list to see if they'll have a look (patch > reattached). > > Jason > It seems like what you are doing is basically "good", i.e. if there is not enough random data, don't use it. But what happens in that case? The authentication fails? How does the user know to wait and try again? -- Lee Duncan SUSE Labs
Re: [PATCH v2 2/5] dt-bindings: scsi: ufs: add document for hi3660-ufs
On Fri, Jun 16, 2017 at 8:51 AM, Bu Tao wrote: > add ufs node document for hi3660 > > Signed-off-by: Bu Tao > --- > .../devicetree/bindings/ufs/hi3660-ufs.txt | 58 > ++ > 1 file changed, 58 insertions(+) > create mode 100644 Documentation/devicetree/bindings/ufs/hi3660-ufs.txt > > diff --git a/Documentation/devicetree/bindings/ufs/hi3660-ufs.txt > b/Documentation/devicetree/bindings/ufs/hi3660-ufs.txt > new file mode 100644 > index ..461afc8ef017 > --- /dev/null > +++ b/Documentation/devicetree/bindings/ufs/hi3660-ufs.txt > @@ -0,0 +1,58 @@ > +* Hisilicon Universal Flash Storage (UFS) Host Controller > + > +UFS nodes are defined to describe on-chip UFS hardware macro. > +Each UFS Host Controller should have its own node. > + > +Required properties: > +- compatible: compatible list, contains one of the following - > + "hisilicon,hi3660-ufs" for hisi ufs host controller > +present on Hi3660 chipset. > +- reg : should contain UFS register address space & UFS SYS > CTRL register address, > +- interrupt-parent : interrupt device > +- interrupts: interrupt number > +- clocks : List of phandle and clock specifier pairs > +- clock-names : List of clock input name strings sorted in the same > + order as the clocks property. 
"clk_ref", "clk_phy" is > optional > +- resets: reset node register, one reset the clk and the other > reset the controller > +- reset-names : describe reset node register > + > +Optional properties for board device: > +- ufs-hi3660-use-rate-B: specifies UFS rate-B > +- ufs-hi3660-broken-fastauto : specifies no fastauto > +- ufs-hi3660-use-HS-GEAR3 : specifies UFS HS-GEAR3 > +- ufs-hi3660-use-HS-GEAR2 : specifies UFS HS-GEAR2 > +- ufs-hi3660-use-HS-GEAR1 : specifies UFS HS-GEAR1 > +- ufs-hi3660-broken-clk-gate-bypass: specifies no clk-gate > +- ufs-hi3660-use-one-line : specifies UFS use one line work > +- reset-gpio : specifies to reset devices Some of these sound rather generic and might apply to UFS implementations other than hi3660, so I'd suggest adding them to the base ufs binding with a generic name instead. Any DT properties that might be useful across multiple implementations should be parsed in generic code that gets called by the individual drivers, and then the properties that are specific to the integration work done by hisilicon should be prefixed with "hisilicon,", but not normally with the SoC name: it is quite possible that another SoC will be derived from this chip and it should reuse the properties. (note: this is different from the value of the "compatible" property that is meant to be as specific as possible.) Also, please clarify how your binding relates to the ufshcd binding in Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt: does hi3660 implement any registers that are shared with ufshcd, or does it use the same physical interface with a different register set? Arnd
Re: autofs multi-map regression
On Friday 2017-06-16 15:57, Eric W. Biederman wrote: | I don't believe this is a kernel change. | | I dug up an old VM and I was able to reproduce this issue simply | by installing autofs, and your auto.master and auto.net files. | | # uname -a | Linux ubuntu-16 4.4.0-24-generic #43-Ubuntu SMP Wed Jun 8 19:27:37 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux | | # ls /net/ | localhost | # ls /net/localhost/loc | ls: cannot open directory '/net/localhost/loc': Too many levels of symbolic links | # ls /loc | ls: cannot open directory '/loc/': Too many levels of symbolic links | | I suspect there is configuration somewhere in your autofs | configuration. I don't speak autofs well enough to debug the issue at | this point. But I can conclusively say it was not the kernel commit you | pointed at, as I see the issue you are reporting and I don't have that | commit in the kernel under test. I have a second partition mounted on /loc, that is the reason for the multi-map autofs setup. With a separate mount on /loc, you won't see the errors with the old kernel. Fact is that my setup worked for a long time, and that it stopped working after the backport of commit 1064f874 to the ubuntu 4.4 kernel. -- Dick
[PATCH 3/5] soc: bcm: brcmstb: Add support for S2/S3/S5 suspend states (ARM)
From: Brian Norris This commit adds support for the Broadcom STB S2/S3/S5 suspend states on ARM based SoCs. This requires quite a lot of code in order to deal with the different HW blocks that need to be quiesced during suspend: - DDR PHY SHIM - DDR memory controller and sequencer - control processor The final steps of the suspend execute in an on-chip SRAM and there is a little bit of assembly code in order to shut down the DDR PHY PLL and then go into a wfi loop until a wake-up event occurs. Conversely the resume part involves waiting for the DDR PHY PLL to come back up and resume execution where we left off. For S3, because of our memory hashing (actual hashing code not included for simplicity, and is bypassed) we need to relocate the writable variables (stack) into SRAM shortly before suspending in order to leave the DRAM untouched and create a reliable hash of its contents. This code has been contributed by Brian Norris initially and has been incrementally fixed and updated to support new chips by a lot of people. 
Signed-off-by: Brian Norris Signed-off-by: Markus Mayer Signed-off-by: Justin Chen Signed-off-by: Gareth Powell Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli --- drivers/soc/bcm/brcmstb/Kconfig | 1 + drivers/soc/bcm/brcmstb/pm/Makefile | 1 + drivers/soc/bcm/brcmstb/pm/aon_defs.h | 113 + drivers/soc/bcm/brcmstb/pm/pm-arm.c | 836 ++ drivers/soc/bcm/brcmstb/pm/pm.h | 78 drivers/soc/bcm/brcmstb/pm/s2-arm.S | 76 6 files changed, 1105 insertions(+) create mode 100644 drivers/soc/bcm/brcmstb/pm/Makefile create mode 100644 drivers/soc/bcm/brcmstb/pm/aon_defs.h create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-arm.c create mode 100644 drivers/soc/bcm/brcmstb/pm/pm.h create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-arm.S diff --git a/drivers/soc/bcm/brcmstb/Kconfig b/drivers/soc/bcm/brcmstb/Kconfig index 996a75db015e..246fc0f30515 100644 --- a/drivers/soc/bcm/brcmstb/Kconfig +++ b/drivers/soc/bcm/brcmstb/Kconfig @@ -3,6 +3,7 @@ if SOC_BRCMSTB config BRCMSTB_PM bool "Support suspend/resume for STB platforms" default y + depends on ARM depends on PM endif # SOC_BRCMSTB diff --git a/drivers/soc/bcm/brcmstb/pm/Makefile b/drivers/soc/bcm/brcmstb/pm/Makefile new file mode 100644 index ..66a4f7e43ad5 --- /dev/null +++ b/drivers/soc/bcm/brcmstb/pm/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ARM) += s2-arm.o pm-arm.o diff --git a/drivers/soc/bcm/brcmstb/pm/aon_defs.h b/drivers/soc/bcm/brcmstb/pm/aon_defs.h new file mode 100644 index ..fb936abd847d --- /dev/null +++ b/drivers/soc/bcm/brcmstb/pm/aon_defs.h @@ -0,0 +1,113 @@ +/* + * Always ON (AON) register interface between bootloader and Linux + * + * Copyright © 2014-2017 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __BRCMSTB_AON_DEFS_H__ +#define __BRCMSTB_AON_DEFS_H__ + +#include + +/* Magic number in upper 16-bits */ +#define BRCMSTB_S3_MAGIC_MASK 0x +#define BRCMSTB_S3_MAGIC_SHORT 0x5AFE + +enum { + /* Restore random key for AES memory verification (off = fixed key) */ + S3_FLAG_LOAD_RANDKEY= (1 << 0), + + /* Scratch buffer page table is present */ + S3_FLAG_SCRATCH_BUFFER_TABLE= (1 << 1), + + /* Skip all memory verification */ + S3_FLAG_NO_MEM_VERIFY = (1 << 2), + + /* +* Modification of this bit reserved for bootloader only. +* 1=PSCI started Linux, 0=Direct jump to Linux. +*/ + S3_FLAG_PSCI_BOOT = (1 << 3), + + /* +* Modification of this bit reserved for bootloader only. +* 1=64 bit boot, 0=32 bit boot. +*/ + S3_FLAG_BOOTED64= (1 << 4), +}; + +#define BRCMSTB_HASH_LEN (128 / 8) /* 128-bit hash */ + +#define AON_REG_MAGIC_FLAGS0x00 +#define AON_REG_CONTROL_LOW0x04 +#define AON_REG_CONTROL_HIGH 0x08 +#define AON_REG_S3_HASH0x0c /* hash of S3 params */ +#define AON_REG_CONTROL_HASH_LEN 0x1c +#define AON_REG_PANIC 0x20 + +#define BRCMSTB_S3_MAGIC 0x5AFEB007 +#define BRCMSTB_PANIC_MAGIC0x512E115E +#define BOOTLOADER_SCRATCH_SIZE64 +#define BRCMSTB_DTU_STATE_MAP_ENTRIES (8*1024) +#define BRCMSTB_DTU_CONFIG_ENTRIES (512) +#define BRCMSTB_DTU_COUNT (2) + +
Re: [PATCH] drivers: net: phy: Correct kernel-doc comment parameter
On 06/16/2017 03:19 AM, sayli karnik wrote: > Correct function parameter in kernel-doc comment to fix following > warnings in the sphinx build: > > .//drivers/net/phy/phy.c:259: warning: No description found for > parameter 'features' > .//drivers/net/phy/phy.c:259: warning: Excess function parameter > 'feature' description in 'phy_lookup_setting' > > Signed-off-by: sayli karnik Already fixed. See commit 6c6ab3e73b391b38c9749c5bf21479ed7d60. What tree did you make this patch against? > --- > drivers/net/phy/phy.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c > index 82ab8fb..7524caa 100644 > --- a/drivers/net/phy/phy.c > +++ b/drivers/net/phy/phy.c > @@ -241,7 +241,7 @@ static const struct phy_setting settings[] = { > * phy_lookup_setting - lookup a PHY setting > * @speed: speed to match > * @duplex: duplex to match > - * @feature: allowed link modes > + * @features: allowed link modes > * @exact: an exact match is required > * > * Search the settings array for a setting that matches the speed and > -- ~Randy
[PATCH 0/5] Broadcom STB S2/S3/S5 support for ARM and MIPS
Hi, This patch series adds support for S2/S3/S5 suspend/resume states on ARM and MIPS based Broadcom STB SoCs. This was submitted a long time ago by Brian, and I am now picking this up and trying to get this included with support for our latest chips. Provided that I can collect the necessary Acks from Rob (DT) and other people (Rafael?) I would probably take this via the Broadcom ARM SoC pull requests. Thank you! Brian Norris (1): soc: bcm: brcmstb: Add support for S2/S3/S5 suspend states (ARM) Florian Fainelli (3): dt-bindings: Update Broadcom STB binding soc: bcm: brcmstb: Add Kconfig entry point for power management dt-bindings: Document MIPS Broadcom STB power management nodes Justin Chen (1): soc bcm: brcmstb: Add support for S2/S3/S5 suspend states (MIPS) .../devicetree/bindings/arm/bcm/brcm,brcmstb.txt | 6 +- .../devicetree/bindings/mips/brcm/soc.txt | 77 ++ drivers/soc/bcm/Kconfig| 2 + drivers/soc/bcm/brcmstb/Kconfig| 9 + drivers/soc/bcm/brcmstb/Makefile | 1 + drivers/soc/bcm/brcmstb/pm/Makefile| 2 + drivers/soc/bcm/brcmstb/pm/aon_defs.h | 113 +++ drivers/soc/bcm/brcmstb/pm/pm-arm.c| 836 + drivers/soc/bcm/brcmstb/pm/pm-mips.c | 461 drivers/soc/bcm/brcmstb/pm/pm.h| 89 +++ drivers/soc/bcm/brcmstb/pm/s2-arm.S| 76 ++ drivers/soc/bcm/brcmstb/pm/s2-mips.S | 200 + drivers/soc/bcm/brcmstb/pm/s3-mips.S | 146 13 files changed, 2017 insertions(+), 1 deletion(-) create mode 100644 drivers/soc/bcm/brcmstb/Kconfig create mode 100644 drivers/soc/bcm/brcmstb/pm/Makefile create mode 100644 drivers/soc/bcm/brcmstb/pm/aon_defs.h create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-arm.c create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-mips.c create mode 100644 drivers/soc/bcm/brcmstb/pm/pm.h create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-arm.S create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-mips.S create mode 100644 drivers/soc/bcm/brcmstb/pm/s3-mips.S -- 2.9.3
[PATCH 1/5] dt-bindings: Update Broadcom STB binding
Update the Broadcom STB binding document with new compatible strings for the DDR PHY and memory controller found on newer chips. Signed-off-by: Florian Fainelli --- Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt index 0d0c1ae81bed..790e6b0b8306 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt @@ -164,6 +164,8 @@ Control registers for this memory controller's DDR PHY. Required properties: - compatible : should contain one of these + "brcm,brcmstb-ddr-phy-v71.1" + "brcm,brcmstb-ddr-phy-v72.0" "brcm,brcmstb-ddr-phy-v225.1" "brcm,brcmstb-ddr-phy-v240.1" "brcm,brcmstb-ddr-phy-v240.2" @@ -184,7 +186,9 @@ Sequencer DRAM parameters and control registers. Used for Self-Refresh Power-Down (SRPD), among other things. Required properties: -- compatible : should contain "brcm,brcmstb-memc-ddr" +- compatible : should contain one of these + "brcm,brcmstb-memc-ddr-rev-b.2.2" + "brcm,brcmstb-memc-ddr" - reg: the MEMC DDR register range Example: -- 2.9.3
[PATCH 5/5] soc bcm: brcmstb: Add support for S2/S3/S5 suspend states (MIPS)
From: Justin Chen This commit adds support for the Broadcom STB S2/S3/S5 suspend states on MIPS based SoCs. This requires quite a lot of code in order to deal with the different HW blocks that need to be quiesced during suspend: - DDR PHY - DDR memory controller and arbiter - control processor The final steps of the suspend execute in cache and there is a little bit of assembly code in order to shut down the DDR PHY PLL and then go into a wait loop until a wake-up event occurs. Conversely the resume part involves waiting for the DDR PHY PLL to come back up and resume execution where we left off. Signed-off-by: Justin Chen Signed-off-by: Florian Fainelli --- drivers/soc/bcm/brcmstb/Kconfig | 2 +- drivers/soc/bcm/brcmstb/pm/Makefile | 1 + drivers/soc/bcm/brcmstb/pm/pm-mips.c | 461 +++ drivers/soc/bcm/brcmstb/pm/pm.h | 13 +- drivers/soc/bcm/brcmstb/pm/s2-mips.S | 200 +++ drivers/soc/bcm/brcmstb/pm/s3-mips.S | 146 +++ 6 files changed, 821 insertions(+), 2 deletions(-) create mode 100644 drivers/soc/bcm/brcmstb/pm/pm-mips.c create mode 100644 drivers/soc/bcm/brcmstb/pm/s2-mips.S create mode 100644 drivers/soc/bcm/brcmstb/pm/s3-mips.S diff --git a/drivers/soc/bcm/brcmstb/Kconfig b/drivers/soc/bcm/brcmstb/Kconfig index 246fc0f30515..7ab04f10dbe7 100644 --- a/drivers/soc/bcm/brcmstb/Kconfig +++ b/drivers/soc/bcm/brcmstb/Kconfig @@ -3,7 +3,7 @@ if SOC_BRCMSTB config BRCMSTB_PM bool "Support suspend/resume for STB platforms" default y - depends on ARM + depends on ARM || BMIPS_GENERIC depends on PM endif # SOC_BRCMSTB diff --git a/drivers/soc/bcm/brcmstb/pm/Makefile b/drivers/soc/bcm/brcmstb/pm/Makefile index 66a4f7e43ad5..d524ce9644ba 100644 --- a/drivers/soc/bcm/brcmstb/pm/Makefile +++ b/drivers/soc/bcm/brcmstb/pm/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_ARM) += s2-arm.o pm-arm.o +obj-$(CONFIG_BMIPS_GENERIC)+= s2-mips.o s3-mips.o pm-mips.o diff --git a/drivers/soc/bcm/brcmstb/pm/pm-mips.c b/drivers/soc/bcm/brcmstb/pm/pm-mips.c new file mode 100644 index ..5037b2dc6be9 --- 
/dev/null +++ b/drivers/soc/bcm/brcmstb/pm/pm-mips.c @@ -0,0 +1,461 @@ +/* + * MIPS-specific support for Broadcom STB S2/S3/S5 power management + * + * Copyright (C) 2016-2017 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pm.h" + +#define S2_NUM_PARAMS 6 +#define MAX_NUM_MEMC 3 + +/* S3 constants */ +#define MAX_GP_REGS16 +#define MAX_CP0_REGS 32 +#define NUM_MEMC_CLIENTS 128 +#define AON_CTRL_RAM_SIZE 128 +#define BRCMSTB_S3_MAGIC 0x5AFEB007 + +#define CLEAR_RESET_MASK 0x01 + +/* Index each CP0 register that needs to be saved */ +#define CONTEXT0 +#define USER_LOCAL 1 +#define PGMK 2 +#define HWRENA 3 +#define COMPARE4 +#define STATUS 5 +#define CONFIG 6 +#define MODE 7 +#define EDSP 8 +#define BOOT_VEC 9 +#define EBASE 10 + +struct brcmstb_memc { + void __iomem *ddr_phy_base; + void __iomem *arb_base; +}; + +struct brcmstb_pm_control { + void __iomem *aon_ctrl_base; + void __iomem *aon_sram_base; + void __iomem *timers_base; + struct brcmstb_memc memcs[MAX_NUM_MEMC]; + int num_memc; +}; + +struct brcm_pm_s3_context { + u32 cp0_regs[MAX_CP0_REGS]; + u32 memc0_rts[NUM_MEMC_CLIENTS]; + u32 sc_boot_vec; +}; + +struct brcmstb_mem_transfer; + +struct brcmstb_mem_transfer { + struct brcmstb_mem_transfer *next; + void*src; + void*dst; + dma_addr_t pa_src; + dma_addr_t pa_dst; + u32 len; + u8 key; + u8 mode; + u8 src_remapped; + u8 dst_remapped; + u8 src_dst_remapped; +}; + +#define AON_SAVE_SRAM(base, idx, val) \ + __raw_writel(val, base + (idx << 2)) + +/* 
Used for saving registers in asm */ +u32 gp_regs[MAX_GP_REGS]; + +#defineBSP_CLOCK_STOP 0x00 +#define PM_INITIATE0x01 + +static struct brcmstb_pm_control ctrl; + +static void
[PATCH 4/5] dt-bindings: Document MIPS Broadcom STB power management nodes
Document the different nodes required for supporting S2/S3/S5 suspend states on MIPS-based Broadcom STB SoCs. Signed-off-by: Florian Fainelli --- .../devicetree/bindings/mips/brcm/soc.txt | 77 ++ 1 file changed, 77 insertions(+) diff --git a/Documentation/devicetree/bindings/mips/brcm/soc.txt b/Documentation/devicetree/bindings/mips/brcm/soc.txt index e4e1cd91fb1f..f7413168d938 100644 --- a/Documentation/devicetree/bindings/mips/brcm/soc.txt +++ b/Documentation/devicetree/bindings/mips/brcm/soc.txt @@ -11,3 +11,80 @@ Required properties: The experimental -viper variants are for running Linux on the 3384's BMIPS4355 cable modem CPU instead of the BMIPS5000 application processor. + +Power management + + +For power management (particularly, S2/S3/S5 system suspend), the following SoC +components are needed: + += Always-On control block (AON CTRL) + +This hardware provides control registers for the "always-on" (even in low-power +modes) hardware, such as the Power Management State Machine (PMSM). + +Required properties: +- compatible : should contain "brcm,brcmstb-aon-ctrl" +- reg: the register start and length for the AON CTRL block + +Example: + +aon-ctrl@41 { + compatible = "brcm,brcmstb-aon-ctrl"; + reg = <0x41 0x400>; +}; + += Memory controllers + +A Broadcom STB SoC typically has a number of independent memory controllers, +each of which may have several associated hardware blocks, which are versioned +independently (control registers, DDR PHYs, etc.). One might consider +describing these controllers as a parent "memory controllers" block, which +contains N sub-nodes (one for each controller in the system), each of which is +associated with a number of hardware register resources (e.g., its PHY). See +the example device tree snippet below. + +== MEMC (MEMory Controller) + +Represents a single memory controller instance. 
+ +Required properties: +- compatible : should contain "brcm,brcmstb-memc" and "simple-bus" + +Should contain subnodes for any of the following relevant hardware resources: + +== DDR PHY control + +Control registers for this memory controller's DDR PHY. + +Required properties: +- compatible : should contain one of these + "brcm,brcmstb-ddr-phy-v64.5" + "brcm,brcmstb-ddr-phy" + +- reg: the DDR PHY register range + +== MEMC Arbiter + +The memory controller arbiter is responsible for memory clients allocation +(bandwidth, priorities etc.) and needs to have its contents restored during +deep sleep states (S3). + +Required properties: + +- compatible : should contain one of these + "brcm,brcmstb-memc-arb-v10.0.0.0" + "brcm,brcmstb-memc-arb" + +- reg : the DDR Arbiter register range + +== Timers + +The Broadcom STB chips contain a timer block with several general purpose +timers that can be used. + +Required properties: + +- compatible : should contain "brcm,brcmstb-timers" +- reg : the timers register range + -- 2.9.3
[PATCH 2/5] soc: bcm: brcmstb: Add Kconfig entry point for power management
Add the necessary plumbing to select and build CONFIG_BRCMSTB_PM. Functional code is not added yet. Signed-off-by: Florian Fainelli --- drivers/soc/bcm/Kconfig | 2 ++ drivers/soc/bcm/brcmstb/Kconfig | 8 drivers/soc/bcm/brcmstb/Makefile | 1 + 3 files changed, 11 insertions(+) create mode 100644 drivers/soc/bcm/brcmstb/Kconfig diff --git a/drivers/soc/bcm/Kconfig b/drivers/soc/bcm/Kconfig index 49f1e2a75d61..055a845ed979 100644 --- a/drivers/soc/bcm/Kconfig +++ b/drivers/soc/bcm/Kconfig @@ -20,4 +20,6 @@ config SOC_BRCMSTB If unsure, say N. +source "drivers/soc/bcm/brcmstb/Kconfig" + endmenu diff --git a/drivers/soc/bcm/brcmstb/Kconfig b/drivers/soc/bcm/brcmstb/Kconfig new file mode 100644 index ..996a75db015e --- /dev/null +++ b/drivers/soc/bcm/brcmstb/Kconfig @@ -0,0 +1,8 @@ +if SOC_BRCMSTB + +config BRCMSTB_PM +bool "Support suspend/resume for STB platforms" +default y +depends on PM + +endif # SOC_BRCMSTB diff --git a/drivers/soc/bcm/brcmstb/Makefile b/drivers/soc/bcm/brcmstb/Makefile index 9120b2715d3e..ee5b4de741b8 100644 --- a/drivers/soc/bcm/brcmstb/Makefile +++ b/drivers/soc/bcm/brcmstb/Makefile @@ -1 +1,2 @@ obj-y += common.o biuctrl.o +obj-y += pm/ -- 2.9.3
[PATCH v2 0/4] Generalize fncpy availability
Hi all, This patch series makes ARM's fncpy() implementation more generic (dropping the Thumb-specifics) and available in an asm-generic header file. Tested on a Broadcom ARM64 STB platform with code that is written to SRAM. Changes in v2: - leave the ARM implementation where it is - make the generic truly generic (no) This is helpful in making SoC-specific power management code become true drivers that can be shared between different architectures. Thanks! Florian Fainelli (4): ARM: fncpy: Rename include guards asm-generic: Provide a fncpy() implementation arm64: Provide a fncpy implementation misc: sram: Allow ARM64 to select SRAM_EXEC arch/arm/include/asm/fncpy.h | 6 +-- arch/arm64/include/asm/fncpy.h | 6 +++ drivers/misc/Kconfig | 2 +- include/asm-generic/fncpy.h| 93 ++ 4 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/include/asm/fncpy.h create mode 100644 include/asm-generic/fncpy.h -- 2.9.3
[PATCH v2 2/4] asm-generic: Provide a fncpy() implementation
Define a generic fncpy() implementation largely based on the ARM version that requires an 8 bytes alignment for the destination address where to copy this function as well as the function's own address. Signed-off-by: Florian Fainelli --- include/asm-generic/fncpy.h | 93 + 1 file changed, 93 insertions(+) create mode 100644 include/asm-generic/fncpy.h diff --git a/include/asm-generic/fncpy.h b/include/asm-generic/fncpy.h new file mode 100644 index ..5bb3e5d20ae0 --- /dev/null +++ b/include/asm-generic/fncpy.h @@ -0,0 +1,93 @@ +/* + * include/asm-generic/fncpy.h - helper macros for function body copying + * + * Copyright (C) 2011 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * These macros are intended for use when there is a need to copy a low-level + * function body into special memory. + * + * For example, when reconfiguring the SDRAM controller, the code doing the + * reconfiguration may need to run from SRAM. + * + * NOTE: that the copied function body must be entirely self-contained and + * position-independent in order for this to work properly. + * + * NOTE: in order for embedded literals and data to get referenced correctly, + * the alignment of functions must be preserved when copying. 
To ensure this, + * the source and destination addresses for fncpy() must be aligned to a + * multiple of 8 bytes: you will get a BUG() if this condition is not met. + * You will typically need a ".align 3" directive in the assembler where the + * function to be copied is defined, and ensure that your allocator for the + * destination buffer returns 8-byte-aligned pointers. + * + * Typical usage example: + * + * extern int f(args); + * extern uint32_t size_of_f; + * int (*copied_f)(args); + * void *sram_buffer; + * + * copied_f = fncpy(sram_buffer, &f, size_of_f); + * + * ... later, call the function: ... + * + * copied_f(args); + * + * The size of the function to be copied can't be determined from C: + * this must be determined by other means, such as adding assembler directives + * in the file where f is defined. + */ + +#ifndef __ASM_ARM_FNCPY_H +#define __ASM_ARM_FNCPY_H + +#include +#include + +#include +#include + +/* + * Minimum alignment requirement for the source and destination addresses + * for function copying. + */ +#define FNCPY_ALIGN 8 + +#define fncpy(dest_buf, funcp, size) ({ \ + uintptr_t __funcp_address; \ + typeof(funcp) __result; \ + \ + asm("" : "=r" (__funcp_address) : "0" (funcp)); \ + \ + /* \ +* Ensure alignment of source and destination addresses.\ +*/ \ + BUG_ON((uintptr_t)(dest_buf) & (FNCPY_ALIGN - 1) || \ + (__funcp_address & (FNCPY_ALIGN - 1))); \ + \ + memcpy(dest_buf, (void const *)__funcp_address, size); \ + flush_icache_range((unsigned long)(dest_buf), \ + (unsigned long)(dest_buf) + (size));\ + \ + asm("" : "=r" (__result)\ + : "0" ((uintptr_t)(dest_buf))); \ + \ + __result; \ +}) + +#endif /* !__ASM_ARM_FNCPY_H */ -- 2.9.3
[PATCH v2 4/4] misc: sram: Allow ARM64 to select SRAM_EXEC
Now that ARM64 also has a fncpy() implementation, allow selection of SRAM_EXEC for ARM64 as well. Signed-off-by: Florian Fainelli --- drivers/misc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 07bbd4cc1852..ac8779278c0c 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -464,7 +464,7 @@ config SRAM bool "Generic on-chip SRAM driver" depends on HAS_IOMEM select GENERIC_ALLOCATOR - select SRAM_EXEC if ARM + select SRAM_EXEC if ARM || ARM64 help This driver allows you to declare a memory region to be managed by the genalloc API. It is supposed to be used for small on-chip SRAM -- 2.9.3
[PATCH v2 1/4] ARM: fncpy: Rename include guards
In preparation for allowing a generic fncpy() implementation to live under include/asm-generic/fncpy.h, rename the current include guards to be __ASM_ARM_FNCPY_H, this also makes the header file more consistent with other headers in the same directory. Signed-off-by: Florian Fainelli --- arch/arm/include/asm/fncpy.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/fncpy.h b/arch/arm/include/asm/fncpy.h index de5354746924..86a8fc14cde9 100644 --- a/arch/arm/include/asm/fncpy.h +++ b/arch/arm/include/asm/fncpy.h @@ -53,8 +53,8 @@ * in the file where f is defined. */ -#ifndef __ASM_FNCPY_H -#define __ASM_FNCPY_H +#ifndef __ASM_ARM_FNCPY_H +#define __ASM_ARM_FNCPY_H #include #include @@ -91,4 +91,4 @@ __result; \ }) -#endif /* !__ASM_FNCPY_H */ +#endif /* !__ASM_ARM_FNCPY_H */ -- 2.9.3
[PATCH v2 3/4] arm64: Provide a fncpy implementation
Utilize the asm-generic/fncpy.h implementation for ARM64 to allow the use of drivers/misc/sram*.c on these platforms as well. Signed-off-by: Florian Fainelli --- arch/arm64/include/asm/fncpy.h | 6 ++ 1 file changed, 6 insertions(+) create mode 100644 arch/arm64/include/asm/fncpy.h diff --git a/arch/arm64/include/asm/fncpy.h b/arch/arm64/include/asm/fncpy.h new file mode 100644 index ..578f942f55e4 --- /dev/null +++ b/arch/arm64/include/asm/fncpy.h @@ -0,0 +1,6 @@ +#ifndef __ASMARM64_FNCPY_H +#define __ASMARM64_FNCPY_H + +#include <asm-generic/fncpy.h> + +#endif /* __ASMARM64_FNCPY_H */ -- 2.9.3
Re: autofs multi-map regression
Dick Streefland writes: > On Friday 2017-06-16 12:03, Eric W. Biederman wrote: > | Interesting... > | > | Can you test this on a stock 4.11 kernel? > | > | I definitely need a little bit more information to solve this. That > | commit did not add any new error conditions so I need to understand > | what state you are getting yourself into that is affected by this > | commit. > | > | Is there a chance you can post /proc/self/mountinfo from when this is > | happening? > > I've installed the mainline 4.11 kernel from: > > http://kernel.ubuntu.com/~kernel-ppa/mainline/v4.11/ > > and this kernel works correctly! > > So either this issue was fixed in the meantime, or it is something > specific to the Ubuntu kernel. I guess I should file a bug report > with Ubuntu then? Please. > I've also looked at /proc/self/mountinfo before and directly after the > mount attempt. Here are the ext4 and autofs entries for the failing 4.4 > kernel: Thank you. I am definitely out of my depth on the autofs portion of this. As things are working with 4.11 and failing with my test of 4.4 with a much older kernel. I will leave this with you and the ubuntu folks to sort out. 
Good Luck, Eric > before: > 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 > rw,errors=remount-ro,data=ordered > 41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs > systemd-1 rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct > 46 23 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 > rw,block_validity,delalloc,barrier,user_xattr,acl > 202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net > rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect > > after: > 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 > rw,errors=remount-ro,data=ordered > 41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs > systemd-1 rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct > 46 162 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 > rw,block_validity,delalloc,barrier,user_xattr,acl > 202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net > rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect > 157 202 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 > rw,errors=remount-ro,data=ordered > 161 157 0:47 / /net/localhost/loc rw,relatime shared:119 - autofs > /etc/auto.net rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset > 162 23 0:47 / /loc rw,relatime shared:119 - autofs /etc/auto.net > rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset > > And here the info for the working mainline 4.11 kernel: > > before: > 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 > rw,errors=remount-ro,data=ordered > 74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs > systemd-1 > rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754 > 45 23 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 > rw,block_validity,delalloc,barrier,user_xattr,acl > 208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net > rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555 > > after: > 23 0 8:2 / / rw,relatime 
shared:1 - ext4 /dev/sda2 > rw,errors=remount-ro,data=ordered > 74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs > systemd-1 > rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754 > 45 175 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 > rw,block_validity,delalloc,barrier,user_xattr,acl > 208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net > rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555 > 162 208 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 > rw,errors=remount-ro,data=ordered > 166 162 0:48 / /net/localhost/loc rw,relatime shared:122 - autofs > /etc/auto.net > rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555 > 167 23 0:48 / /loc rw,relatime shared:122 - autofs /etc/auto.net > rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555 > 174 166 8:4 / /net/localhost/loc rw,nosuid,nodev,noatime shared:28 - ext4 > /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl > 175 167 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 > rw,block_validity,delalloc,barrier,user_xattr,acl
Re: [RFC PATCH 1/3] atmel-hlcdc: add support for 8-bit color lookup table mode
On 2017-06-16 18:15, Boris Brezillon wrote: > Hi Peter, > > On Fri, 16 Jun 2017 17:54:04 +0200 > Peter Rosin wrote: > >> On 2017-06-16 12:01, Boris Brezillon wrote: >>> Hi Peter, >>> >>> On Fri, 16 Jun 2017 11:12:25 +0200 >>> Peter Rosin wrote: >>> All layers of chips support this, the only variable is the base address of the lookup table in the register map. Signed-off-by: Peter Rosin --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 48 + drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c| 13 +++ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h| 16 + drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 5 +++ 4 files changed, 82 insertions(+) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index 5348985..75871b5 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -61,6 +61,7 @@ struct atmel_hlcdc_crtc { struct atmel_hlcdc_dc *dc; struct drm_pending_vblank_event *event; int id; + u32 clut[ATMEL_HLCDC_CLUT_SIZE]; >>> >>> Do we really need to duplicate this table here? I mean, the gamma_lut >>> table should always be available in the crtc_state, so do you have a >>> good reason a copy here? >> >> If I don't keep a copy in the driver, it doesn't work when there's no >> gamma_lut. And there is no gamma_lut when I use fbdev emulation. Maybe >> that's a bug somewhere else? > > Can't we re-use crtc->gamma_store? Honnestly, I don't know how the > fbdev->DRM link should be done, so we'd better wait for DRM maintainers > feedback here (Daniel, any opinion?). Ahh, gamma_store. Makes perfect sense. Thanks for that pointer! >> >> Sure, I could have added it in patch 3/3 instead, but didn't when I >> divided the work into patches... > > No, my point is that IMO it shouldn't be needed at all. Right, with gamma_store it's no longer needed. 
>> }; static inline struct atmel_hlcdc_crtc * @@ -140,6 +141,46 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) cfg); } +static void +atmel_hlcdc_crtc_load_lut(struct drm_crtc *c) +{ + struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c); + struct atmel_hlcdc_dc *dc = crtc->dc; + int layer; + int idx; + + for (layer = 0; layer < ATMEL_HLCDC_MAX_LAYERS; layer++) { + if (!dc->layers[layer]) + continue; + for (idx = 0; idx < ATMEL_HLCDC_CLUT_SIZE; idx++) + atmel_hlcdc_layer_write_clut(dc->layers[layer], + idx, crtc->clut[idx]); + } +} + +static void +atmel_hlcdc_crtc_flush_lut(struct drm_crtc *c) +{ + struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c); + struct drm_crtc_state *state = c->state; + struct drm_color_lut *lut; + int idx; + + if (!state->gamma_lut) + return; + + lut = (struct drm_color_lut *)state->gamma_lut->data; + + for (idx = 0; idx < ATMEL_HLCDC_CLUT_SIZE; idx++) { + crtc->clut[idx] = + ((lut[idx].red << 8) & 0xff) | + (lut[idx].green & 0xff00) | + (lut[idx].blue >> 8); + } + + atmel_hlcdc_crtc_load_lut(c); +} + static enum drm_mode_status atmel_hlcdc_crtc_mode_valid(struct drm_crtc *c, const struct drm_display_mode *mode) @@ -312,6 +353,9 @@ static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_s) { /* TODO: write common plane control register if available */ + + if (crtc->state->color_mgmt_changed) + atmel_hlcdc_crtc_flush_lut(crtc); >>> >>> Hm, it's probably too late to do it here. Planes have already been >>> enabled and the engine may have started to fetch data and do the >>> composition. You could do that in ->update_plane() [1], and make it a >>> per-plane thing. >>> >>> I'm not sure, but I think you can get the new crtc_state from >>> plane->crtc->state in this context (state have already been swapped, >>> and new state is being applied, which means relevant locks are held). >> >> Ok, I can move it there. 
My plan is to just copy the default .update_plane >> function and insert >> >> if (crtc->state->color_mgmt_changed && crtc->state->gamma_lut) { >> ... >> } >> >> just before the drm_atomic_commit(state) call. Sounds ok? > > Why would you copy the default ->update_plane() when we already have > our own ->atomic_update_plane() implementation [1]? Just put it there > (before the atmel
Re: [PATCH V1 09/15] spmi: pmic-arb: check apid enabled before calling the handler
On 06/14, kgu...@codeaurora.org wrote: > On 2017-06-01 02:09, Stephen Boyd wrote: > >On 05/30, Kiran Gunda wrote: > >>From: Abhijeet Dharmapurikar > >> > >>The driver currently invokes the apid handler (periph_handler()) > > > >You mean periph_interrupt()? > > > Yes. > >>once it sees that the summary status bit for that apid is set. > >> > >>However the hardware is designed to set that bit even if the apid > >>interrupts are disabled. The driver should check whether the apid > >>is indeed enabled before calling the apid handler. > > > >Really? Wow that is awful. Or is this because ACC_ENABLE bit is > >always set now and never cleared? > > > Yes. It is awful. It is not because of the ACC_ENABLE bit is set. > >> > >>Signed-off-by: Abhijeet Dharmapurikar > >>Signed-off-by: Kiran Gunda > >>--- > >> drivers/spmi/spmi-pmic-arb.c | 10 +++--- > >> 1 file changed, 7 insertions(+), 3 deletions(-) > >> > >>diff --git a/drivers/spmi/spmi-pmic-arb.c > >>b/drivers/spmi/spmi-pmic-arb.c > >>index ad34491..f8638fa 100644 > >>--- a/drivers/spmi/spmi-pmic-arb.c > >>+++ b/drivers/spmi/spmi-pmic-arb.c > >>@@ -536,8 +536,8 @@ static void pmic_arb_chained_irq(struct > >>irq_desc *desc) > >>void __iomem *intr = pa->intr; > >>int first = pa->min_apid >> 5; > >>int last = pa->max_apid >> 5; > >>- u32 status; > >>- int i, id; > >>+ u32 status, enable; > >>+ int i, id, apid; > >> > >>chained_irq_enter(chip, desc); > >> > >>@@ -547,7 +547,11 @@ static void pmic_arb_chained_irq(struct > >>irq_desc *desc) > >>while (status) { > >>id = ffs(status) - 1; > >>status &= ~BIT(id); > >>- periph_interrupt(pa, id + i * 32); > >>+ apid = id + i * 32; > >>+ enable = readl_relaxed(intr + > >>+ pa->ver_ops->acc_enable(apid)); > > > >Do we need to read the hardware to figure this out? After earlier > >patches in this series we would never clear the > >SPMI_PIC_ACC_ENABLE_BIT after one of the irqs in a peripheral is > >unmasked for the first time (which looks to be fixing a bug in > >the existing driver BTW). 
So in practice, this should almost > >always be true. > > > yes. We have removed clearing the SPMI_PIC_ACC_ENABLE_BIT from the > irq_mask, > because if we disable this BIT it disables all the peripheral IRQs, > which we don't want. Right, we could reference count it though and only clear and set the bit when we mask and unmask the last irq in the peripheral. > > Once the peripheral fires the interrupt the summary status bit for > that peripheral > is set even though the SPMI_PIC_ACC_ENABLE_BIT is not enabled. > That's why we have to > read this register to not service the interrupt that is not > requested/enabled yet. > This SPMI_PIC_ACC_ENABLE_BIT is enabled during the irq_unmask which > is called from request_irq. Ok. So this is again about handling the case where an interrupt is pending out of the bootloader? > > >In the one case that it isn't true, we'll be handling some other > >irq for another peripheral and then hardware will tell us there's > >an interrupt for a peripheral that doesn't have any interrupts > >unmasked. We would call periph_interrupt() and then that > >shouldn't see any interrupts in the status register for that > >APID. So we do some more work, but nothing happens still. Did I > >miss something? What is this fixing? > > Yes. As you said this fixes the issue of calling the periph_interrupt > for some other irq that is not yet requested and enabled yet. Hmm. I seemed to miss the fact that periph_interrupt() will see an unmasked interrupt and think it's valid. I thought that only SPMI_PIC_ACC_ENABLE_BIT was broken, but you're saying that the status register for a particular peripheral will always latch interrupts even if we haven't enabled them? -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Re: [PATCH v9 2/4] arm64: dts: hisi: add kirin pcie node
On Tue, Jun 06, 2017 at 07:19:53PM +0800, Guodong Xu wrote: > Hi, Arnd > > On Tue, Jun 6, 2017 at 5:23 PM, Arnd Bergmann wrote: > > On Sun, Jun 4, 2017 at 2:03 AM, kbuild test robot wrote: > >> Hi Xiaowei, > >> > >> [auto build test ERROR on pci/next] > >> [also build test ERROR on v4.12-rc3 next-20170602] > >> [if your patch is applied to the wrong git tree, please drop us a note to > >> help improve the system] > >> > >> url: > >> https://github.com/0day-ci/linux/commits/Xiaowei-Song/add-PCIe-driver-for-Kirin-PCIe/20170531-182118 > >> base: https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git > >> next > >> config: arm64-allnoconfig (attached as .config) > >> compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705 > >> reproduce: > >> wget > >> https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross > >> -O ~/bin/make.cross > >> chmod +x ~/bin/make.cross > >> # save the attached .config to linux build tree > >> make.cross ARCH=arm64 > >> > >> All errors (new ones prefixed by >>): > >> > Error: arch/arm64/boot/dts/hisilicon/hi3660.dtsi:180.24-25 syntax error > FATAL ERROR: Unable to parse input tree > > > > We keep getting the build errors for patch submissions. Obviously the patch > > is > > still broken and can't be merged as-is. What is the plan for merging the > > series? > > > > This dts patch can be applied to dts series [1]. For upstream review > purpose, hi3660-hikey960 dts patches, which don't have a related > driver changes, are sent in [1]. Other patches, which need driver > changes, like this one, are sent together with driver. > > Patchset [1] is now at its v2 review. Rob Herring already gave his ACK > for some of them in v1. Hopefully I can get more ACK for remaining > ones, and make them ready for v4.13 merging window. > > [1], http://www.spinics.net/lists/devicetree/msg178303.html I don't know how you want to deal with the DTS build failure. 
From a PCI perspective, I think I could apply patches 1 and 3 pretty easily by themselves. If/when you post these again, please incorporate the following incremental diff to clean up various whitespace and capitalization nits (these are spread across several of your patches). diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt b/Documentation/devicetree/bindings/pci/kirin-pcie.txt index 68ffa0fbcd73..20357d840af1 100644 --- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt +++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt @@ -24,8 +24,8 @@ Example based on kirin960: pcie@f400 { compatible = "hisilicon,kirin-pcie"; - reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 0x1000>, - <0x0 0xf3f2 0x0 0x4>, <0x0 0xF400 0 0x2000>; + reg = <0x0 0xf400 0x0 0x1000>, <0x0 0xff3fe000 0x0 0x1000>, + <0x0 0xf3f2 0x0 0x4>, <0x0 0xf400 0x0 0x2000>; reg-names = "dbi","apb","phy", "config"; bus-range = <0x0 0x1>; #address-cells = <3>; @@ -46,5 +46,5 @@ Example based on kirin960: <&crg_ctrl HI3660_ACLK_GATE_PCIE>; clock-names = "pcie_phy_ref", "pcie_aux", "pcie_apb_phy", "pcie_apb_sys", "pcie_aclk"; - reset-gpios = <&gpio11 1 0 >; + reset-gpios = <&gpio11 1 0>; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi index e8feb2fb4d53..7bc89baa40ba 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi @@ -159,12 +159,12 @@ pcie@f400 { compatible = "hisilicon,kirin960-pcie"; - reg = <0x0 0xf400 0x0 0x1000>, - <0x0 0xff3fe000 0x0 0x1000>, + reg = <0x0 0xf400 0x0 0x1000>, + <0x0 0xff3fe000 0x0 0x1000>, <0x0 0xf3f2 0x0 0x4>, - <0x0 0xF500 0x0 0x2000>; + <0x0 0xf500 0x0 0x2000>; reg-names = "dbi", "apb", "phy", "config"; - bus-range = <0x0 0x1>; + bus-range = <0x0 0x1>; #address-cells = <3>; #size-cells = <2>; device_type = "pci"; @@ -173,7 +173,7 @@ num-lanes = <1>; #interrupt-cells = <1>; interrupt-map-mask = <0xf800 0 0 7>; - interrupt-map = <0x0 0 0 1 &gic 0 0 0 282 
4>, + interrupt-map = <0x0 0 0 1 &gic 0 0 0 282 4>, <0x0 0 0 2 &gic 0 0 0 283 4>, <0x0 0 0 3 &gic 0 0 0 284 4>,
Re: [PATCH v2 03/11] tty: kbd: reduce stack size with KASAN
On Fri, Jun 16, 2017 at 1:56 PM, Arnd Bergmann wrote: > On Fri, Jun 16, 2017 at 7:29 PM, Dmitry Torokhov > wrote: >> On Fri, Jun 16, 2017 at 8:58 AM, Samuel Thibault >> wrote: >>> I'm however afraid we'd have to mark a lot of static functions that way, >>> depending on the aggressivity of gcc... I'd indeed really argue that gcc >>> should consider stack usage when inlining. >>> >>> static int f(int foo) { >>> char c[256]; >>> g(c, foo); >>> } >>> >>> is really not something that I'd want to see the compiler to inline. >> >> Why would we not want it to be inlined? What we do not want is several >> calls having _separate_ instances of 'c' generated on the stack, all >> inlined calls should share 'c'. And of course if we have f1, f2, and >> f3 with c1, c2, and c3, GCC should not blow up the stack inlining and >> allocating stack for all 3 of them beforehand. >> >> But this all seems to me an issue that should be solved in the toolchain, not >> trying to play whack-a-mole with kernel sources. > > The problem for Samuel's example is that > > a) the "--param asan-stack=1" option in KASAN does blow up the >stack, which is why the annotation is now called 'noinline_if_stackbloat'. > > b) The toolchain cannot solve the problem, as most instances of the >problem (unlike kbd_put_queue) force the inlining unless you build >with the x86-specific CONFIG_OPTIMIZE_INLINING. If inlining is done right there should be no change in stack size, because if calls are not inlined then stack storage is "shared" between calls, and it should similarly be shared when calls are inlined. And that is a toolchain issue. -- Dmitry
Re: autofs multi-map regression
Dick Streefland writes: > After a recent upgrade of a Ubuntu xenial machine, a particular > autofs multi-map mount setup stopped working. A simplified example is: > > :: > auto.master > :: > /net /etc/auto.net > :: > auto.net > :: > localhost / :/ /loc :/loc > > Accessing /net/localhost/loc should trigger two nested bind mounts on > /net/localhost and /net/localhost/loc, but with the new kernel, it fails > with ELOOP: > > $ ls /net/localhost/loc > ls: cannot open directory '/net/localhost/loc': Too many levels of symbolic > links > > The problem is related to the upgrade of the Ubuntu xenial kernel from > 4.4.0-38.57 to 4.4.0-78.99. I bisected the regression to commit > 731ac92843877f3633325203abc942193c1e9001, which is a Ubuntu backport > of this upstream kernel commit: > > commit 1064f874abc0d05eeed8993815f584d847b72486 > Author: Eric W. Biederman > Date: Fri Jan 20 18:28:35 2017 +1300 > > mnt: Tuck mounts under others instead of creating shadow/side mounts. I don't believe this is a kernel change. I dug up an old VM and I was able to reproduce this issue simply by installing autofs, and your auto.master and auto.net files. # uname -a Linux ubuntu-16 4.4.0-24-generic #43-Ubuntu SMP Wed Jun 8 19:27:37 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux # ls /net/ localhost # ls /net/localhost/loc ls: cannot open directory '/net/localhost/loc': Too many levels of symbolic links # ls /loc ls: cannot open directory '/loc/': Too many levels of symbolic links I suspect there is configuration somewhere in your autofs configuration. I don't speak autofs well enough to debug the issue at this point. But I can conclusively say it was not the kernel commit you pointed at, as I see the issue you are reporting and I don't have that commit in the kernel under test. Eric
Re: [RFC PATCH 0/2] crypto: caam - fix cts(cbc(aes)) with CAAM driver
On 6/16/2017 11:00 AM, Herbert Xu wrote: > On Fri, Jun 16, 2017 at 07:57:00AM +, Horia Geantă wrote: >> >> Commit 0605c41cc53ca ("crypto: cts - Convert to skcipher") appends >> CRYPTO_TFM_REQ_MAY_BACKLOG to the original crypto request flags for the >> last block - when calling cts_cbc_encrypt(). >> Is it really needed? > > Yes, because at this point we cannot tell the sender to back off. > >> For cts(cbc(aes)) with cbc(aes) offloaded in HW, i.e. running in async >> mode, we get the below stack for CAAM driver. >> Driver is told that it can sleep (CRYPTO_TFM_REQ_MAY_BACKLOG flag), so >> it uses GFP_KERNEL to allocate memory. However, this is incorrect, since >> driver runs in atomic context (softirq). > > This is wrong. Whether you can sleep or not is determined by > MAY_SLEEP, not MAY_BACKLOG. MAY_BACKLOG only indicates that this > request must be queued, even if the queue is full. > Indeed, CAAM driver incorrectly decides to use GFP_KERNEL for allocation when MAY_BACKLOG flag is set. This seems to be a long-standing issue, I will send a fix (separately). Still I think we have a problem. David reported that the user is fscrypt. Looking into fscrypt code, I see that besides MAY_BACKLOG, MAY_SLEEP flag is also set. So we end up in the situation I described earlier: the last block is encrypted in atomic context and with MAY_SLEEP set. Thanks, Horia
mmotm 2017-06-16-13-59 uploaded
The mm-of-the-moment snapshot 2017-06-16-13-59 has been uploaded to http://www.ozlabs.org/~akpm/mmotm/ mmotm-readme.txt says README for mm-of-the-moment: http://www.ozlabs.org/~akpm/mmotm/ This is a snapshot of my -mm patch queue. Uploaded at random hopefully more than once a week. You will need quilt to apply these patches to the latest Linus release (4.x or 4.x-rcY). The series file is in broken-out.tar.gz and is duplicated in http://ozlabs.org/~akpm/mmotm/series The file broken-out.tar.gz contains two datestamp files: .DATE and .DATE-yyyy-mm-dd-hh-mm-ss. Both contain the string yyyy-mm-dd-hh-mm-ss, followed by the base kernel version against which this patch series is to be applied. This tree is partially included in linux-next. To see which patches are included in linux-next, consult the `series' file. Only the patches within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in linux-next. A git tree which contains the memory management portion of this tree is maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git by Michal Hocko. It contains the patches which are between the "#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series file, http://www.ozlabs.org/~akpm/mmotm/series. A full copy of the full kernel tree with the linux-next and mmotm patches already applied is available through git within an hour of the mmotm release. Individual mmotm releases are tagged. The master branch always points to the latest release, so it's constantly rebasing. http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/ To develop on top of mmotm git: $ git remote add mmotm git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git $ git remote update mmotm $ git checkout -b topic mmotm/master $ git send-email mmotm/master.. [...] 
To rebase a branch with older patches to a new mmotm release: $ git remote update mmotm $ git rebase --onto mmotm/master topic The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second) contains daily snapshots of the -mm tree. It is updated more frequently than mmotm, and is untested. A git copy of this tree is available at http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/ and use of this tree is similar to http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above. This mmotm tree contains the following patches against 4.12-rc5: (patches marked "*" will be included in linux-next) origin.patch i-need-old-gcc.patch * mm-hwpoison-use-compound_head-flags-for-huge-pages.patch * swap-cond_resched-in-swap_cgroup_prepare.patch * mm-numa-avoid-waiting-on-freed-migrated-pages.patch * userfaultfd-shmem-handle-coredumping-in-handle_userfault.patch * mm-correct-the-comment-when-reclaimed-pages-exceed-the-scanned-pages.patch * mm-correct-the-comment-when-reclaimed-pages-exceed-the-scanned-pages-fix.patch * mm-list_lruc-use-cond_resched_lock-for-nlru-lock.patch * mm-skip-hwpoisoned-pages-when-onlining-pages.patch * mm-huge-vmap-fail-gracefully-on-unexpected-huge-vmap-mappings.patch * autofs-sanity-check-status-reported-with-autofs_dev_ioctl_fail.patch * arm-arch-arm-include-asm-pageh-needs-personalityh.patch * mn10300-remove-wrapper-header-for-asm-deviceh.patch * mn10300-use-generic-fbh.patch * tile-provide-default-ioremap-declaration.patch * teach-initramfs_root_uid-and-initramfs_root_gid-that-1-means-current-user.patch * clarify-help-text-that-compression-applies-to-ramfs-as-well-as-legacy-ramdisk.patch * sh-intc-delete-an-error-message-for-a-failed-memory-allocation-in-add_virq_to_pirq.patch * ocfs2-fix-a-static-checker-warning.patch * ocfs2-use-magich.patch * ocfs2-get-rid-of-ocfs2_is_o2cb_active-function.patch * ocfs2-old-mle-put-and-release-after-the-function-dlm_add_migration_mle-called.patch * 
ocfs2-old-mle-put-and-release-after-the-function-dlm_add_migration_mle-called-fix.patch * ocfs2-dlm-optimization-of-code-while-free-dead-node-locks.patch * ocfs2-dlm-optimization-of-code-while-free-dead-node-locks-checkpatch-fixes.patch * ocfs2-give-an-obvious-tip-for-dismatch-cluster-names.patch * ocfs2-give-an-obvious-tip-for-dismatch-cluster-names-v2.patch * ocfs2-move-some-definitions-to-header-file.patch * ocfs2-fix-some-small-problems.patch * ocfs2-add-kobject-for-online-file-check.patch * ocfs2-add-duplicative-ino-number-check.patch * block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch * sendfile-do-not-update-file-offset-of-non-lseekable-objects.patch * fs-file-replace-alloc_fdmem-with-kvmalloc-alternative.patch * watchdog-remove-unused-declaration.patch * watchdog-introduce-arch_touch_nmi_watchdog.patch * watchdog-split-up-config-options.patch * watchdog-provide-watchdog_reconfigure-for-arch-watchdogs.patch * powerpc-64s-implement-arch-specific-hardlockup-watchdog.patch * powerpc-64s-implement-arch-specific-hardlockup-watchdog-checkpatch-fixes.patch mm.patch * mm-slub-remove-a-redundant-assignment-in-___slab_alloc.patch * mm-slub-rese
Re: [PATCH v5 3/4] ARM64: dts: meson-gx: use stable UART bindings with correct gate clock
Neil Armstrong writes: > From: Helmut Klein > > This patch switches to the stable UART bindings but also add the correct > gate clock to the non-AO UART nodes for GXBB and GXL SoCs. > > Acked-by: Jerome Brunet > Signed-off-by: Helmut Klein > Signed-off-by: Neil Armstrong > --- > arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 12 +--- > arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 25 + > arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 25 + > 3 files changed, 55 insertions(+), 7 deletions(-) > > diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi > b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi > index 603491d..86a4018 100644 > --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi > +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi > @@ -225,7 +225,7 @@ > }; > > uart_A: serial@84c0 { > - compatible = "amlogic,meson-uart"; > + compatible = "amlogic,meson-gx-uart"; IMO, we should keep both compatibles (the more specific one first). That would allow new DTs to continue to run on older kernels. It would also allow this DT change to be completely independent of the driver changes. Otherwise, if I merge this before the driver changes are merged, we'll have a bunch of boards with no more serial console output. Kevin
Re: [PATCH v2 03/11] tty: kbd: reduce stack size with KASAN
On Fri, Jun 16, 2017 at 7:29 PM, Dmitry Torokhov wrote: > On Fri, Jun 16, 2017 at 8:58 AM, Samuel Thibault > wrote: >> Arnd Bergmann, on ven. 16 juin 2017 17:41:47 +0200, wrote: >>> The problem are the 'ch' and 'flag' variables that are passed into >>> tty_insert_flip_char by value, and from there into >>> tty_insert_flip_string_flags by reference. In this case, kasan tries >>> to detect whether tty_insert_flip_string_flags() does any out-of-bounds >>> access on the pointers and adds 64 bytes redzone around each of >>> the two variables. >> >> Ouch. >> >>> gcc-6.3.1 happens to inline 16 calls of tty_insert_flip_char() into > > I wonder if we should stop marking tty_insert_flip_char() as inline. That would be an easy solution, yes. tty_insert_flip_char() was apparently meant to be optimized for the fast path to completely avoid calling into another function, but that fast path got a bit more complex with commit acc0f67f307f ("tty: Halve flip buffer GFP_ATOMIC memory consumption"). If we move it out of line, the fast path optimization goes away and we could just have a simple implementation like int tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) { struct tty_buffer *tb = port->buf.tail; int flags = (flag == TTY_NORMAL) ? 
TTYB_NORMAL : 0; if (!__tty_buffer_request_room(port, 1, flags)) return 0; if (~tb->flags & TTYB_NORMAL) *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; return 1; } One rather simple change I found would actually avoid the warning and would seem to actually give us better runtime behavior even without KASAN: diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index c28dd523f96e..15d03a14ad0f 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -26,7 +26,7 @@ static inline int tty_insert_flip_char(struct tty_port *port, *char_buf_ptr(tb, tb->used++) = ch; return 1; } - return tty_insert_flip_string_flags(port, &ch, &flag, 1); + return tty_insert_flip_string_fixed_flag(port, &ch, flag, 1); } static inline int tty_insert_flip_string(struct tty_port *port, This reduces the stack frame size for kbd_event() to 1256 bytes, which is well within the limit, and it lets us keep the flag-less buffers across a 'tb->used >= tb->size' condition. Calling into tty_insert_flip_string_flags() today will allocate a flag buffer if there isn't already one, even when it is not needed. >> I'm however afraid we'd have to mark a lot of static functions that way, >> depending on the aggressivity of gcc... I'd indeed really argue that gcc >> should consider stack usage when inlining. >> >> static int f(int foo) { >> char c[256]; >> g(c, foo); >> } >> >> is really not something that I'd want to see the compiler to inline. > > Why would not we want it be inlined? What we do not want us several > calls having _separate_ instances of 'c' generated on the stack, all > inlined calls should share 'c'. And of course if we have f1, f2, and > f3 with c1, c2, and c3, GCC should not blow up the stack inlining and > allocating stack for all 3 of them beforehand. > > But this all seems to me issue that should be solved in toolchain, not > trying to play whack-a-mole with kernel sources. 
The problem with Samuel's example is that a) the "--param asan-stack=1" option in KASAN does blow up the stack, which is why the annotation is now called 'noinline_if_stackbloat'. b) The toolchain cannot solve the problem, as most instances of the problem (unlike kbd_put_queue) force the inlining unless you build with the x86-specific CONFIG_OPTIMIZE_INLINING. Arnd
Re: [PATCH RESEND 02/13] mfd: cros_ec: Add EC console read structures definitions
Hi Enric, On 05/16/2017 09:13 AM, Enric Balletbo i Serra wrote: > From: Nicolas Boichat > > ec_params_console_read_v1 is used to capture EC logs from kernel, > and ec_params_get_cmd_versions_v1 is used to probe whether EC > supports that command. > > Signed-off-by: Nicolas Boichat > Reviewed-by: Guenter Roeck > Acked-by: Lee Jones > Tested-by: Enric Balletbo i Serra Thanks. Applied. -- Benson Leung Staff Software Engineer Chrome OS Kernel Google Inc. ble...@google.com Chromium OS Project ble...@chromium.org signature.asc Description: OpenPGP digital signature
Re: [PATCH RESEND 01/13] mfd: cros_ec: Add helper for event notifier.
Hi Enric, On 05/16/2017 09:13 AM, Enric Balletbo i Serra wrote: > From: Gwendal Grignou > > Add cros_ec_get_event() entry point to retrieve event within functions > called by the notifier. > > Signed-off-by: Gwendal Grignou > Signed-off-by: Enric Balletbo i Serra > Acked-by: Lee Jones Applied to my branch. I'll let you know when the whole thing is ready. -- Benson Leung Staff Software Engineer Chrome OS Kernel Google Inc. ble...@google.com Chromium OS Project ble...@chromium.org signature.asc Description: OpenPGP digital signature
Re: [PATCH 00/13] block: assorted cleanup for bio splitting and cloning.
On 06/16/2017 01:34 AM, Christoph Hellwig wrote: > On Fri, Jun 16, 2017 at 05:30:50PM +1000, NeilBrown wrote: >> I've pushed the new version to the same place. Do you actually want >> me to re-post all the patches? > > I personally prefer to always have patches on the list, but I can't > speak for Jens of course. Yes please, I'd prefer them posted again as well. -- Jens Axboe
Re: [RFC v2 0/2] swait: add idle to make idle-hacks on kthreads explicit
"Paul E. McKenney" writes: > On Fri, Jun 16, 2017 at 01:26:19AM +0200, Luis R. Rodriguez wrote: >> On Thu, Jun 15, 2017 at 02:57:17PM -0700, Paul E. McKenney wrote: >> > On Thu, Jun 15, 2017 at 11:48:18AM -0700, Luis R. Rodriguez wrote: >> > > While reviewing RCU's interruptible swaits I noticed signals were >> > > actually >> > > not expected. Paul explained that the reason signals are not expected is >> > > we use kthreads, which don't get signals, furthermore the code avoided >> > > the >> > > uninterruptible swaits as otherwise it would contribute to the system >> > > load >> > > average on idle, bumping it from 0 to 2 or 3 (depending on preemption). >> > > >> > > Since this can be confusing its best to be explicit about the >> > > requirements and >> > > goals. This patch depends on the other killable swaits [0] recently >> > > proposed as >> > > well interms of context. Thee patch can however be tested independently >> > > if >> > > the hunk is addressed separately. >> > > >> > > [0] https://lkml.kernel.org/r/20170614222017.14653-3-mcg...@kernel.org >> > >> > Tested-by: Paul E. McKenney >> > >> > Are you looking to push these or were you wanting me to? >> >> I'd be happy for you to take them. > > OK, let's see if we can get some Acked-by's or Reviewed-by's from the > relevant people. > > For but one example, Eric, does this look good to you or are adjustments > needed? Other than an unnecessary return code I don't see any issues. Acked-by: "Eric W. Biederman" In truth I am just barely ahead of you folks. I ran into the same issue the other day with a piece of my code and someone pointed me to TASK_IDLE. Eric
Re: [PATCH] fs: don't forget to put old mntns in mntns_install
Hi Alexander, Do you have any comments about this patch? Here is a reproducer for this leak: $ cat mount.sh set -e -x mount --make-rprivate / mount -t tmpfs zdtm /mnt mount --make-shared /mnt mount -t proc procX /proc for i in `seq $1`; do mount --bind /mnt /mnt done mount --make-rprivate /mnt unshare -m sleep 1000 & pid=$! unshare -m nsenter -m -t $pid nsenter -m -t $$ true $ while :; do unshare -Umpfr sh mount.sh 16 || break; done ... $ cat /proc/slabinfo | grep mnt mnt_cache 3281745 3281776512 162 : tunables00 0 : slabdata 205111 205111 0 On Thu, Jun 08, 2017 at 05:32:29PM -0700, Andrei Vagin wrote: > Fixes: 4f757f3cbf54 ("make sure that mntns_install() doesn't end up with > referral for root") > Cc: Al Viro > Signed-off-by: Andrei Vagin > --- > fs/namespace.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/fs/namespace.c b/fs/namespace.c > index 8bd3e4d..5a44384 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -3488,6 +3488,8 @@ static int mntns_install(struct nsproxy *nsproxy, > struct ns_common *ns) > return err; > } > > + put_mnt_ns(old_mnt_ns); > + > /* Update the pwd and root */ > set_fs_pwd(fs, &root); > set_fs_root(fs, &root); > -- > 2.9.4 >
Re: [PATCH 03/44] dmaengine: ioat: don't use DMA_ERROR_CODE
On Fri, Jun 16, 2017 at 11:10 AM, Christoph Hellwig wrote: > DMA_ERROR_CODE is not a public API and will go away. Instead properly > unwind based on the loop counter. > > Signed-off-by: Christoph Hellwig > Acked-by: Dave Jiang > Acked-By: Vinod Koul > --- > drivers/dma/ioat/init.c | 24 +++- > 1 file changed, 7 insertions(+), 17 deletions(-) > > diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c > index 6ad4384b3fa8..ed8ed1192775 100644 > --- a/drivers/dma/ioat/init.c > +++ b/drivers/dma/ioat/init.c > @@ -839,8 +839,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device > *ioat_dma) > goto free_resources; > } > > - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) > - dma_srcs[i] = DMA_ERROR_CODE; > for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { > dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, >DMA_TO_DEVICE); > @@ -910,8 +908,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device > *ioat_dma) > > xor_val_result = 1; > > - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) > - dma_srcs[i] = DMA_ERROR_CODE; > for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { > dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, >DMA_TO_DEVICE); > @@ -965,8 +961,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device > *ioat_dma) > op = IOAT_OP_XOR_VAL; > > xor_val_result = 0; > - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) > - dma_srcs[i] = DMA_ERROR_CODE; > for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { > dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, >DMA_TO_DEVICE); > @@ -1017,18 +1011,14 @@ static int ioat_xor_val_self_test(struct > ioatdma_device *ioat_dma) > goto free_resources; > dma_unmap: > if (op == IOAT_OP_XOR) { > - if (dest_dma != DMA_ERROR_CODE) > - dma_unmap_page(dev, dest_dma, PAGE_SIZE, > - DMA_FROM_DEVICE); > - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) > - if (dma_srcs[i] != DMA_ERROR_CODE) > - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, > - DMA_TO_DEVICE); > + while (--i >= 0) > + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, > 
+ DMA_TO_DEVICE); > + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); > } else if (op == IOAT_OP_XOR_VAL) { > - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) > - if (dma_srcs[i] != DMA_ERROR_CODE) > - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, > - DMA_TO_DEVICE); > + while (--i >= 0) > + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, > + DMA_TO_DEVICE); Wouldn't it make more sense to pull out the while loop and just call dma_unmap_page on dest_dma if "op == IOAT_OP_XOR"? Odds are it is what the compiler is already generating and will save a few lines of code so what you end up with is something like: while (--i >= 0) dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); if (op == IOAT_OP_XOR) dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); > } > free_resources: > dma->device_free_chan_resources(dma_chan); > -- > 2.11.0 >
Re: autofs multi-map regression
On Friday 2017-06-16 12:03, Eric W. Biederman wrote: | Interesting... | | Can you test this on a stock 4.11 kernel? | | I definitely need a little bit more information to solve this. That | commit did not add any new error condidtions so I need to understand | what state you are getting yourself into that is affected by this | commit. | | Is there a chance you can post /proc/self/mountinfo from when this is | happening? I've installed the mainline 4.11 kernel from: http://kernel.ubuntu.com/~kernel-ppa/mainline/v4.11/ and this kernel works correctly! So either this issue was fixed in the meantime, or it is something specific to the Ubuntu kernel. I guess I should file a bug report with Ubuntu then? I've also looked at /proc/self/mountinfo before and directly after the mount attempt. Here are the ext4 and autofs entries for the failing 4.4 kernel: before: 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 rw,errors=remount-ro,data=ordered 41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs systemd-1 rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct 46 23 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl 202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect after: 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 rw,errors=remount-ro,data=ordered 41 19 0:34 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs systemd-1 rw,fd=34,pgrp=1,timeout=0,minproto=5,maxproto=5,direct 46 162 8:4 / /loc rw,nosuid,nodev,noatime shared:30 - ext4 /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl 202 23 0:44 / /net rw,relatime shared:160 - autofs /etc/auto.net rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,indirect 157 202 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 rw,errors=remount-ro,data=ordered 161 157 0:47 / /net/localhost/loc rw,relatime shared:119 - autofs /etc/auto.net 
rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset 162 23 0:47 / /loc rw,relatime shared:119 - autofs /etc/auto.net rw,fd=6,pgrp=1724,timeout=120,minproto=5,maxproto=5,offset And here the info for the working mainline 4.11 kernel: before: 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 rw,errors=remount-ro,data=ordered 74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs systemd-1 rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754 45 23 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl 208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555 after: 23 0 8:2 / / rw,relatime shared:1 - ext4 /dev/sda2 rw,errors=remount-ro,data=ordered 74 19 0:36 / /proc/sys/fs/binfmt_misc rw,relatime shared:56 - autofs systemd-1 rw,fd=35,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=12754 45 175 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl 208 23 0:46 / /net rw,relatime shared:164 - autofs /etc/auto.net rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,indirect,pipe_ino=26555 162 208 8:2 / /net/localhost rw,relatime shared:1 - ext4 /dev/sda2 rw,errors=remount-ro,data=ordered 166 162 0:48 / /net/localhost/loc rw,relatime shared:122 - autofs /etc/auto.net rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555 167 23 0:48 / /loc rw,relatime shared:122 - autofs /etc/auto.net rw,fd=6,pgrp=1545,timeout=120,minproto=5,maxproto=5,offset,pipe_ino=26555 174 166 8:4 / /net/localhost/loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl 175 167 8:4 / /loc rw,nosuid,nodev,noatime shared:28 - ext4 /dev/sda4 rw,block_validity,delalloc,barrier,user_xattr,acl -- Dick
Re: [PATCH RESEND 0/2] Add support for ARM CCN-502 interconnect
Arnd, Should this patchset go through the ARM maintainers? On 17-06-15 11:39 PM, Scott Branden wrote: Add compatible string for ARM CCN-502 interconnect. CCN-502 interconnect is already compatible with the existing ARM CCN driver, which supports CCN-504. Velibor Markovski (2): dt-bindings: arm-ccn: Add bindings info for CCN-502 compatible string bus: arm-ccn: Enable stats for CCN-502 interconnect Documentation/devicetree/bindings/arm/ccn.txt | 1 + drivers/bus/arm-ccn.c | 1 + 2 files changed, 2 insertions(+)
Re: [RFC v2 1/2] swait: add idle variants which don't contribute to load average
"Luis R. Rodriguez" writes: > There are cases where folks are using an interruptible swait when > using kthreads. This is rather confusing given you'd expect > interruptible waits to be -- interruptible, but kthreads are not > interruptible ! The reason for such practice though is to avoid > having these kthreads contribute to the system load average. > > When systems are idle some kthreads may spend a lot of time blocking if > using swait_event_timeout(). This would contribute to the system load > average. On systems without preemption this would mean the load average > of an idle system is bumped to 2 instead of 0. On systems with PREEMPT=y > this would mean the load average of an idle system is bumped to 3 > instead of 0. > > This adds proper API using TASK_IDLE to make such goals explicit and > avoid confusion. > > Suggested-by: "Eric W. Biederman" > Signed-off-by: Luis R. Rodriguez > --- > include/linux/swait.h | 25 + > 1 file changed, 25 insertions(+) > > diff --git a/include/linux/swait.h b/include/linux/swait.h > index 2c700694d50a..105c70e23286 100644 > --- a/include/linux/swait.h > +++ b/include/linux/swait.h > @@ -194,4 +194,29 @@ do { > \ > __ret; \ > }) > > +#define __swait_event_idle(wq, condition)\ > + ___swait_event(wq, condition, TASK_IDLE, 0, schedule()) > + > +#define swait_event_idle(wq, condition) > \ > +({ \ > + int __ret = 0; \ > + if (!(condition)) \ > + __ret = __swait_event_idle(wq, condition); \ > + __ret; \ > +}) The wait isn't interruptible so a return code doesn't make sense here. > +#define __swait_event_idle_timeout(wq, condition, timeout) \ > + ___swait_event(wq, ___wait_cond_timeout(condition), \ > +TASK_IDLE, timeout, \ > +__ret = schedule_timeout(__ret)) > + > +#define swait_event_idle_timeout(wq, condition, timeout) \ > +({ \ > + long __ret = timeout; \ > + if (!___wait_cond_timeout(condition)) \ > + __ret = __swait_event_idle_timeout(wq, \ > +condition, timeout); \ > + __ret; \ > +}) > + > #endif /* _LINUX_SWAIT_H */
Re: [PATCH] rtlwifi: rtl8821ae: remove unused variable
On 06/13/2017 03:42 PM, Gustavo A. R. Silva wrote: Remove unused variable rtlhal. Addresses-Coverity-ID: 1248810 Signed-off-by: Gustavo A. R. Silva --- NACK!! That variable is used in file core.c in driver rtlwifi, which is loaded and used by rtl8821ae. Please do more than blindly follow Coverity outputs, or improve that tool! Larry drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 2bc6bac..d158e34 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1360,7 +1360,6 @@ static bool _rtl8821ae_reset_pcie_interface_dma(struct ieee80211_hw *hw, static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtlpriv); u8 fw_reason = 0; struct timeval ts; @@ -1372,8 +1371,6 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) ppsc->wakeup_reason = 0; - rtlhal->last_suspend_sec = ts.tv_sec; - switch (fw_reason) { case FW_WOW_V2_PTK_UPDATE_EVENT: ppsc->wakeup_reason = WOL_REASON_PTK_UPDATE;
Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'
On Fri, 16 Jun 2017 16:15:23 -0400 Johannes Weiner wrote: > In any case, memcontrol.h doesn't/shouldn't need hardirq.h. When that > include is removed, the below patch compiles on: x86 allno, x86_64 > allno, and my regular x86_64 config: > > --- > Subject: mm-memcontrol-per-lruvec-stats-infrastructure-fix-4 Did you try x86_64 allmodconfig? I'm getting a mess: In file included from ./include/linux/mm.h:1032, from ./include/linux/highmem.h:7, from ./include/linux/bio.h:21, from ./include/linux/writeback.h:205, from ./include/linux/memcontrol.h:28, from ./include/linux/swap.h:8, from ./include/linux/suspend.h:4, from arch/x86/kernel/asm-offsets.c:12: ./include/linux/vmstat.h: In function 'lruvec_page_state': ./include/linux/vmstat.h:362: error: implicit declaration of function 'mem_cgroup_disabled' ./include/linux/vmstat.h:365: error: dereferencing pointer to incomplete type ./include/linux/vmstat.h:365: error: type defaults to 'int' in declaration of 'type name' ... Presumably because we have memcontrol.h indirectly including mm.h which includes vmstat.h (from a stupid place) and with this patch we have vmstat.h including memcontrol.h.
Re: [PATCH] ipmi: use rcu lock around call to intf->handlers->sender()
On 06/16/2017 08:11 AM, Tony Camuso wrote: On 06/16/2017 08:15 AM, Corey Minyard wrote: On 06/15/2017 10:54 AM, Corey Minyard wrote: On 06/13/2017 09:54 AM, Tony Camuso wrote: A vendor with a system having more than 128 CPUs occasionally encounters a crash during shutdown. This is not an easily reproduceable event, but the vendor was able to provide the following analysis of the crash, which exhibits the same footprint each time. crash> bt PID: 0 TASK: 88017c70ce70 CPU: 5 COMMAND: "swapper/5" #0 [88085c143ac8] machine_kexec at 81059c8b #1 [88085c143b28] __crash_kexec at 811052e2 #2 [88085c143bf8] crash_kexec at 811053d0 #3 [88085c143c10] oops_end at 8168ef88 #4 [88085c143c38] no_context at 8167ebb3 #5 [88085c143c88] __bad_area_nosemaphore at 8167ec49 #6 [88085c143cd0] bad_area_nosemaphore at 8167edb3 #7 [88085c143ce0] __do_page_fault at 81691d1e #8 [88085c143d40] do_page_fault at 81691ec5 #9 [88085c143d70] page_fault at 8168e188 [exception RIP: unknown or invalid address] RIP: a053c800 RSP: 88085c143e28 RFLAGS: 00010206 RAX: 88017c72bfd8 RBX: 88017a8dc000 RCX: 8810588b5ac8 RDX: 8810588b5a00 RSI: a053c800 RDI: 8810588b5a00 RBP: 88085c143e58 R8: 88017c70d408 R9: 88017a8dc000 R10: 0002 R11: 88085c143da0 R12: 8810588b5ac8 R13: 0100 R14: a053c800 R15: 8810588b5a00 ORIG_RAX: CS: 0010 SS: 0018 --- --- [exception RIP: cpuidle_enter_state+82] RIP: 81514192 RSP: 88017c72be50 RFLAGS: 0202 RAX: 001e4c3c6f16 RBX: f8a0 RCX: 0018 RDX: 000225c17d03 RSI: 88017c72bfd8 RDI: 001e4c3c6f16 RBP: 88017c72be78 R8: 237e R9: 0018 R10: 2494 R11: 0001 R12: 88017c72be20 R13: 88085c14f8e0 R14: 0082 R15: 001e4c3bb400 ORIG_RAX: ff10 CS: 0010 SS: 0018 This is the corresponding stack trace It has crashed because the area pointed with RIP extracted from timer element is already removed during a shutdown process. The function is smi_timeout(). 
And we think 8810588b5a00 in RDX is a parameter struct smi_info crash> rd 8810588b5a00 20 8810588b5a00: 8810588b6000 .`.X 8810588b5a10: 880853264400 a05417e0 .D&S..T. 8810588b5a20: 24a024a0 .$.$ 8810588b5a30: 8810588b5a40: a053a040 a053a060 @.S.`.S. 8810588b5a50: 00010001 8810588b5a60: 0e00 8810588b5a70: a053a580 a053a6e0 ..S...S. 8810588b5a80: a053a4a0 a053a250 ..S.P.S. 8810588b5a90: 00050002 Unfortunately the top of this area is already detroyed by someone. But because of two reasonns we think this is struct smi_info 1) The address included in between 8810588b5a70 and 8810588b5a80: are inside of ipmi_si_intf.c see crash> module 88085779d2c0 2) We've found the area which point this. It is offset 0x68 of 880859df4000 crash> rd 880859df4000 100 880859df4000: 0001 880859df4010: a0535290 dead0200 .RS. 880859df4020: 880859df4020 880859df4020 @.Y @.Y 880859df4030: 0002 00100010 880859df4040: 880859df4040 880859df4040 @@.Y@@.Y 880859df4050: 880859df4060: 8810588b5a00 .Z.X 880859df4070: 0001 880859df4078 x@.Y If we regards it as struct ipmi_smi in shutdown process it looks consistent. The remedy for this apparent race is affixed below. I think you are right about this problem, but in_shutdown is checked already a bit before when newmsg is extracted from the list. Wouldn't it be better to add the rcu_read_lock() region starting right before the previous in_shutdown check to after the send? That would avoid a leak in this case. While lying awake unable to sleep, I realized that you can't call the sender function while holding rcu_read_lock(). That will break RT, because you can't claim a mutex while holding rcu_read_lock(), and the sender function will claim normal spinlocks. So I need to think about this a bit. -corey Thanks, -corey Would this be adequate to prevent the race? Is the sender's mutex/spinlock sufficient to limit acc
Re: [RFC PATCH 00/13] Switchtec NTB Support
On Fri, Jun 16, 2017 at 01:34:59PM -0600, Logan Gunthorpe wrote: > > > On 16/06/17 12:38 PM, Serge Semin wrote: > > On Fri, Jun 16, 2017 at 11:08:52AM -0600, Logan Gunthorpe > > wrote: > > It's the way the NTB API was created for, to have set of functions to access > > NTB devices in the similar way. These aren't my beliefs, it's the way it was > > created. I agree it can be optional, but it shouldn't be made as the basics > > of the driver. It is called NTB "hardware" driver after all, not > > "emulating" or > > "abstracting" driver. > > Just more philosophy. You haven't given any good reason to remove the > functionality. Vague references to the way things were created aren't > compelling arguments. Better to cite code and point out actual problems. > Actual problem is the design of your driver. Of course you can disagree as much as you want. > > ntb_transport could work without Scratchpads, if it's properly altered to > > use NTB messaging. This should be the way to make things compatible, but not > > making the hardware driver suitable for just one ntb_transport. > > Ok, well when all the NTB clients no longer require using scratchpads > and we can all abide by the rule that clients must function without > them. Then, I'll remove the emulation. Until then, it stays. > > > It's not like my whim or something, but the way it's usually done. > > https://kernelnewbies.org/PatchPhilosophy > > > Cite from there: > > "Each patch should group changes into a logical sequence. Bug fixes must > > come first in the patchset, then new features. This is because we need to be > > able to backport bug fixes to older kernels, and they should not depend on > > new features." > > You should probably read that again because it doesn't actually support > your point (in fact it's saying something quite unrelated). 
It is also > probably a good idea to read the rest of the section you cite: > > "The idea here is that you should break changes up in such a way that it > will be easy to review." > > "When creating a new feature patchset, you may need to break up your > changes into multiple commits. " > > "Clean up patches that are over 200 lines long are discouraged, because > they are hard to review. Break those patches up into smaller patches. " > This doesn't prove your way of splitting patchset is correct, but supports my point. As well as the sentence about the logical sequence in addition to the thing about easy review. > Also, to quote Greg Kroah-Hartman from my last series[1]: > > "That's one big patch to review, would you want to do that? > > Can you break it up into smaller parts?" > > > You grouped the patches in according to your logical view or development > > progress (I don't know for sure), but it's not obvious for reviewers. > > From my perspective your new Microsemi Switchtec NTB driver is just one > > feature. I don't know who would think differently so to split the solid > > driver up for review. Switchtec management driver alteration might be the > > same - just one fix. It's much easier for you to have your commits squashed, > > than for me to look at your git tree, then get back to your patchset looking > > for a necessary piece of patch and commenting it there. > > Well you're free to think that but, in my experience, your opinion > differs significantly from the rest of the kernel community which I > personally agree with. > And your quotation doesn't prove you are right. Greg asked you to split at least the documentation. He had a point in asking for it, since it's logically correct. You weren't arguing with him, were you? But in this case you have sent the set of incremental patches of your own code, so I don't see how it can be easier for review, than a combined text.
> Now, if you'd like to actually review the code I'd be happy to address > any concerns you find. I won't be responding to any more philosophical > arguments or bike-shedding over the format of the patch. > I don't want to review a patchset, which isn't properly formatted. > Logan > > [1] https://lkml.org/lkml/2017/1/31/637 > > -- > You received this message because you are subscribed to the Google Groups > "linux-ntb" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to linux-ntb+unsubscr...@googlegroups.com. > To post to this group, send email to linux-...@googlegroups.com. > To view this discussion on the web visit > https://groups.google.com/d/msgid/linux-ntb/33b6c321-c0af-7340-8e8e-e929a5c7%40deltatee.com. > For more options, visit https://groups.google.com/d/optout.
Re: [PATCH] MIPS: Make individual platforms select ARCH_MIGHT_HAVE_PC_SERIO
On Fri, 16 Jun 2017, Florian Fainelli wrote: > > How did you determine that? Malta for one not only has an SMSC FDC37M817 > > Super I/O Controller featuring an 8042-compatible core, but actual PS/2 > > keyboard and mouse connectors as well. > > I was just grepping for i8042 in platform code to determine that, this > came after having SERIO accidentally enabled on my platform > (BMIPS_GENERIC) and seeing that it crashed badly and it annoyed the crap > out of me that MIPS had ARCH_MIGHT_HAVE_PC_SERIO for platforms that > don't need it. > > Will come up with a v2 that includes malta, any other platforms for > which it's not obvious? I don't know offhand, but in principle anything that has PCI and a southbridge (not all PCI platforms have one, e.g. Broadcom SWARM and BigSur are legacy-free) can have an 8042 wired. Ideally probing for 8042 hardware should be done by platform code and the driver's init code would not be called at all if there's no 8042 present, similarly to how e.g. RTC is usually registered. Maciej
Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'
On Fri, Jun 16, 2017 at 12:14:53PM -0700, Andrew Morton wrote: > On Fri, 16 Jun 2017 14:49:51 -0400 Johannes Weiner wrote: > > > On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote: > > > Hi, > > > > > > I see the following build error in -next when building hexagon images. > > > > > > CC arch/hexagon/kernel/asm-offsets.s > > > In file included from ./include/linux/memcontrol.h:30:0, > > > from ./include/linux/swap.h:8, > > > from ./arch/hexagon/include/asm/pgtable.h:27, > > > from ./include/linux/mm.h:70, > > > from arch/hexagon/kernel/asm-offsets.c:28: > > > ./include/linux/vmstat.h: In function '__inc_zone_page_state': > > > ./include/linux/vmstat.h:294:2: error: implicit declaration of function > > > 'page_zone' [-Werror=implicit-function-declaration] > > > ./include/linux/vmstat.h:294:2: warning: passing argument 1 of > > > '__inc_zone_state' makes pointer from integer without a cast [enabled by > > > default] > > > ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but > > > argument is of type 'int' > > > > vmstat.h depends on definitions in mm.h, but mm.h through the above > > chain includes vmstat.h first. It worked in my x86 test because x86 > > pgtable.h doesn't include swap.h. > > > > The headers are a bit of a mess. memcontrol.h is supposed to be a > > lower level header than mm.h and vmstat.h, yet the new accounting > > functions depend on mm.h definitions. > > > > Let's move the lruvec accounting infra to vmstat.h and shuffle > > memcontrol.h into the stack under mm.h and vmstat.h. > > > > Does the following fix the hexagon build? > > This breaks x86_64 allnoconfig. 
> > arch/x86/mm/pat.c:734: error: redefinition of 'arch_io_reserve_memtype_wc' > ./include/linux/io.h:175: note: previous definition of > 'arch_io_reserve_memtype_wc' was here > arch/x86/mm/pat.c:742: error: redefinition of 'arch_io_free_memtype_wc' > ./include/linux/io.h:181: note: previous definition of > 'arch_io_free_memtype_wc' was here wat: /home/hannes/src/linux/linux/arch/x86/mm/pat.c:734:5: error: redefinition of ‘arch_io_reserve_memtype_wc’ int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) ^~ In file included from /home/hannes/src/linux/linux/include/linux/irq.h:24:0, from /home/hannes/src/linux/linux/arch/x86/include/asm/hardirq.h:5, from /home/hannes/src/linux/linux/include/linux/hardirq.h:8, from /home/hannes/src/linux/linux/include/linux/memcontrol.h:24, from /home/hannes/src/linux/linux/include/linux/vmstat.h:9, from /home/hannes/src/linux/linux/include/linux/mm.h:1032, from /home/hannes/src/linux/linux/include/linux/pfn_t.h:3, from /home/hannes/src/linux/linux/arch/x86/mm/pat.c:15: /home/hannes/src/linux/linux/include/linux/io.h:175:19: note: previous definition of ‘arch_io_reserve_memtype_wc’ was here static inline int arch_io_reserve_memtype_wc(resource_size_t base, ^~ In any case, memcontrol.h doesn't/shouldn't need hardirq.h. When that include is removed, the below patch compiles on: x86 allno, x86_64 allno, and my regular x86_64 config: --- Subject: mm-memcontrol-per-lruvec-stats-infrastructure-fix-4 On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote: > Hi, > > I see the following build error in -next when building hexagon images. 
> > CC arch/hexagon/kernel/asm-offsets.s > In file included from ./include/linux/memcontrol.h:30:0, > from ./include/linux/swap.h:8, > from ./arch/hexagon/include/asm/pgtable.h:27, > from ./include/linux/mm.h:70, > from arch/hexagon/kernel/asm-offsets.c:28: > ./include/linux/vmstat.h: In function '__inc_zone_page_state': > ./include/linux/vmstat.h:294:2: error: implicit declaration of function > 'page_zone' [-Werror=implicit-function-declaration] > ./include/linux/vmstat.h:294:2: warning: passing argument 1 of > '__inc_zone_state' makes pointer from integer without a cast [enabled by > default] > ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but argument > is of type 'int' vmstat.h depends on definitions in mm.h, but mm.h through the above chain includes vmstat.h first. It worked in my x86 test because x86 pgtable.h doesn't include swap.h. The headers are a bit of a mess. memcontrol.h is supposed to be a lower level header than mm.h and vmstat.h, yet the new accounting functions depend on mm.h definitions. Let's move the lruvec accounting infra to vmstat.h and shuffle memcontrol.h into the stack under mm.h and vmstat.h. Reported-by: Guenter Roeck Signed-off-by
[PATCH] mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages
From: Tony Luck Speculative processor accesses may reference any memory that has a valid page table entry. While a speculative access won't generate a machine check, it will log the error in a machine check bank. That could cause escalation of a subsequent error since the overflow bit will be then set in the machine check bank status register. Code has to be double-plus-tricky to avoid mentioning the 1:1 virtual address of the page we want to map out otherwise we may trigger the very problem we are trying to avoid. We use a non-canonical address that passes through the usual Linux table walking code to get to the same "pte". Cc: Dave Hansen Cc: Naoya Horiguchi Cc: x...@kernel.org Cc: linux...@kvack.org Cc: linux-kernel@vger.kernel.org Cc: sta...@vger.kernel.org Signed-off-by: Tony Luck --- Thanks to Dave Hansen for reviewing several iterations of this. arch/x86/include/asm/page_64.h | 4 arch/x86/kernel/cpu/mcheck/mce.c | 35 +++ include/linux/mm_inline.h| 6 ++ mm/memory-failure.c | 2 ++ 4 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index b4a0d43248cf..b50df06ad251 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -51,6 +51,10 @@ static inline void clear_page(void *page) void copy_page(void *to, void *from); +#ifdef CONFIG_X86_MCE +#define arch_unmap_kpfn arch_unmap_kpfn +#endif + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5cfbaeb6529a..56563db0b2be 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "mce-internal.h" @@ -1056,6 +1057,40 @@ static int do_memory_failure(struct mce *m) return ret; } +#ifdef CONFIG_X86_64 + +void arch_unmap_kpfn(unsigned long pfn) +{ + unsigned long decoy_addr; + + /* +* Unmap this page from the kernel 1:1 mappings to make sure +* we don't 
log more errors because of speculative access to +* the page. +* We would like to just call: +* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); +* but doing that would radically increase the odds of a +* speculative access to the posion page because we'd have +* the virtual address of the kernel 1:1 mapping sitting +* around in registers. +* Instead we get tricky. We create a non-canonical address +* that looks just like the one we want, but has bit 63 flipped. +* This relies on set_memory_np() not checking whether we passed +* a legal address. +*/ + +#if PGDIR_SHIFT + 9 < 63 /* 9 because cpp doesn't grok ilog2(PTRS_PER_PGD) */ + decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); +#else +#error "no unused virtual bit available" +#endif + + if (set_memory_np(decoy_addr, 1)) + pr_warn("Could not invalidate pfn=0x%lx from 1:1 map \n", pfn); + +} +#endif + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e030a68ead7e..25438b2b6f22 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -126,4 +126,10 @@ static __always_inline enum lru_list page_lru(struct page *page) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#ifdef arch_unmap_kpfn +extern void arch_unmap_kpfn(unsigned long pfn); +#else +static __always_inline void arch_unmap_kpfn(unsigned long pfn) { } +#endif + #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 342fac9ba89b..9479e190dcbd 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1071,6 +1071,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } + arch_unmap_kpfn(pfn); + /* * Currently errors on hugetlbfs pages are measured in hugepage units, * so nr_pages should be 1 << compound_order. OTOH when errors are on -- 2.11.0
[PATCH] [perf/core branch] perf coresight: Fix ARM builds caused by misplaced __printf
Trailing __printf attributes work for function declarations, but not definitions. This patch fixes arm32/64 builds by placing __printf before the declarator. Otherwise this happens: arch/arm64/util/../../arm/util/cs-etm.c:586:1: error: attributes should be specified before the declarator in a function definition static int cs_device__print_file(const char *name, const char *fmt, ...) __printf(2, 3) ^~ arch/arm64/util/../../arm/util/cs-etm.c: In function ‘cs_etm_set_drv_config’: arch/arm64/util/../../arm/util/cs-etm.c:610:8: error: implicit declaration of function ‘cs_device__print_file’ [-Werror=implicit-function-declaration] ret = cs_device__print_file(enable_sink, "%d", 1); ^ arch/arm64/util/../../arm/util/cs-etm.c:610:2: error: nested extern declaration of ‘cs_device__print_file’ [-Werror=nested-externs] ret = cs_device__print_file(enable_sink, "%d", 1); ^~~ At top level: arch/arm64/util/../../arm/util/cs-etm.c:566:14: error: ‘cs_device__open_file’ defined but not used [-Werror=unused-function] static FILE *cs_device__open_file(const char *name) ^~~~ cc1: all warnings being treated as errors Fixes: 2ee261d962ac "tools: Adopt __printf from kernel sources" Cc: Arnaldo Carvalho de Melo Cc: Mathieu Poirier Signed-off-by: Kim Phillips --- Applies to acme's perf/core branch tools/perf/arch/arm/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 90a6f42ba904..7ce3d1a25133 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -583,7 +583,7 @@ static FILE *cs_device__open_file(const char *name) } -static int cs_device__print_file(const char *name, const char *fmt, ...) __printf(2, 3) +static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...) { va_list args; FILE *file; -- 2.11.0
Re: perf report: fix off-by-one for non-activation frames
On Freitag, 16. Juni 2017 13:57:44 CEST Jan Kratochvil wrote: > On Fri, 16 Jun 2017 13:51:37 +0200, Milian Wolff wrote: > > > perf-4.12.0-0.rc5.git0.1.fc27.x86_64 > > > > > > 39e32e gdb_main (/usr/libexec/gdb) > > > 10b6fa main (/usr/libexec/gdb) > > > > > >0x5565f6f6 <+54>:callq 0x558f17a0 > > >:mov > > >0x18(%rsp),%rcx > > [...] > > > Excuse me, but I'm having trouble following you. The non-GDB backtraces > > you > > are pasting do not show srcline information. So what exactly is broken? > > What is broken is that perf now reports address 10b6fa (corresponding to > relocated address 0x5565f6fa) but there is no instruction on address > 0x5565f6fa. If you 'objdump -d' it you cannot find any instruction > on address 0x5565f6fa (or on address 0x10b6fa). There is an > instruction on address 0x5565f6fb. > > > Maybe paste the perf output you get now and highlight what you'd expect > > instead? > > Actual: > 39e32e gdb_main (/usr/libexec/gdb) > 10b6fa main (/usr/libexec/gdb) > Expected: > 39e32f gdb_main (/usr/libexec/gdb) > 10b6fb main (/usr/libexec/gdb) > > I agree perf needs to calculate with 39e32e and 10b6fa. But it should > display to the user 39e32f and 10b6fb. Hmmm this will require some more changes throughout the stack then. I.e. we'll have to remember the "isactivation" flag along with the original IP, and only apply the offset then when we query for inliners or srcline information. Maybe I can pull that off somehow in the patch series I'm working on currently, which refactors the whole inline/srcline/callchain logic anyways. I don't see an easy way to fix the behavior. Does anyone else? So how do we deal with this situation in the interim? I'd prefer we keep the current "broken" state, as I consider it less broken than what we had before... I guess some of the core perf developers should decide how to handle this.
Thanks -- Milian Wolff | milian.wo...@kdab.com | Senior Software Engineer KDAB (Deutschland) GmbH&Co KG, a KDAB Group company Tel: +49-30-521325470 KDAB - The Qt Experts
Re: LTS testing with latest kselftests - some failures
On Fri, Jun 16, 2017 at 09:29:52PM +0200, Greg Kroah-Hartman wrote: > On Fri, Jun 16, 2017 at 06:46:51PM +0200, Luis R. Rodriguez wrote: > > Kees, please review 47e0bbb7fa98 below. > > Brian, please review be4a1326d12c below. > > > > On Thu, Jun 15, 2017 at 11:26:53PM +0530, Sumit Semwal wrote: > > > Hello Greg, Shuah, > > > > > > While testing 4.4.y and 4.9.y LTS kernels with latest kselftest, > > > > To be clear it seems like you are taking the latest upstream kselftest and > > run > > it against older stable kernels. Furthermore you seem to only run the shell > > script tests but are using older kselftests drivers? Is this all correct? > > Otherwise it is unclear how you are running into the issues below. > > > > Does 0-day do the same? I thought 0-day takes just the kselftest from each > > tree > > submitted. That *seemed* to me like the way it was designed. Shuah ? > > > > What's the name of *this* testing effort BTW? Is this part of the overall > > kselftest ? Or is this something Linaro does for LTS kernels ? If there > > is a name to your effort can you document it here so that others are aware: > > It's a "test LTS kernels to make sure Greg didn't break anything" type > of testing effort that Linaro is helping out with. OK so it's "standard" :) > This could also be called, "it's about time someone did this..." :) Good to know! > > > we found a couple more test failures due to test-kernel mismatch: > > > > > > 1. firmware tests: - linux 4.5 [1] and 4.10 [2] added a few updates to > > > tests, and related updates to lib/test_firmware.c to improve the > > > tests. Stable-4.4 misses these patches to lib/test_firmware.c. Stable > > > 4.9 misses the second update. > > > > <-- snip, skipped 2. and 3. --> > > > > > For all the 3 listed above, we will try and update the tests to > > > gracefully exit.
> > > > Hmm, this actually raises a good kselftest question: > > > > I *thought* kselftests were running tests on par with the kernels, so we > > would > > *not* take latest upstream kselftests to test against older kernels. Is this > > incorrect? > > That is incorrect. Your test should always degrade gracefully if the > feature is not present in the kernel under test. OK perfect, now I know to look for knobs in the shell tests to ensure this doesn't happen again. Some of the knobs however are for extending tests for existing APIs in older kernels, the async and custom fallback one are an example. There are a series of test cases later added which could help test LTS kernels. Would Linaro pick these test driver enhancements to help increase coverage of tests? Or is it not worth it? If it's worth it then what I was curious about was how to help make this easier for this process to bloom. > If the test is for a > bug that was fixed, then that fix should also go to a stable kernel > release. Indeed, that was perfectly clear. Luis
Re: [PATCH] tools/testing/selftests/sysctl: Add pre-check to the value of writes_strict
On 16 June 2017 at 22:49, Sumit Semwal wrote: > Hi Orson, > > Thanks for the patch. > > On 16 June 2017 at 14:58, Orson Zhai wrote: >> Sysctl test will fail in some items if the value of /proc/sys/kernel >> /sysctrl_writes_strict is 0 as the default value in kernel older than v4.5. >> >> Make this test more robust and compatible with older kernels by checking and >> update sysctrl_writes_strict value and restore it when test is done. >> >> Signed-off-by: Orson Zhai > > Please feel free to add my > Reviewed-by: Sumit Semwal > Tested-by: Sumit Semwal > [sumits: tested LTS-4.4 with hikey (arm64) ] Sure. I will add them to my patch V2. Thanks, Orson > >> --- >> tools/testing/selftests/sysctl/common_tests | 14 ++ >> tools/testing/selftests/sysctl/run_numerictests | 3 +++ >> tools/testing/selftests/sysctl/run_stringtests | 3 +++ >> 3 files changed, 20 insertions(+) >> >> diff --git a/tools/testing/selftests/sysctl/common_tests >> b/tools/testing/selftests/sysctl/common_tests >> index 17d534b1b7b4..f5c5c51d16f3 100644 >> --- a/tools/testing/selftests/sysctl/common_tests >> +++ b/tools/testing/selftests/sysctl/common_tests >> @@ -63,6 +63,20 @@ else >> echo "ok" >> fi >> >> +echo -n "Checking writes strict setting ... " >> +WRITES_STRICT="${SYSCTL}/kernel/sysctl_writes_strict" >> +if [ ! -e ${WRITES_STRICT} ]; then >> + echo "FAIL, but skip in case of old kernel" >&2 >> +else >> + val=$(cat ${WRITES_STRICT}) >> + if [ "$val" = "1" ]; then >> + echo "ok" >> + else >> + echo "FAIL, strict value is 0 but force to 1 to continue" >&2 >> + echo "1" > ${WRITES_STRICT} >> + fi >> +fi >> + >> # Now that we've validated the sanity of "set_test" and "set_orig", >> # we can use those functions to set starting states before running >> # specific behavioral tests. 
>> diff --git a/tools/testing/selftests/sysctl/run_numerictests >> b/tools/testing/selftests/sysctl/run_numerictests >> index 8510f93f2d14..c0a98fd82c5c 100755 >> --- a/tools/testing/selftests/sysctl/run_numerictests >> +++ b/tools/testing/selftests/sysctl/run_numerictests >> @@ -7,4 +7,7 @@ TEST_STR=$(( $ORIG + 1 )) >> >> . ./common_tests >> >> +if [ ! -z ${val} ]; then >> + echo ${val} > ${WRITES_STRICT} >> +fi >> exit $rc >> diff --git a/tools/testing/selftests/sysctl/run_stringtests >> b/tools/testing/selftests/sysctl/run_stringtests >> index 90a9293d520c..ae98d66a9ec6 100755 >> --- a/tools/testing/selftests/sysctl/run_stringtests >> +++ b/tools/testing/selftests/sysctl/run_stringtests >> @@ -74,4 +74,7 @@ else >> echo "ok" >> fi >> >> +if [ ! -z ${val} ]; then >> + echo ${val} > ${WRITES_STRICT} >> +fi >> exit $rc >> -- >> 2.12.2 > > > Best, > Sumit.
Re: [PATCH v2 3/3] dax: use common 4k zero page for dax mmap reads
On Thu, Jun 15, 2017 at 04:58:56PM +0200, Jan Kara wrote: > On Wed 14-06-17 11:22:11, Ross Zwisler wrote: > > @@ -216,17 +217,6 @@ static void dax_unlock_mapping_entry(struct > > address_space *mapping, > > dax_wake_mapping_entry_waiter(mapping, index, entry, false); > > } > > > > -static void put_locked_mapping_entry(struct address_space *mapping, > > -pgoff_t index, void *entry) > > -{ > > - if (!radix_tree_exceptional_entry(entry)) { > > - unlock_page(entry); > > - put_page(entry); > > - } else { > > - dax_unlock_mapping_entry(mapping, index); > > - } > > -} > > - > > The naming becomes asymetric with this. So I'd prefer keeping > put_locked_mapping_entry() as a trivial wrapper around > dax_unlock_mapping_entry() unless we can craft more sensible naming / API > for entry grabbing (and that would be a separate patch anyway). Sure, that works for me. I'll fix for v3. > > -static int dax_load_hole(struct address_space *mapping, void **entry, > > +static int dax_load_hole(struct address_space *mapping, void *entry, > > struct vm_fault *vmf) > > { > > struct inode *inode = mapping->host; > > - struct page *page; > > - int ret; > > - > > - /* Hole page already exists? Return it... */ > > - if (!radix_tree_exceptional_entry(*entry)) { > > - page = *entry; > > - goto finish_fault; > > - } > > + unsigned long vaddr = vmf->address; > > + int ret = VM_FAULT_NOPAGE; > > + struct page *zero_page; > > + void *entry2; > > > > - /* This will replace locked radix tree entry with a hole page */ > > - page = find_or_create_page(mapping, vmf->pgoff, > > - vmf->gfp_mask | __GFP_ZERO); > > With this gone, you can also remove the special DAX handling from > mm/filemap.c: page_cache_tree_insert() and remove from dax.h > dax_wake_mapping_entry_waiter(), dax_radix_locked_entry() and RADIX_DAX > definitions. Yay! As a separate patch please. Oh, yay! :) Sure, I'll have this patch for v3.
Re: [PATCH] PCI / PM: Restore the status of PCI devices across hibernation
On Thu, May 25, 2017 at 04:49:07PM +0800, Chen Yu wrote: > Currently we saw a lot of "No irq handler" errors during hibernation, > which caused the system hang finally: > > [ 710.141581] ata4.00: qc timeout (cmd 0xec) > [ 710.147135] ata4.00: failed to IDENTIFY (I/O error, err_mask=0x4) > [ 710.154593] ata4.00: revalidation failed (errno=-5) > [ 710.468124] ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 300) > [ 710.477746] do_IRQ: 31.151 No irq handler for vector > > According to above logs, there is an interrupt triggered and it is > dispatched to CPU31 with a vector number 151, but there is no handler > for it, thus this irq will not get acked and caused irq flood which kill > the system. To be more specific, the 31.151 is an interrupt from the ahci > host controller. > > After some investigation, the reason why this issue is triggered is > because the thaw_noirq() function does not restore the MSI/MSIX settings > across hibernation. > > The scenario is illustrated below: > > 1. Before the hibernation starts, the irq 34 is the handler for the ahci > device, >which is binded on cpu31. > 2. Hibernation starts, the ahci device is put into low power state. > 3. All the nonboot CPUs are put offline, so the irq 34 has to be migrated to >the last alive one - CPU0. > 4. After the snapshot has been created, all the nonboot CPUs are brought up > again, >the CPU affinity for IRQ 34 remains to be 0. > 5. ahci device are put into D0. > 6. The snapshot is written to the disk. > > The issue is triggered in step 6, in theory the ahci interrupt should be > delivered to CPU0, however the actually result is that this interrupt is > delivered to the original CPU31 instead, which cause the "No irq handler" > issue. > > Ying Huang has has provided a clue that, in step 3 it is possible that the > writing > to the register might not take effect as the PCI devices have been put > suspended. 
> Actually it is true: > In step 3, the irq 34 affinity is supposed to be modified from 31 to 0, > but actually it did not. In __pci_write_msi_msg(), if the device is already > in low power state, the low level msi message entry will not be updated > but cached. So in theory during the device restore process, the cached msi > modification information should be written back to the hardware, and this > is what pci_restore_msi_state() do during normal suspend-resume. > But this is not the case for hibernation, pci_restore_msi_state() is not > invoked currently, to be more specific, pci_restore_state() is not invoked > in pci_pm_thaw_noirq(), although pci_save_state() has saved the necessary > pci cached information in pci_pm_freeze_noirq(). > > This patch tries to restore the pci status for the device during hibernation, > otherwise the status might be lost across hibernation(for example, the > MSI/MSIX > message settings), which might cause problems during hibernation. > > Suggested-by: Ying Huang > Suggested-by: Rafael J. Wysocki > Cc: Rafael J. Wysocki > Cc: Bjorn Helgaas > Cc: Len Brown > Cc: Dan Williams > Cc: Rui Zhang > Cc: Ying Huang > Cc: linux-...@vger.kernel.org > Cc: linux...@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Signed-off-by: Chen Yu Added a stable tag and applied with Rafael's reviewed-by to pci/pm for v4.13, thanks! pci_restore_state() restores a lot of stuff besides MSI/MSI-X: PCIe device, link, slot control, ATS, VC, BARs, ACS, IOV. I guess I'm a little surprised that we haven't noticed more issues if all these things were broken. 
> --- > drivers/pci/pci-driver.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c > index 192e7b6..b399fa3 100644 > --- a/drivers/pci/pci-driver.c > +++ b/drivers/pci/pci-driver.c > @@ -964,6 +964,7 @@ static int pci_pm_thaw_noirq(struct device *dev) > return pci_legacy_resume_early(dev); > > pci_update_current_state(pci_dev, PCI_D0); > + pci_restore_state(pci_dev); > > if (drv && drv->pm && drv->pm->thaw_noirq) > error = drv->pm->thaw_noirq(dev); > -- > 2.7.4 >
Re: [PATCH v2 1/3] mm: add vm_insert_mixed_mkwrite()
On Thu, Jun 15, 2017 at 04:42:04PM +0200, Jan Kara wrote: > On Wed 14-06-17 11:22:09, Ross Zwisler wrote: > > To be able to use the common 4k zero page in DAX we need to have our PTE > > fault path look more like our PMD fault path where a PTE entry can be > > marked as dirty and writeable as it is first inserted, rather than waiting > > for a follow-up dax_pfn_mkwrite() => finish_mkwrite_fault() call. > > > > Right now we can rely on having a dax_pfn_mkwrite() call because we can > > distinguish between these two cases in do_wp_page(): > > > > case 1: 4k zero page => writable DAX storage > > case 2: read-only DAX storage => writeable DAX storage > > > > This distinction is made by via vm_normal_page(). vm_normal_page() returns > > false for the common 4k zero page, though, just as it does for DAX ptes. > > Instead of special casing the DAX + 4k zero page case, we will simplify our > > DAX PTE page fault sequence so that it matches our DAX PMD sequence, and > > get rid of dax_pfn_mkwrite() completely. > > > > This means that insert_pfn() needs to follow the lead of insert_pfn_pmd() > > and allow us to pass in a 'mkwrite' flag. If 'mkwrite' is set insert_pfn() > > will do the work that was previously done by wp_page_reuse() as part of the > > dax_pfn_mkwrite() call path. > > > > Signed-off-by: Ross Zwisler > > So I agree that getting rid of dax_pfn_mkwrite() and using fault handler in > that case is a way to go. However I somewhat dislike the > vm_insert_mixed_mkwrite() thing - it looks like a hack - and I'm aware that > we have a similar thing for PMD which is ugly as well. Besides being ugly > I'm also concerned that when 'mkwrite' is set, we just silently overwrite > whatever PTE was installed at that position. Not that I'd see how that > could screw us for DAX but still a concern that e.g. some PTE flag could > get discarded by this is there... 
In fact, for !HAVE_PTE_SPECIAL > architectures, you will leak zero page references by just overwriting the > PTE - for those archs you really need to unmap zero page before replacing > PTE (and the same for PMD I suppose). > > So how about some vmf_insert_pfn(vmf, pe_size, pfn) helper that would > properly detect PTE / PMD case, read / write case etc., check that PTE did > not change from orig_pte, and handle all the nasty details instead of > messing with insert_pfn? > > Honza Sounds good, I'll figure this out for v3. Thanks for the review!
Re: [PATCH RESEND 03/13] mfd: cros_ec: add debugfs, console log file
Hi Enric, I have gotten around to reviewing this series, and hope to get this in ASAP. I found an issue with this commit, but I'll go ahead and fix it myself as I'm creating the immutable branch. No need to respin the series. On Tue, May 16, 2017 at 06:13:09PM +0200, Enric Balletbo i Serra wrote: > +static int ec_read_version_supported(struct cros_ec_dev *ec) > +{ > + struct ec_params_get_cmd_versions_v1 *params; > + struct ec_response_get_cmd_versions *response; > + int ret; > + > + struct cros_ec_command *msg; > + > + msg = kzalloc(sizeof(*msg) + max(sizeof(params), sizeof(response)), > + GFP_KERNEL); > + if (!msg) > + return 0; > + > + msg->command = EC_CMD_GET_CMD_VERSIONS + ec->cmd_offset; > + msg->outsize = sizeof(*params); > + msg->insize = sizeof(*response); By my diff, the above two lines were changed from the original CHROMIUM commit, based on Doug's comment here: https://lkml.org/lkml/2017/2/22/630 However, this is an incomplete fix. Instead, we should pick this: https://chromium-review.googlesource.com/#/c/444085/ I'll go ahead and do that. Thanks! Benson -- Benson Leung Staff Software Engineer Chrome OS Kernel Google Inc. ble...@google.com Chromium OS Project ble...@chromium.org signature.asc Description: Digital signature
[tip:perf/urgent] perf evsel: Fix probing of precise_ip level for default cycles event
Commit-ID: 7a1ac110c22eb726684c837544a2d42c33e07be7 Gitweb: http://git.kernel.org/tip/7a1ac110c22eb726684c837544a2d42c33e07be7 Author: Arnaldo Carvalho de Melo AuthorDate: Fri, 9 Jun 2017 16:54:28 -0300 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Jun 2017 15:44:29 -0300 perf evsel: Fix probing of precise_ip level for default cycles event Since commit 18e7a45af91a ("perf/x86: Reject non sampling events with precise_ip") returns -EINVAL for sys_perf_event_open() with an attribute with (attr.precise_ip > 0 && attr.sample_period == 0), just like is done in the routine used to probe the max precise level when no events were passed to 'perf record' or 'perf top', i.e.: perf_evsel__new_cycles() perf_event_attr__set_max_precise_ip() The x86 code, in x86_pmu_hw_config(), which is called all the way from sys_perf_event_open() did, starting with the aforementioned commit: /* There's no sense in having PEBS for non sampling events: */ if (!is_sampling_event(event)) return -EINVAL; Which makes it fail for cycles:ppp, cycles:pp and cycles:p, always using just the non precise cycles variant. To make sure that this is the case, I tested it, before this patch, with: # perf probe -L x86_pmu_hw_config 0 int x86_pmu_hw_config(struct perf_event *event) 1 { 2 if (event->attr.precise_ip) { 17 if (event->attr.precise_ip > precise) 18 return -EOPNOTSUPP; /* There's no sense in having PEBS for non sampling events: */ 21 if (!is_sampling_event(event)) 22 return -EINVAL; } # perf probe x86_pmu_hw_config:22 Added new events: probe:x86_pmu_hw_config (on x86_pmu_hw_config:22) probe:x86_pmu_hw_config_1 (on x86_pmu_hw_config:22) You can now use it in all perf tools, such as: perf record -e probe:x86_pmu_hw_config_1 -aR sleep 1 # perf trace -e perf_event_open,probe:x86_pmu_hwconfig*/max-stack=16/ perf record usleep 1 0.000 ( 0.015 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ... 
0.015 ( ): probe:x86_pmu_hw_config:(9c0065e1)) x86_pmu_hw_config ([kernel.kallsyms]) hsw_hw_config ([kernel.kallsyms]) x86_pmu_event_init ([kernel.kallsyms]) perf_try_init_event ([kernel.kallsyms]) perf_event_alloc ([kernel.kallsyms]) SYSC_perf_event_open ([kernel.kallsyms]) sys_perf_event_open ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) return_from_SYSCALL_64 ([kernel.kallsyms]) syscall (/usr/lib64/libc-2.24.so) perf_event_attr__set_max_precise_ip (/home/acme/bin/perf) perf_evsel__new_cycles (/home/acme/bin/perf) perf_evlist__add_default (/home/acme/bin/perf) cmd_record (/home/acme/bin/perf) run_builtin (/home/acme/bin/perf) handle_internal_command (/home/acme/bin/perf) 0.000 ( 0.021 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument 0.023 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ... 0.025 ( ): probe:x86_pmu_hw_config:(9c0065e1)) x86_pmu_hw_config ([kernel.kallsyms]) hsw_hw_config ([kernel.kallsyms]) x86_pmu_event_init ([kernel.kallsyms]) perf_try_init_event ([kernel.kallsyms]) perf_event_alloc ([kernel.kallsyms]) SYSC_perf_event_open ([kernel.kallsyms]) sys_perf_event_open ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) return_from_SYSCALL_64 ([kernel.kallsyms]) syscall (/usr/lib64/libc-2.24.so) perf_event_attr__set_max_precise_ip (/home/acme/bin/perf) perf_evsel__new_cycles (/home/acme/bin/perf) perf_evlist__add_default (/home/acme/bin/perf) cmd_record (/home/acme/bin/perf)
[tip:perf/urgent] perf tools: Fix build with ARCH=x86_64
Commit-ID: 7a759cd8e8272ee18922838ee711219c7c796a31 Gitweb: http://git.kernel.org/tip/7a759cd8e8272ee18922838ee711219c7c796a31 Author: Jiada Wang AuthorDate: Sun, 9 Apr 2017 20:02:37 -0700 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 14 Jun 2017 15:44:29 -0300 perf tools: Fix build with ARCH=x86_64 With commit: 0a943cb10ce78 (tools build: Add HOSTARCH Makefile variable) when building for ARCH=x86_64, ARCH=x86_64 is passed to perf instead of ARCH=x86, so the perf build process searchs header files from tools/arch/x86_64/include, which doesn't exist. The following build failure is seen: In file included from util/event.c:2:0: tools/include/uapi/linux/mman.h:4:27: fatal error: uapi/asm/mman.h: No such file or directory compilation terminated. Fix this issue by using SRCARCH instead of ARCH in perf, just like the main kernel Makefile and tools/objtool's. Signed-off-by: Jiada Wang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Eugeniu Rosca Cc: Jan Stancek Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rui Teng Cc: Sukadev Bhattiprolu Cc: Wang Nan Fixes: 0a943cb10ce7 ("tools build: Add HOSTARCH Makefile variable") Link: http://lkml.kernel.org/r/1491793357-14977-2-git-send-email-jiada_w...@mentor.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 38 +++--- tools/perf/Makefile.perf| 2 +- tools/perf/arch/Build | 2 +- tools/perf/pmu-events/Build | 4 ++-- tools/perf/tests/Build | 2 +- tools/perf/util/header.c| 2 +- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 8354d04..1f4fbc9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq 
($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -228,12 +228,12 @@ ifeq ($(DEBUG),0) endif INC_FLAGS += -I$(src-perf)/util/include -INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include +INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/uapi INC_FLAGS += -I$(srctree)/tools/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -355,7 +355,7 @@ ifndef NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -380,7 +380,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PR
[tip:perf/urgent] perf unwind: Report module before querying isactivation in dwfl unwind
Commit-ID: 9126cbbacecb8917bd0418809ef1d26616b2061e Gitweb: http://git.kernel.org/tip/9126cbbacecb8917bd0418809ef1d26616b2061e Author: Milian Wolff AuthorDate: Fri, 2 Jun 2017 16:37:53 +0200 Committer: Arnaldo Carvalho de Melo CommitDate: Fri, 16 Jun 2017 14:37:30 -0300 perf unwind: Report module before querying isactivation in dwfl unwind The PC returned by dwfl_frame_pc() may map into a not-yet-reported module. We have to report it before we continue unwinding. But when we query for the isactivation flag in dwfl_frame_pc, libdw will actually do one more unwinding step internally which can then break and lead to missed frames or broken stacks. With libunwind we get e.g.: ~ heaptrack_gui 2228 135073.400474: 613969 cycles: 108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0) 109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0) 147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0) 109fbf QLocalePrivate::updateSystemPrivate (/usr/lib/libQt5Core.so.5.8.0) 10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0) 211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0) 92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0) 2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0) 297c53 QCoreApplicationPrivate::init (/usr/lib/libQt5Core.so.5.8.0) f7cde QGuiApplicationPrivate::init (/usr/lib/libQt5Gui.so.5.8.0) 1589e8 QApplicationPrivate::init (/usr/lib/libQt5Widgets.so.5.8.0) 78622 main (/home/milian/projects/compiled/other/bin/heaptrack_gui) 20439 __libc_start_main (/usr/lib/libc-2.25.so) 78299 _start (/home/milian/projects/compiled/other/bin/heaptrack_gui) heaptrack_gui 2228 135073.401156: 569521 cycles: 131633 QString::endsWith 
(/usr/lib/libQt5Core.so.5.8.0) 1a0701 QDir::cleanPath (/usr/lib/libQt5Core.so.5.8.0) 21b82d [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1b3727 QFileInfo::canonicalFilePath (/usr/lib/libQt5Core.so.5.8.0) 2780c7 QFactoryLoader::update (/usr/lib/libQt5Core.so.5.8.0) 279525 QFactoryLoader::QFactoryLoader (/usr/lib/libQt5Core.so.5.8.0) e5bd0 QPlatformIntegrationFactory::create (/usr/lib/libQt5Gui.so.5.8.0) f5a1c QGuiApplicationPrivate::createPlatformIntegration (/usr/lib/libQt5Gui.so.5.8.0) f650c QGuiApplicationPrivate::createEventDispatcher (/usr/lib/libQt5Gui.so.5.8.0) 298524 QCoreApplicationPrivate::init (/usr/lib/libQt5Core.so.5.8.0) f7cde QGuiApplicationPrivate::init (/usr/lib/libQt5Gui.so.5.8.0) 1589e8 QApplicationPrivate::init (/usr/lib/libQt5Widgets.so.5.8.0) 78622 main (/home/milian/projects/compiled/other/bin/heaptrack_gui) 20439 __libc_start_main (/usr/lib/libc-2.25.so) 78299 _start (/home/milian/projects/compiled/other/bin/heaptrack_gui) ~ Note the two frames 1589e8 and 78622 in the first sample. These are missing when unwinding with libdw. The second sample's breakage is more obvious: ~ heaptrack_gui 2228 135073.400474: 613969 cycles: 108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0) 109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0) 147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0) 109fbf QLocalePrivate::updateSystemPrivate (/usr/lib/libQt5Core.so.5.8.0) 10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0) 211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0) 92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0) 2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0) 297c53 QCoreApplicationPrivate::init (/usr/lib/l
Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.
On Fri, Jun 16, 2017 at 09:18:29PM +1000, Michael Ellerman wrote: > Ram Pai writes: > > diff --git a/arch/powerpc/include/uapi/asm/ptrace.h > > b/arch/powerpc/include/uapi/asm/ptrace.h > > index 8036b38..109d0c2 100644 > > --- a/arch/powerpc/include/uapi/asm/ptrace.h > > +++ b/arch/powerpc/include/uapi/asm/ptrace.h > > @@ -49,6 +49,8 @@ struct pt_regs { > > unsigned long dar; /* Fault registers */ > > unsigned long dsisr;/* on 4xx/Book-E used for ESR */ > > unsigned long result; /* Result of a system call */ > > + unsigned long dscr; /* contents of the DSCR register */ > > + unsigned long amr; /* contents of AMR register */ > > }; > > You can't change pt_regs, it's ABI. > > > @@ -109,7 +111,8 @@ struct pt_regs { > > #define PT_DSISR 42 > > #define PT_RESULT 43 > > #define PT_DSCR 44 > > -#define PT_REGS_COUNT 44 > > +#define PT_AMR 45 > > +#define PT_REGS_COUNT 45 > > You can add PT_AMR, but it has to be synthetic like DSCR, ie. not > actually in pt_regs but available via ptrace. ok. > > But do we want to do that? How does the x86 code export the key(s) of a > process? Or doesn't it? The semantics defined on x86 is, signal handler has to have a way of knowing the contents of the PKRU; (the x86 equivalent of AMR). Also the signal handler has to have the ability to modify the PKRU before it returns from the signal handler. This modified information will be used by the kernel to program the CPU's PKRU register. If the signal handler does not have the ability to do so, then when the signal handler returns and the user code restarts executing where it had left, it will continue to access the same protected address and fault again, which will again invoke the signal handler and this will continue infinitely. We have to provide the same semantics on powerpc. The way I intend to do it is to use one of the unused fields in the gp_regs and fill that with the contents of the AMR register. PT_AMR, at offset 45 in gp_regs is not used currently.
offset 45, 46, and 47 are available AFAICT. Dave: Why is it not ok to reprogram the PKRU from the signal handler, instead of telling the kernel to do so on its behalf? Or have I got my understanding of the semantics wrong? > > cheers -- Ram Pai
Re: [GIT PULL 0/3] perf/urgent fixes
* Arnaldo Carvalho de Melo wrote: > Hi Ingo, > > Please consider pulling, > > - Arnaldo > > Test results at the end of this message, as usual. > > The following changes since commit 63f700aab4c11d46626de3cd051dae56cf7e9056: > > Merge tag 'xtensa-20170612' of git://github.com/jcmvbkbc/linux-xtensa > (2017-06-13 15:09:10 +0900) > > are available in the git repository at: > > git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git > tags/perf-urgent-for-mingo-4.12-20170616 > > for you to fetch changes up to 9126cbbacecb8917bd0418809ef1d26616b2061e: > > perf unwind: Report module before querying isactivation in dwfl unwind > (2017-06-16 14:37:30 -0300) > > > perf/urgent fixes: > > - Fix probing of precise_ip level for default cycles event, that > got broken recently on x86_64 when its arch code started > considering invalid requesting precise samples when not sampling > (i.e. when attr.sample_period == 0). > > This also fixes another problem in s/390 where the precision > probing with sample_period == 0 returned precise_ip > 0, that > then, when setting up the real cycles event (not probing) would > return EOPNOTSUPP for precise_ip > 0 (as determined previously > by probing) and sample_period > 0. > > These problems resulted in attr_precise not being set to the > highest precision available on x86_64 when no event was specified, > i.e. the canonical: > > perf record ./workload > > would end up using attr.precise_ip = 0. As a workaround this would > need to be done: > > perf record -e cycles:P ./workload > > And on s/390 it would plain not work, requiring using: > > perf record -e cycles ./workload > > as a workaround. (Arnaldo Carvalho de Melo) > > - Fix perf build with ARCH=x86_64, when ARCH should be transformed > into ARCH=x86, just like with the main kernel Makefile and > tools/objtool's, i.e. use SRCARCH. (Jiada Wang) > > - Avoid accessing uninitialized data structures when unwinding with > elfutils's libdw, making it more closely mimic libunwind's unwinder.
> (Milian Wolff) > > Signed-off-by: Arnaldo Carvalho de Melo > > > Arnaldo Carvalho de Melo (1): > perf evsel: Fix probing of precise_ip level for default cycles event > > Jiada Wang (1): > perf tools: Fix build with ARCH=x86_64 > > Milian Wolff (1): > perf unwind: Report module before querying isactivation in dwfl unwind > > tools/perf/Makefile.config | 38 +++--- > tools/perf/Makefile.perf | 2 +- > tools/perf/arch/Build | 2 +- > tools/perf/pmu-events/Build| 4 ++-- > tools/perf/tests/Build | 2 +- > tools/perf/tests/task-exit.c | 2 +- > tools/perf/util/evsel.c| 12 > tools/perf/util/header.c | 2 +- > tools/perf/util/unwind-libdw.c | 8 > 9 files changed, 46 insertions(+), 26 deletions(-) Pulled, thanks a lot Arnaldo! Ingo
Re: [RFC PATCH 00/13] Switchtec NTB Support
On 16/06/17 12:38 PM, Serge Semin wrote: > On Fri, Jun 16, 2017 at 11:08:52AM -0600, Logan Gunthorpe > wrote: > It's the way the NTB API was created for, to have set of functions to access > NTB devices in the similar way. These aren't my beliefs, it's the way it was > created. I agree it can be optional, but it shouldn't be made as the basics > of the driver. It is called NTB "hardware" driver after all, not "emulating" > or > "abstracting" driver. Just more philosophy. You haven't given any good reason to remove the functionality. Vague references to the way things were created aren't compelling arguments. Better to cite code and point out actual problems. > ntb_transport could work without Scratchpads, if it's properly altered to > use NTB messaging. This should be the way to make things compatible, but not > making the hardware driver suitable for just one ntb_transport. Ok, well when all the NTB clients no longer require using scratchpads and we can all abide by the rule that clients must function without them, then I'll remove the emulation. Until then, it stays. > It's not like my whim or something, but the way it's usually done. > https://kernelnewbies.org/PatchPhilosophy > Cite from there: > "Each patch should group changes into a logical sequence. Bug fixes must > come first in the patchset, then new features. This is because we need to be > able to backport bug fixes to older kernels, and they should not depend on > new features." You should probably read that again because it doesn't actually support your point (in fact it's saying something quite unrelated). It is also probably a good idea to read the rest of the section you cite: "The idea here is that you should break changes up in such a way that it will be easy to review." "When creating a new feature patchset, you may need to break up your changes into multiple commits. " "Clean up patches that are over 200 lines long are discouraged, because they are hard to review.
Break those patches up into smaller patches. " Also, to quote Greg Kroah-Hartman from my last series[1]: "That's one big patch to review, would you want to do that? Can you break it up into smaller parts?" > You grouped the patches according to your logical view or development > progress (I don't know for sure), but it's not obvious for reviewers. > From my perspective your new Microsemi Switchtec NTB driver is just one > feature. I don't know who would think differently so to split the solid > driver up for review. Switchtec management driver alteration might be the > same - just one fix. It's much easier for you to have your commits squashed, > than for me to look at your git tree, then get back to your patchset looking > for a necessary piece of the patch and commenting it there. Well you're free to think that but, in my experience, your opinion differs significantly from the rest of the kernel community which I personally agree with. Now, if you'd like to actually review the code I'd be happy to address any concerns you find. I won't be responding to any more philosophical arguments or bike-shedding over the format of the patch. Logan [1] https://lkml.org/lkml/2017/1/31/637
Re: LTS testing with latest kselftests - some failures
On Fri, Jun 16, 2017 at 06:46:51PM +0200, Luis R. Rodriguez wrote: > Kees, please review 47e0bbb7fa98 below. > Brian, please review be4a1326d12c below. > > On Thu, Jun 15, 2017 at 11:26:53PM +0530, Sumit Semwal wrote: > > Hello Greg, Shuah, > > > > While testing 4.4.y and 4.9.y LTS kernels with latest kselftest, > > To be clear it seems like you are taking the latest upstream kselftest and run > it against older stable kernels. Furthermore you seem to only run the shell > script tests but are using older kselftests drivers? Is this all correct? > Otherwise it is unclear how you are running into the issues below. > > Does 0-day do the same? I thought 0-day takes just the kselftest from each > tree > submitted. That *seemed* to me like the way it was designed. Shuah ? > > What's the name of *this* testing effort BTW? Is this part of the overall > kselftest ? Or is this something Linaro does for LTS kernels ? If there > is a name to your effort can you document it here so that others are aware: It's a "test LTS kernels to make sure Greg didn't break anything" type of testing effort that Linaro is helping out with. This could also be called, "it's about time someone did this..." :) > > we found a couple more test failures due to test-kernel mismatch: > > > > 1. firmware tests: - linux 4.5 [1] and 4.10 [2] added a few updates to > > tests, and related updates to lib/test_firmware.c to improve the > > tests. Stable-4.4 misses these patches to lib/test_firmware.c. Stable > > 4.9 misses the second update. > > <-- snip, skipped 2. and 3. --> > > > For all the 3 listed above, we will try and update the tests to gracefully > > exit. > > Hmm, this actually raises a good kselftest question: > > I *thought* kselftests were running tests on par with the kernels, so we would > *not* take latest upstream kselftests to test against older kernels. Is this > incorrect? That is incorrect. Your test should always degrade gracefully if the feature is not present in the kernel under test.
If the test is for a bug that was fixed, then that fix should also go to a stable kernel release. thanks, greg k-h
[PATCH 1/6] rtc: s3c: Jump to central exit point on getting src clock error
In other error paths in probe, centralized exit point was used so make this consistent. Signed-off-by: Krzysztof Kozlowski --- drivers/rtc/rtc-s3c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index d44fb34df8fe..c5aa7a35d07f 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -510,8 +510,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) else dev_dbg(&pdev->dev, "probe deferred due to missing rtc src clk\n"); - clk_disable_unprepare(info->rtc_clk); - return ret; + goto err_src_clk; } clk_prepare_enable(info->rtc_src_clk); } @@ -575,6 +574,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) if (info->data->needs_src_clk) clk_disable_unprepare(info->rtc_src_clk); +err_src_clk: clk_disable_unprepare(info->rtc_clk); return ret; -- 2.9.3
[PATCH 5/6] rtc: s3c: Handle clock prepare failures in probe
clk_prepare_enable() can fail so handle such case. Signed-off-by: Krzysztof Kozlowski --- drivers/rtc/rtc-s3c.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index c666b95fb8d7..0cb2f27a30b4 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -498,7 +498,9 @@ static int s3c_rtc_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n"); return ret; } - clk_prepare_enable(info->rtc_clk); + ret = clk_prepare_enable(info->rtc_clk); + if (ret) + return ret; if (info->data->needs_src_clk) { info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src"); @@ -512,7 +514,9 @@ static int s3c_rtc_probe(struct platform_device *pdev) "probe deferred due to missing rtc src clk\n"); goto err_src_clk; } - clk_prepare_enable(info->rtc_src_clk); + ret = clk_prepare_enable(info->rtc_src_clk); + if (ret) + goto err_src_clk; } /* check to see if everything is setup correctly */ -- 2.9.3
[PATCH 2/6] rtc: s3c: Minor white-space cleanups
Minor cleanups to make the code easier to read. No functional changes. 1. Remove one space before labels as this is nowadays mostly preferred. 2. Fix indentation of arguments in function calls. 3. Split structure member declaration. Signed-off-by: Krzysztof Kozlowski --- drivers/rtc/rtc-s3c.c | 47 +++ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index c5aa7a35d07f..2b503dab7957 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -49,7 +49,8 @@ struct s3c_rtc { spinlock_t pie_lock; spinlock_t alarm_clk_lock; - int ticnt_save, ticnt_en_save; + int ticnt_save; + int ticnt_en_save; bool wake_en; }; @@ -169,7 +170,7 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) s3c_rtc_enable_clk(info); - retry_get_time: +retry_get_time: rtc_tm->tm_min = readb(info->base + S3C2410_RTCMIN); rtc_tm->tm_hour = readb(info->base + S3C2410_RTCHOUR); rtc_tm->tm_mday = readb(info->base + S3C2410_RTCDATE); @@ -199,8 +200,8 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) rtc_tm->tm_year += 100; dev_dbg(dev, "read time %04d.%02d.%02d %02d:%02d:%02d\n", -1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday, -rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec); + 1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday, + rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec); rtc_tm->tm_mon -= 1; @@ -213,8 +214,8 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) int year = tm->tm_year - 100; dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n", -1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, -tm->tm_hour, tm->tm_min, tm->tm_sec); + 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); /* we get around y2k by simply not supporting it */ @@ -259,9 +260,9 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->enabled = (alm_en & S3C2410_RTCALM_ALMEN) ? 
1 : 0; dev_dbg(dev, "read alarm %d, %04d.%02d.%02d %02d:%02d:%02d\n", -alm_en, -1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday, -alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec); + alm_en, + 1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday, + alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec); /* decode the alarm enable field */ if (alm_en & S3C2410_RTCALM_SECEN) @@ -295,9 +296,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) int year = tm->tm_year - 100; dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n", -alrm->enabled, -1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday, -tm->tm_hour, tm->tm_min, tm->tm_sec); + alrm->enabled, + 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); s3c_rtc_enable_clk(info); @@ -378,8 +379,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info) dev_info(info->dev, "rtc disabled, re-enabling\n"); tmp = readw(info->base + S3C2410_RTCCON); - writew(tmp | S3C2410_RTCCON_RTCEN, - info->base + S3C2410_RTCCON); + writew(tmp | S3C2410_RTCCON_RTCEN, info->base + S3C2410_RTCCON); } if (con & S3C2410_RTCCON_CNTSEL) { @@ -387,7 +387,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info) tmp = readw(info->base + S3C2410_RTCCON); writew(tmp & ~S3C2410_RTCCON_CNTSEL, - info->base + S3C2410_RTCCON); + info->base + S3C2410_RTCCON); } if (con & S3C2410_RTCCON_CLKRST) { @@ -395,7 +395,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info) tmp = readw(info->base + S3C2410_RTCCON); writew(tmp & ~S3C2410_RTCCON_CLKRST, - info->base + S3C2410_RTCCON); + info->base + S3C2410_RTCCON); } } @@ -481,7 +481,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) } dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n", -info->irq_tick, info->irq_alarm); + info->irq_tick, info->irq_alarm); /* get the memory region */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -520,7 +520,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) 
info->data->enable(info); dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n", -readw(info->base +
[PATCH 6/6] rtc: s3c: Handle clock enable failures
clk_enable() can fail so handle such case. Signed-off-by: Krzysztof Kozlowski --- drivers/rtc/rtc-s3c.c | 72 --- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 0cb2f27a30b4..a8992c227f61 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -68,18 +68,32 @@ struct s3c_rtc_data { void (*disable) (struct s3c_rtc *info); }; -static void s3c_rtc_enable_clk(struct s3c_rtc *info) +static int s3c_rtc_enable_clk(struct s3c_rtc *info) { unsigned long irq_flags; + int ret = 0; spin_lock_irqsave(&info->alarm_clk_lock, irq_flags); + if (info->clk_disabled) { - clk_enable(info->rtc_clk); - if (info->data->needs_src_clk) - clk_enable(info->rtc_src_clk); + ret = clk_enable(info->rtc_clk); + if (ret) + goto out; + + if (info->data->needs_src_clk) { + ret = clk_enable(info->rtc_src_clk); + if (ret) { + clk_disable(info->rtc_clk); + goto out; + } + } info->clk_disabled = false; } + +out: spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags); + + return ret; } static void s3c_rtc_disable_clk(struct s3c_rtc *info) @@ -122,10 +136,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) { struct s3c_rtc *info = dev_get_drvdata(dev); unsigned int tmp; + int ret; dev_dbg(info->dev, "%s: aie=%d\n", __func__, enabled); - s3c_rtc_enable_clk(info); + ret = s3c_rtc_enable_clk(info); + if (ret) + return ret; tmp = readb(info->base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN; @@ -136,10 +153,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) s3c_rtc_disable_clk(info); - if (enabled) - s3c_rtc_enable_clk(info); - else + if (enabled) { + ret = s3c_rtc_enable_clk(info); + if (ret) + return ret; + } else { s3c_rtc_disable_clk(info); + } return 0; } @@ -147,10 +167,14 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) /* Set RTC frequency */ static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq) { + int ret; + if (!is_power_of_2(freq)) return -EINVAL; - 
s3c_rtc_enable_clk(info); + ret = s3c_rtc_enable_clk(info); + if (ret) + return ret; spin_lock_irq(&info->pie_lock); if (info->data->set_freq) @@ -167,8 +191,11 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) { struct s3c_rtc *info = dev_get_drvdata(dev); unsigned int have_retried = 0; + int ret; - s3c_rtc_enable_clk(info); + ret = s3c_rtc_enable_clk(info); + if (ret) + return ret; retry_get_time: rtc_tm->tm_min = readb(info->base + S3C2410_RTCMIN); @@ -212,6 +239,7 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) { struct s3c_rtc *info = dev_get_drvdata(dev); int year = tm->tm_year - 100; + int ret; dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n", 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, @@ -224,7 +252,9 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) return -EINVAL; } - s3c_rtc_enable_clk(info); + ret = s3c_rtc_enable_clk(info); + if (ret) + return ret; writeb(bin2bcd(tm->tm_sec), info->base + S3C2410_RTCSEC); writeb(bin2bcd(tm->tm_min), info->base + S3C2410_RTCMIN); @@ -243,8 +273,11 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) struct s3c_rtc *info = dev_get_drvdata(dev); struct rtc_time *alm_tm = &alrm->time; unsigned int alm_en; + int ret; - s3c_rtc_enable_clk(info); + ret = s3c_rtc_enable_clk(info); + if (ret) + return ret; alm_tm->tm_sec = readb(info->base + S3C2410_ALMSEC); alm_tm->tm_min = readb(info->base + S3C2410_ALMMIN); @@ -293,6 +326,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) struct s3c_rtc *info = dev_get_drvdata(dev); struct rtc_time *tm = &alrm->time; unsigned int alrm_en; + int ret; int year = tm->tm_year - 100; dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n", @@ -300,7 +334,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min,
[PATCH 3/6] rtc: s3c: Drop unneeded cast to void pointer
There is no need for casting to void pointer for of_device_id data. Signed-off-by: Krzysztof Kozlowski --- drivers/rtc/rtc-s3c.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 2b503dab7957..bfc8660ff1e7 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -801,19 +801,19 @@ static struct s3c_rtc_data const s3c6410_rtc_data = { static const struct of_device_id s3c_rtc_dt_match[] = { { .compatible = "samsung,s3c2410-rtc", - .data = (void *)&s3c2410_rtc_data, + .data = &s3c2410_rtc_data, }, { .compatible = "samsung,s3c2416-rtc", - .data = (void *)&s3c2416_rtc_data, + .data = &s3c2416_rtc_data, }, { .compatible = "samsung,s3c2443-rtc", - .data = (void *)&s3c2443_rtc_data, + .data = &s3c2443_rtc_data, }, { .compatible = "samsung,s3c6410-rtc", - .data = (void *)&s3c6410_rtc_data, + .data = &s3c6410_rtc_data, }, { .compatible = "samsung,exynos3250-rtc", - .data = (void *)&s3c6410_rtc_data, + .data = &s3c6410_rtc_data, }, { /* sentinel */ }, }; -- 2.9.3
[PATCH 4/6] rtc: s3c: Do not remove const from rodata memory
All instances of struct s3c_rtc_data are in fact static const thus put in rodata so we should not drop the const while getting the pointer to them. Signed-off-by: Krzysztof Kozlowski --- drivers/rtc/rtc-s3c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index bfc8660ff1e7..c666b95fb8d7 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -41,7 +41,7 @@ struct s3c_rtc { struct clk *rtc_src_clk; bool clk_disabled; - struct s3c_rtc_data *data; + const struct s3c_rtc_data *data; int irq_alarm; int irq_tick; @@ -437,12 +437,12 @@ static int s3c_rtc_remove(struct platform_device *pdev) static const struct of_device_id s3c_rtc_dt_match[]; -static struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev) +static const struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev) { const struct of_device_id *match; match = of_match_node(s3c_rtc_dt_match, pdev->dev.of_node); - return (struct s3c_rtc_data *)match->data; + return match->data; } static int s3c_rtc_probe(struct platform_device *pdev) -- 2.9.3
Re: LTS testing with latest kselftests - some failures
On Fri, Jun 16, 2017 at 01:08:04PM +0530, Sumit Semwal wrote: > > Thanks, this was quite helpful, and so now bpf tests build on x86_64 > with current mainline for me. Perhaps we should document these > somewhere, as dependencies? > There is already some documentation available[0], but something in the kernel tree would be nice. Please send the patch(es) to netdev. Thanks. [0]: http://docs.cilium.io/en/latest/bpf/#development-environment -- Mit freundlichen Grüßen Alexander Alemayhu
[PATCH 1/3] perf evsel: Fix probing of precise_ip level for default cycles event
From: Arnaldo Carvalho de Melo Since commit 18e7a45af91a ("perf/x86: Reject non sampling events with precise_ip") returns -EINVAL for sys_perf_event_open() with an attribute with (attr.precise_ip > 0 && attr.sample_period == 0), just like is done in the routine used to probe the max precise level when no events were passed to 'perf record' or 'perf top', i.e.: perf_evsel__new_cycles() perf_event_attr__set_max_precise_ip() The x86 code, in x86_pmu_hw_config(), which is called all the way from sys_perf_event_open() did, starting with the aforementioned commit: /* There's no sense in having PEBS for non sampling events: */ if (!is_sampling_event(event)) return -EINVAL; Which makes it fail for cycles:ppp, cycles:pp and cycles:p, always using just the non precise cycles variant. To make sure that this is the case, I tested it, before this patch, with: # perf probe -L x86_pmu_hw_config 0 int x86_pmu_hw_config(struct perf_event *event) 1 { 2 if (event->attr.precise_ip) { 17 if (event->attr.precise_ip > precise) 18 return -EOPNOTSUPP; /* There's no sense in having PEBS for non sampling events: */ 21 if (!is_sampling_event(event)) 22 return -EINVAL; } # perf probe x86_pmu_hw_config:22 Added new events: probe:x86_pmu_hw_config (on x86_pmu_hw_config:22) probe:x86_pmu_hw_config_1 (on x86_pmu_hw_config:22) You can now use it in all perf tools, such as: perf record -e probe:x86_pmu_hw_config_1 -aR sleep 1 # perf trace -e perf_event_open,probe:x86_pmu_hwconfig*/max-stack=16/ perf record usleep 1 0.000 ( 0.015 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ... 
0.015 ( ): probe:x86_pmu_hw_config:(9c0065e1)) x86_pmu_hw_config ([kernel.kallsyms]) hsw_hw_config ([kernel.kallsyms]) x86_pmu_event_init ([kernel.kallsyms]) perf_try_init_event ([kernel.kallsyms]) perf_event_alloc ([kernel.kallsyms]) SYSC_perf_event_open ([kernel.kallsyms]) sys_perf_event_open ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) return_from_SYSCALL_64 ([kernel.kallsyms]) syscall (/usr/lib64/libc-2.24.so) perf_event_attr__set_max_precise_ip (/home/acme/bin/perf) perf_evsel__new_cycles (/home/acme/bin/perf) perf_evlist__add_default (/home/acme/bin/perf) cmd_record (/home/acme/bin/perf) run_builtin (/home/acme/bin/perf) handle_internal_command (/home/acme/bin/perf) 0.000 ( 0.021 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument 0.023 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ... 0.025 ( ): probe:x86_pmu_hw_config:(9c0065e1)) x86_pmu_hw_config ([kernel.kallsyms]) hsw_hw_config ([kernel.kallsyms]) x86_pmu_event_init ([kernel.kallsyms]) perf_try_init_event ([kernel.kallsyms]) perf_event_alloc ([kernel.kallsyms]) SYSC_perf_event_open ([kernel.kallsyms]) sys_perf_event_open ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) return_from_SYSCALL_64 ([kernel.kallsyms]) syscall (/usr/lib64/libc-2.24.so) perf_event_attr__set_max_precise_ip (/home/acme/bin/perf) perf_evsel__new_cycles (/home/acme/bin/perf) perf_evlist__add_default (/home/acme/bin/perf) cmd_record (/home/acme/bin/perf) run_builtin (/home/acme/bin/perf) handle_internal_command (/home/acme/bin/perf) 0.023 ( 0.004 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument 0.028 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110,
[PATCH 3/3] perf unwind: Report module before querying isactivation in dwfl unwind
From: Milian Wolff The PC returned by dwfl_frame_pc() may map into a not-yet-reported module. We have to report it before we continue unwinding. But when we query for the isactivation flag in dwfl_frame_pc, libdw will actually do one more unwinding step internally which can then break and lead to missed frames or broken stacks. With libunwind we get e.g.: ~ heaptrack_gui 2228 135073.400474: 613969 cycles: 108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0) 109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0) 147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0) 109fbf QLocalePrivate::updateSystemPrivate (/usr/lib/libQt5Core.so.5.8.0) 10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0) 211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0) 92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0) 2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0) 297c53 QCoreApplicationPrivate::init (/usr/lib/libQt5Core.so.5.8.0) f7cde QGuiApplicationPrivate::init (/usr/lib/libQt5Gui.so.5.8.0) 1589e8 QApplicationPrivate::init (/usr/lib/libQt5Widgets.so.5.8.0) 78622 main (/home/milian/projects/compiled/other/bin/heaptrack_gui) 20439 __libc_start_main (/usr/lib/libc-2.25.so) 78299 _start (/home/milian/projects/compiled/other/bin/heaptrack_gui) heaptrack_gui 2228 135073.401156: 569521 cycles: 131633 QString::endsWith (/usr/lib/libQt5Core.so.5.8.0) 1a0701 QDir::cleanPath (/usr/lib/libQt5Core.so.5.8.0) 21b82d [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1b3727 QFileInfo::canonicalFilePath (/usr/lib/libQt5Core.so.5.8.0) 2780c7 QFactoryLoader::update (/usr/lib/libQt5Core.so.5.8.0) 279525 QFactoryLoader::QFactoryLoader (/usr/lib/libQt5Core.so.5.8.0) e5bd0 
QPlatformIntegrationFactory::create (/usr/lib/libQt5Gui.so.5.8.0) f5a1c QGuiApplicationPrivate::createPlatformIntegration (/usr/lib/libQt5Gui.so.5.8.0) f650c QGuiApplicationPrivate::createEventDispatcher (/usr/lib/libQt5Gui.so.5.8.0) 298524 QCoreApplicationPrivate::init (/usr/lib/libQt5Core.so.5.8.0) f7cde QGuiApplicationPrivate::init (/usr/lib/libQt5Gui.so.5.8.0) 1589e8 QApplicationPrivate::init (/usr/lib/libQt5Widgets.so.5.8.0) 78622 main (/home/milian/projects/compiled/other/bin/heaptrack_gui) 20439 __libc_start_main (/usr/lib/libc-2.25.so) 78299 _start (/home/milian/projects/compiled/other/bin/heaptrack_gui) ~ Note the two frames 1589e8 and 78622 in the first sample. These are missing when unwinding with libdw. The second sample's breakage is more obvious: ~ heaptrack_gui 2228 135073.400474: 613969 cycles: 108c8e [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1093bc [unknown] (/usr/lib/libQt5Core.so.5.8.0) 109e7b QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1470ff [unknown] (/usr/lib/libQt5Core.so.5.8.0) 147f67 QSystemLocale::query (/usr/lib/libQt5Core.so.5.8.0) 109fbf QLocalePrivate::updateSystemPrivate (/usr/lib/libQt5Core.so.5.8.0) 10aa27 QLocale::QLocale (/usr/lib/libQt5Core.so.5.8.0) 1e02c3 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 2113bb [unknown] (/usr/lib/libQt5Core.so.5.8.0) 211505 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 1b5df0 QFileInfo::exists (/usr/lib/libQt5Core.so.5.8.0) 92eb2 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93423 [unknown] (/usr/lib/libQt5Core.so.5.8.0) 93d2a QLibraryInfo::location (/usr/lib/libQt5Core.so.5.8.0) 2170af [unknown] (/usr/lib/libQt5Core.so.5.8.0) 297c53 QCoreApplicationPrivate::init (/usr/lib/libQt5Core.so.5.8.0) f7cde QGuiApplicationPrivate::init (/usr/lib/libQt5Gui.so.5.8.0) 20439 __libc_start_main (/usr/lib/libc-2.25.so) 78299 _start (/home/milian/projects/compiled/other/bin/heaptrack_gui) heaptrack_gui 2228 135073.401156: 569521 cycles:
[PATCH 2/3] perf tools: Fix build with ARCH=x86_64
From: Jiada Wang With commit: 0a943cb10ce78 (tools build: Add HOSTARCH Makefile variable) when building for ARCH=x86_64, ARCH=x86_64 is passed to perf instead of ARCH=x86, so the perf build process searchs header files from tools/arch/x86_64/include, which doesn't exist. The following build failure is seen: In file included from util/event.c:2:0: tools/include/uapi/linux/mman.h:4:27: fatal error: uapi/asm/mman.h: No such file or directory compilation terminated. Fix this issue by using SRCARCH instead of ARCH in perf, just like the main kernel Makefile and tools/objtool's. Signed-off-by: Jiada Wang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Eugeniu Rosca Cc: Jan Stancek Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rui Teng Cc: Sukadev Bhattiprolu Cc: Wang Nan Fixes: 0a943cb10ce7 ("tools build: Add HOSTARCH Makefile variable") Link: http://lkml.kernel.org/r/1491793357-14977-2-git-send-email-jiada_w...@mentor.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 38 +++--- tools/perf/Makefile.perf| 2 +- tools/perf/arch/Build | 2 +- tools/perf/pmu-events/Build | 4 ++-- tools/perf/tests/Build | 2 +- tools/perf/util/header.c| 2 +- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 8354d04b392f..1f4fbc9a3292 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE 
-I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -228,12 +228,12 @@ ifeq ($(DEBUG),0) endif INC_FLAGS += -I$(src-perf)/util/include -INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include +INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/uapi INC_FLAGS += -I$(srctree)/tools/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -355,7 +355,7 @@ ifndef 
NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -380,7 +380,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PROLOGUE) else -msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); +msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); endif else
[GIT PULL 0/3] perf/urgent fixes
Hi Ingo, Please consider pulling, - Arnaldo Test results at the end of this message, as usual. The following changes since commit 63f700aab4c11d46626de3cd051dae56cf7e9056: Merge tag 'xtensa-20170612' of git://github.com/jcmvbkbc/linux-xtensa (2017-06-13 15:09:10 +0900) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-urgent-for-mingo-4.12-20170616 for you to fetch changes up to 9126cbbacecb8917bd0418809ef1d26616b2061e: perf unwind: Report module before querying isactivation in dwfl unwind (2017-06-16 14:37:30 -0300) perf/urgent fixes: - Fix probing of precise_ip level for default cycles event, that got broken recently on x86_64 when its arch code started considering invalid requesting precise samples when not sampling (i.e. when attr.sample_period == 0). This also fixes another problem in s/390 where the precision probing with sample_period == 0 returned precise_ip > 0, that then, when setting up the real cycles event (not probing) would return EOPNOTSUPP for precise_ip > 0 (as determined previously by probing) and sample_period > 0. These problems resulted in attr_precise not being set to the highest precision available on x86_64 when no event was specified, i.e. the canonical: perf record ./workload would end up using attr.precise_ip = 0. As a workaround this would need to be done: perf record -e cycles:P ./workload And on s/390 it would plain not work, requiring using: perf record -e cycles ./workload as a workaround. (Arnaldo Carvalho de Melo) - Fix perf build with ARCH=x86_64, when ARCH should be transformed into ARCH=x86, just like with the main kernel Makefile and tools/objtool's, i.e. use SRCARCH. (Jiada Wang) - Avoid accessing uninitialized data structures when unwinding with elfutils's libdw, making it more closely mimic libunwind's unwinder. 
(Milian Wolff) Signed-off-by: Arnaldo Carvalho de Melo Arnaldo Carvalho de Melo (1): perf evsel: Fix probing of precise_ip level for default cycles event Jiada Wang (1): perf tools: Fix build with ARCH=x86_64 Milian Wolff (1): perf unwind: Report module before querying isactivation in dwfl unwind tools/perf/Makefile.config | 38 +++--- tools/perf/Makefile.perf | 2 +- tools/perf/arch/Build | 2 +- tools/perf/pmu-events/Build| 4 ++-- tools/perf/tests/Build | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evsel.c| 12 tools/perf/util/header.c | 2 +- tools/perf/util/unwind-libdw.c | 8 9 files changed, 46 insertions(+), 26 deletions(-) Test results: The first ones are container (docker) based builds of tools/perf with and without libelf support, objtool where it is supported and samples/bpf/, ditto. Where clang is available, it is also used to build perf with/without libelf. Several are cross builds, the ones with -x-ARCH, and the android one, and those may not have all the features built, due to lack of multi-arch devel packages, available and being used so far on just a few, like debian:experimental-x-{arm64,mipsel}. The 'perf test' one will perform a variety of tests exercising tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands with a variety of command line event specifications to then intercept the sys_perf_event syscall to check that the perf_event_attr fields are set up as expected, among a variety of other unit tests. Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/ with a variety of feature sets, exercising the build with an incomplete set of features as well as with a complete one. It is planned to have it run on each of the containers mentioned above, using some container orchestration infrastructure. Get in contact if interested in helping having this in place. 
# dm 1 alpine:3.4: Ok 2 alpine:3.5: Ok 3 alpine:3.6: Ok 4 alpine:edge: Ok 5 android-ndk:r12b-arm: Ok 6 archlinux:latest: Ok 7 centos:5: Ok 8 centos:6: Ok 9 centos:7: Ok 10 debian:7: Ok 11 debian:8: Ok 12 debian:9: Ok 13 debian:experimental: Ok 14 debian:experimental-x-arm64: Ok 15 debian:experimental-x-mips: Ok 16 debian:experimental-x-mips64: Ok 17 debian:experimental-x-mipsel: Ok 18 fedora:20: Ok 19 fedora:21: Ok 20 fedora:22: Ok 21 fedora:23: Ok 22 fedora:24: Ok 23 fedora:24-x-ARC-uClibc: Ok 24 fedora:25: Ok 25 fedora:rawhide: Ok 26 mageia:5: Ok 27 opensuse:13.2: Ok 28 opensuse:42.1: Ok 29 opensuse:tumbleweed: Ok 30 ubuntu:12.04.5: Ok 31 ubuntu:14.04.4: Ok 32 ubuntu:14.04.4-x-linaro-arm64: Ok 33 ubuntu:15.10: Ok 34 ubuntu:16.04: Ok 35 ubuntu:16.04-x-arm: Ok 3
Re: [PATCH net-next] net: dsa: add cross-chip multicast support
From: Vivien Didelot Date: Thu, 15 Jun 2017 16:14:48 -0400 > Similarly to how cross-chip VLAN works, define a bitmap of multicast > group members for a switch, now including its DSA ports, so that > multicast traffic can be sent to all switches of the fabric. > > A switch may drop the frames if no user port is a member. > > This brings support for multicast in a multi-chip environment. > As of now, all switches of the fabric must support the multicast > operations in order to program a single fabric port. > > Reported-by: Jason Cobham > Signed-off-by: Vivien Didelot Applied, thanks Vivien.
Re: [RFC PATCH 00/13] Switchtec NTB Support
On 16/06/17 12:08 PM, Allen Hubbe wrote: > Alright. I'll leave it to you to find and reconcile common functionalities > of the drivers. What about making spad emulation optional? Ok. I don't see the point of making spad emulation optional. Who would want to disable it and what would be the benefit? Logan
Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.
On Fri, Jun 16, 2017 at 08:33:01PM +1000, Benjamin Herrenschmidt wrote: > On Fri, 2017-06-16 at 14:50 +0530, Anshuman Khandual wrote: > > On 06/06/2017 06:35 AM, Ram Pai wrote: > > > The value of the AMR register at the time of the exception > > > is made available in gp_regs[PT_AMR] of the siginfo. > > > > But its already available there in uctxt->uc_mcontext.regs->amr > > while inside the signal delivery context in the user space. The > > pt_regs already got updated with new AMR register. Then why we > > need gp_regs to also contain AMR as well ? > > Also changing gp_regs layout/size is a major ABI issue... Ben, gp_regs size is not changed, nor is the layout. An unused field in the gp_regs is used to fill in the AMR contents. Old binaries will not know about this unused field, and hence should not break. New binaries can leverage this already existing but newly defined field to read the contents of AMR. Is it still a concern? RP > > Ben. -- Ram Pai
Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'
On Fri, 16 Jun 2017 14:49:51 -0400 Johannes Weiner wrote: > On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote: > > Hi, > > > > I see the following build error in -next when building hexagon images. > > > > CC arch/hexagon/kernel/asm-offsets.s > > In file included from ./include/linux/memcontrol.h:30:0, > > from ./include/linux/swap.h:8, > > from ./arch/hexagon/include/asm/pgtable.h:27, > > from ./include/linux/mm.h:70, > > from arch/hexagon/kernel/asm-offsets.c:28: > > ./include/linux/vmstat.h: In function '__inc_zone_page_state': > > ./include/linux/vmstat.h:294:2: error: implicit declaration of function > > 'page_zone' [-Werror=implicit-function-declaration] > > ./include/linux/vmstat.h:294:2: warning: passing argument 1 of > > '__inc_zone_state' makes pointer from integer without a cast [enabled by > > default] > > ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but > > argument is of type 'int' > > vmstat.h depends on definitions in mm.h, but mm.h through the above > chain includes vmstat.h first. It worked in my x86 test because x86 > pgtable.h doesn't include swap.h. > > The headers are a bit of a mess. memcontrol.h is supposed to be a > lower level header than mm.h and vmstat.h, yet the new accounting > functions depend on mm.h definitions. > > Let's move the lruvec accounting infra to vmstat.h and shuffle > memcontrol.h into the stack under mm.h and vmstat.h. > > Does the following fix the hexagon build? This breaks x86_64 allnoconfig. arch/x86/mm/pat.c:734: error: redefinition of 'arch_io_reserve_memtype_wc' ./include/linux/io.h:175: note: previous definition of 'arch_io_reserve_memtype_wc' was here arch/x86/mm/pat.c:742: error: redefinition of 'arch_io_free_memtype_wc' ./include/linux/io.h:181: note: previous definition of 'arch_io_free_memtype_wc' was here
Re: [PATCH 0/5] perf: add support for capturing skid IP
On Fri, Jun 16, 2017 at 10:50 AM, Andi Kleen wrote: >> > Yeah, I think it is easier and more portable, especially on hardware with a >> > PEBS-like mechanism but no branch buffer (like LBR). FYI, I did do a test >> > implementation yesterday to evaluate the difficulty. >> > >> A more generalized usage of the feature is to evaluate the amount of skid >> for any precise event. > > It should be always the same (one instruction), except for the control flow > change case. > That's on Intel X86. What about the other arch? > -Andi
Re: [RFC PATCH 7/7 v1]powerpc: Deliver SEGV signal on protection key violation.
On Fri, Jun 16, 2017 at 02:50:13PM +0530, Anshuman Khandual wrote: > On 06/06/2017 06:35 AM, Ram Pai wrote: > > The value of the AMR register at the time of the exception > > is made available in gp_regs[PT_AMR] of the siginfo. > > But its already available there in uctxt->uc_mcontext.regs->amr > while inside the signal delivery context in the user space. The > pt_regs already got updated with new AMR register. Then why we > need gp_regs to also contain AMR as well ? It should not be available in uctxt->uc_mcontext.regs->amr. In fact that field itself should not be there. The idea was to use one of the unused fields in gp_regs, without extending gp_regs, to provide the contents of AMR. The PT_AMR offset in gp_regs is currently not used, which I am using in this patch. However, this patch needs to be modified not to extend pt_regs or uctxt->uc_mcontext.regs. Thanks for raising this concern. RP -- Ram Pai
[PATCH v7 00/36] x86: Secure Memory Encryption (AMD)
This patch series provides support for AMD's new Secure Memory Encryption (SME) feature. SME can be used to mark individual pages of memory as encrypted through the page tables. A page of memory that is marked encrypted will be automatically decrypted when read from DRAM and will be automatically encrypted when written to DRAM. Details on SME can be found in the links below. The SME feature is identified through a CPUID function and enabled through the SYSCFG MSR. Once enabled, page table entries will determine how the memory is accessed. If a page table entry has the memory encryption mask set, then that memory will be accessed as encrypted memory. The memory encryption mask (as well as other related information) is determined from settings returned through the same CPUID function that identifies the presence of the feature. The approach that this patch series takes is to encrypt everything possible starting early in the boot where the kernel is encrypted. Using the page table macros the encryption mask can be incorporated into all page table entries and page allocations. By updating the protection map, userspace allocations are also marked encrypted. Certain data must be accounted for as having been placed in memory before SME was enabled (EFI, initrd, etc.) and accessed accordingly. This patch series is a precursor to another AMD processor feature called Secure Encrypted Virtualization (SEV). The support for SEV will build upon the SME support and will be submitted later. Details on SEV can be found in the links below. 
The following links provide additional detail: AMD Memory Encryption whitepaper: http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf AMD64 Architecture Programmer's Manual: http://support.amd.com/TechDocs/24593.pdf SME is section 7.10 SEV is section 15.34 --- This patch series is based off of the master branch of tip: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master Commit 3d55328fd1f8 ("Merge branch 'WIP.x86/fpu'") Source code is also available at https://github.com/codomania/tip/tree/sme-v7 Still to do: - Kdump support, including using memremap() instead of ioremap_cache() Changes since v6: - Fixed the asm include file issue that caused build errors on other archs - Rebased the CR3 register changes on top of Andy Lutomirski's patch - Added a patch to clear the SME cpu feature if running as a PV guest under Xen - Added a patch to obtain the AMD microcode level earlier in the boot instead of directly reading the MSR - Refactor patch #8 ("x86/mm: Add support to enable SME in early boot processing") because the 5-level paging support moved the code into the new C-function __startup_64() - Removed need to decrypt trampoline area in-place (set memory attributes before copying the trampoline code) - General code cleanup based on feedback Changes since v5: - Added support for 5-level paging - Added IOMMU support - Created a generic asm/mem_encrypt.h in order to remove a bunch of #ifndef/#define entries - Removed changes to the __va() macro and defined a function to return the true physical address in cr3 - Removed sysfs support as it was determined not to be needed - General code cleanup based on feedback - General cleanup of patch subjects and descriptions Changes since v4: - Re-worked mapping of setup data to not use a fixed list. Rather, check dynamically whether the requested early_memremap()/memremap() call needs to be mapped decrypted. 
- Moved SME cpu feature into scattered features - Moved some declarations into header files - Cleared the encryption mask from the __PHYSICAL_MASK so that users of macros such as pmd_pfn_mask() don't have to worry/know about the encryption mask - Updated some return types and values related to EFI and e820 functions so that an error could be returned - During cpu shutdown, removed cache disabling and added a check for kexec in progress to use wbinvd followed immediately by halt in order to avoid any memory corruption - Update how persistent memory is identified - Added a function to find command line arguments and their values - Added sysfs support - General code cleanup based on feedback - General cleanup of patch subjects and descriptions Changes since v3: - Broke out some of the patches into smaller individual patches - Updated Documentation - Added a message to indicate why the IOMMU was disabled - Updated CPU feature support for SME by taking into account whether BIOS has enabled SME - Eliminated redundant functions - Added some warning messages for DMA usage of bounce buffers when SME is active - Added support for persistent memory - Added support to determine when setup data is being mapped and be sure to map it un-encrypted - Added CONFIG support to set the default action of whether to activate SME if it is supported/enabled - Added support for (re)booting with kexec Changes since v2: - Updated Documentati
[PATCH v7 03/36] x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap for RAM mappings
The ioremap() function is intended for mapping MMIO. For RAM, the memremap() function should be used. Convert calls from ioremap() to memremap() when re-mapping RAM. This will be used later by SME to control how the encryption mask is applied to memory mappings, with certain memory locations being mapped decrypted vs encrypted. Signed-off-by: Tom Lendacky --- arch/x86/include/asm/dmi.h |8 arch/x86/kernel/acpi/boot.c |6 +++--- arch/x86/kernel/kdebugfs.c | 34 +++--- arch/x86/kernel/ksysfs.c | 28 ++-- arch/x86/kernel/mpparse.c| 10 +- arch/x86/pci/common.c|4 ++-- drivers/firmware/dmi-sysfs.c |5 +++-- drivers/firmware/pcdp.c |4 ++-- drivers/sfi/sfi_core.c | 22 +++--- 9 files changed, 55 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 3c69fed..a8e15b0 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -13,9 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len) } /* Use early IO mappings for DMI because it's initialized early */ -#define dmi_early_remapearly_ioremap -#define dmi_early_unmapearly_iounmap -#define dmi_remap ioremap_cache -#define dmi_unmap iounmap +#define dmi_early_remapearly_memremap +#define dmi_early_unmapearly_memunmap +#define dmi_remap(_x, _l) memremap(_x, _l, MEMREMAP_WB) +#define dmi_unmap(_x) memunmap(_x) #endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6bb6806..850160a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -115,7 +115,7 @@ #defineACPI_INVALID_GSIINT_MIN /* - * This is just a simple wrapper around early_ioremap(), + * This is just a simple wrapper around early_memremap(), * with sanity checks for phys == 0 and size == 0. 
*/ char *__init __acpi_map_table(unsigned long phys, unsigned long size) @@ -124,7 +124,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) if (!phys || !size) return NULL; - return early_ioremap(phys, size); + return early_memremap(phys, size); } void __init __acpi_unmap_table(char *map, unsigned long size) @@ -132,7 +132,7 @@ void __init __acpi_unmap_table(char *map, unsigned long size) if (!map || !size) return; - early_iounmap(map, size); + early_memunmap(map, size); } #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 38b6458..fd6f8fb 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -33,7 +33,6 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, struct setup_data_node *node = file->private_data; unsigned long remain; loff_t pos = *ppos; - struct page *pg; void *p; u64 pa; @@ -47,18 +46,13 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, count = node->len - pos; pa = node->paddr + sizeof(struct setup_data) + pos; - pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); - if (PageHighMem(pg)) { - p = ioremap_cache(pa, count); - if (!p) - return -ENXIO; - } else - p = __va(pa); + p = memremap(pa, count, MEMREMAP_WB); + if (!p) + return -ENOMEM; remain = copy_to_user(user_buf, p, count); - if (PageHighMem(pg)) - iounmap(p); + memunmap(p); if (remain) return -EFAULT; @@ -109,7 +103,6 @@ static int __init create_setup_data_nodes(struct dentry *parent) struct setup_data *data; int error; struct dentry *d; - struct page *pg; u64 pa_data; int no = 0; @@ -126,16 +119,12 @@ static int __init create_setup_data_nodes(struct dentry *parent) goto err_dir; } - pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); - if (PageHighMem(pg)) { - data = ioremap_cache(pa_data, sizeof(*data)); - if (!data) { - kfree(node); - error = -ENXIO; - goto err_dir; - } - } else - data = __va(pa_data); + data = memremap(pa_data, sizeof(*data), 
MEMREMAP_WB); + if (!data) { + kfree(node); + error = -ENOMEM; + goto err_dir; + } node->paddr = pa_data; node->type = data->type; @@ -143,8 +132,7 @
[PATCH v7 05/36] x86/CPU/AMD: Handle SME reduction in physical address size
When Secure Memory Encryption (SME) is enabled, the physical address space is reduced. Adjust the x86_phys_bits value to reflect this reduction. Reviewed-by: Borislav Petkov Signed-off-by: Tom Lendacky --- arch/x86/kernel/cpu/amd.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c47ceee..5bdcbd4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -613,15 +613,19 @@ static void early_init_amd(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_E400); /* -* BIOS support is required for SME. If BIOS has not enabled SME -* then don't advertise the feature (set in scattered.c) +* BIOS support is required for SME. If BIOS has enabled SME then +* adjust x86_phys_bits by the SME physical address space reduction +* value. If BIOS has not enabled SME then don't advertise the +* feature (set in scattered.c). */ if (cpu_has(c, X86_FEATURE_SME)) { u64 msr; /* Check if SME is enabled */ rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) + c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; + else clear_cpu_cap(c, X86_FEATURE_SME); } }
[PATCH v7 10/36] x86/mm: Provide general kernel support for memory encryption
Changes to the existing page table macros will allow the SME support to be enabled in a simple fashion with minimal changes to files that use these macros. Since the memory encryption mask will now be part of the regular pagetable macros, we introduce two new macros (_PAGE_TABLE_NOENC and _KERNPG_TABLE_NOENC) to allow for early pagetable creation/initialization without the encryption mask before SME becomes active. Two new pgprot() macros are defined to allow setting or clearing the page encryption mask. The FIXMAP_PAGE_NOCACHE define is introduced for use with MMIO. SME does not support encryption for MMIO areas so this define removes the encryption mask from the page attribute. Two new macros are introduced (__sme_pa() / __sme_pa_nodebug()) to allow creating a physical address with the encryption mask. These are used when working with the cr3 register so that the PGD can be encrypted. The current __va() macro is updated so that the virtual address is generated based off of the physical address without the encryption mask thus allowing the same virtual address to be generated regardless of whether encryption is enabled for that physical location or not. Also, an early initialization function is added for SME. If SME is active, this function: - Updates the early_pmd_flags so that early page faults create mappings with the encryption mask. - Updates the __supported_pte_mask to include the encryption mask. - Updates the protection_map entries to include the encryption mask so that user-space allocations will automatically have the encryption mask applied. 
Reviewed-by: Borislav Petkov Signed-off-by: Tom Lendacky --- arch/x86/boot/compressed/pagetable.c |7 + arch/x86/include/asm/fixmap.h|7 + arch/x86/include/asm/mem_encrypt.h | 13 ++ arch/x86/include/asm/page_types.h|3 ++ arch/x86/include/asm/pgtable.h |9 +++ arch/x86/include/asm/pgtable_types.h | 45 ++ arch/x86/include/asm/processor.h |3 ++ arch/x86/kernel/espfix_64.c |2 +- arch/x86/kernel/head64.c | 11 +++- arch/x86/kernel/head_64.S| 20 --- arch/x86/mm/kasan_init_64.c |4 ++- arch/x86/mm/mem_encrypt.c| 18 ++ arch/x86/mm/pageattr.c |3 ++ include/asm-generic/pgtable.h|8 ++ include/linux/mem_encrypt.h |8 ++ 15 files changed, 128 insertions(+), 33 deletions(-) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 8e69df9..246bf29 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -15,6 +15,13 @@ #define __pa(x) ((unsigned long)(x)) #define __va(x) ((void *)((unsigned long)(x))) +/* + * The pgtable.h and mm/ident_map.c includes make use of the SME related + * information which is not used in the compressed image support. Un-define + * the SME support to avoid any compile and link errors. + */ +#undef CONFIG_AMD_MEM_ENCRYPT + #include "misc.h" /* These actually do the work of building the kernel identity maps. */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b65155c..d9ff226 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -157,6 +157,13 @@ static inline void __set_fixmap(enum fixed_addresses idx, } #endif +/* + * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not + * supported for MMIO addresses, so make sure that the memory encryption + * mask is not part of the page attributes. 
+ */ +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE + #include #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 988b336..faae4e1 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -21,18 +21,31 @@ extern unsigned long sme_me_mask; +void __init sme_early_init(void); + void __init sme_enable(void); #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask0UL +static inline void __init sme_early_init(void) { } + static inline void __init sme_enable(void) { } #endif /* CONFIG_AMD_MEM_ENCRYPT */ unsigned long sme_get_me_mask(void); +/* + * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when + * writing to or comparing values from the cr3 register. Having the + * encryption mask set in cr3 enables the PGD entry to be encrypted and + * avoid special case handling of PGD allocations. + */ +#define __sme_pa(x)(__pa(x) | sme_me_mask) +#define __sme_pa_nodebug(x)(__pa_nodebug(x) | sme_me_mask) + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 7bd0099..b98ed9d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/
[PATCH v7 19/36] x86/mm: Add support to access boot related data in the clear
Boot data (such as EFI related data) is not encrypted when the system is booted because UEFI/BIOS does not run with SME active. In order to access this data properly it needs to be mapped decrypted. Update early_memremap() to provide an arch specific routine to modify the pagetable protection attributes before they are applied to the new mapping. This is used to remove the encryption mask for boot related data. Update memremap() to provide an arch specific routine to determine if RAM remapping is allowed. RAM remapping will cause an encrypted mapping to be generated. By preventing RAM remapping, ioremap_cache() will be used instead, which will provide a decrypted mapping of the boot related data. Signed-off-by: Tom Lendacky --- arch/x86/include/asm/io.h |5 + arch/x86/mm/ioremap.c | 179 + include/linux/io.h|2 + kernel/memremap.c | 20 - mm/early_ioremap.c| 18 - 5 files changed, 217 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7afb0e2..09c5557 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -381,4 +381,9 @@ extern int __must_check arch_phys_wc_add(unsigned long base, #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif +extern bool arch_memremap_can_ram_remap(resource_size_t offset, + unsigned long size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4feda83..f3fa007 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include "physaddr.h" @@ -422,6 +424,183 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); } +/* + * Examine the physical address to determine if it is an area of memory + * that should be mapped decrypted. 
If the memory is not part of the + * kernel usable area it was accessed and created decrypted, so these + * areas should be mapped decrypted. + */ +static bool memremap_should_map_decrypted(resource_size_t phys_addr, + unsigned long size) +{ + /* Check if the address is outside kernel usable area */ + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { + case E820_TYPE_RESERVED: + case E820_TYPE_ACPI: + case E820_TYPE_NVS: + case E820_TYPE_UNUSABLE: + return true; + default: + break; + } + + return false; +} + +/* + * Examine the physical address to determine if it is EFI data. Check + * it against the boot params structure and EFI tables and memory types. + */ +static bool memremap_is_efi_data(resource_size_t phys_addr, +unsigned long size) +{ + u64 paddr; + + /* Check if the address is part of EFI boot/runtime data */ + if (!efi_enabled(EFI_BOOT)) + return false; + + paddr = boot_params.efi_info.efi_memmap_hi; + paddr <<= 32; + paddr |= boot_params.efi_info.efi_memmap; + if (phys_addr == paddr) + return true; + + paddr = boot_params.efi_info.efi_systab_hi; + paddr <<= 32; + paddr |= boot_params.efi_info.efi_systab; + if (phys_addr == paddr) + return true; + + if (efi_is_table_address(phys_addr)) + return true; + + switch (efi_mem_type(phys_addr)) { + case EFI_BOOT_SERVICES_DATA: + case EFI_RUNTIME_SERVICES_DATA: + return true; + default: + break; + } + + return false; +} + +/* + * Examine the physical address to determine if it is boot data by checking + * it against the boot params setup_data chain. 
+ */ +static bool memremap_is_setup_data(resource_size_t phys_addr, + unsigned long size) +{ + struct setup_data *data; + u64 paddr, paddr_next; + + paddr = boot_params.hdr.setup_data; + while (paddr) { + unsigned int len; + + if (phys_addr == paddr) + return true; + + data = memremap(paddr, sizeof(*data), + MEMREMAP_WB | MEMREMAP_DEC); + + paddr_next = data->next; + len = data->len; + + memunmap(data); + + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) + return true; + + paddr = paddr_next; + } + + return false; +} + +/* + * Examine the physical address to determine if it is boot data by checking + * it against the boot params se
[PATCH v7 20/36] x86, mpparse: Use memremap to map the mpf and mpc data
The SMP MP-table is built by UEFI and placed in memory in a decrypted state. These tables are accessed using a mix of early_memremap(), early_memunmap(), phys_to_virt() and virt_to_phys(). Change all accesses to use early_memremap()/early_memunmap(). This allows for proper setting of the encryption mask so that the data can be successfully accessed when SME is active. Signed-off-by: Tom Lendacky --- arch/x86/kernel/mpparse.c | 98 - 1 file changed, 70 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index fd37f39..5cbb317 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -429,7 +429,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } } -static struct mpf_intel *mpf_found; +static unsigned long mpf_base; static unsigned long __init get_mpc_size(unsigned long physptr) { @@ -451,6 +451,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) size = get_mpc_size(mpf->physptr); mpc = early_memremap(mpf->physptr, size); + /* * Read the physical hardware table. Anything here will * override the defaults. @@ -497,12 +498,12 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) */ void __init default_get_smp_config(unsigned int early) { - struct mpf_intel *mpf = mpf_found; + struct mpf_intel *mpf; if (!smp_found_config) return; - if (!mpf) + if (!mpf_base) return; if (acpi_lapic && early) @@ -515,6 +516,12 @@ void __init default_get_smp_config(unsigned int early) if (acpi_lapic && acpi_ioapic) return; + mpf = early_memremap(mpf_base, sizeof(*mpf)); + if (!mpf) { + pr_err("MPTABLE: error mapping MP table\n"); + return; + } + pr_info("Intel MultiProcessor Specification v1.%d\n", mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) @@ -529,7 +536,7 @@ void __init default_get_smp_config(unsigned int early) /* * Now see if we need to read further. 
*/ - if (mpf->feature1 != 0) { + if (mpf->feature1) { if (early) { /* * local APIC has default address @@ -542,8 +549,10 @@ void __init default_get_smp_config(unsigned int early) construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { - if (check_physptr(mpf, early)) + if (check_physptr(mpf, early)) { + early_memunmap(mpf, sizeof(*mpf)); return; + } } else BUG(); @@ -552,6 +561,8 @@ void __init default_get_smp_config(unsigned int early) /* * Only use the first configuration found. */ + + early_memunmap(mpf, sizeof(*mpf)); } static void __init smp_reserve_memory(struct mpf_intel *mpf) @@ -561,15 +572,16 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) static int __init smp_scan_config(unsigned long base, unsigned long length) { - unsigned int *bp = phys_to_virt(base); + unsigned int *bp; struct mpf_intel *mpf; - unsigned long mem; + int ret = 0; apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n", base, base + length - 1); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { + bp = early_memremap(base, length); mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && (mpf->length == 1) && @@ -579,24 +591,26 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; #endif - mpf_found = mpf; + mpf_base = base; - pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", - (unsigned long long) virt_to_phys(mpf), - (unsigned long long) virt_to_phys(mpf) + - sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", + base, base + sizeof(*mpf) - 1, mpf); - mem = virt_to_phys(mpf); - memblock_reserve(mem, sizeof(*mpf)); + memblock_reserve(base, sizeof(*mpf)); if (mpf->physptr) smp_reserve_memory(mpf); - return 1; + ret = 1; } - bp += 4; + early_memunmap(bp, length); + +
[PATCH v7 23/36] x86, realmode: Decrypt trampoline area if memory encryption is active
When Secure Memory Encryption is enabled, the trampoline area must not be encrypted. A CPU running in real mode will not be able to decrypt memory that has been encrypted because it will not be able to use addresses with the memory encryption mask. Signed-off-by: Tom Lendacky --- arch/x86/realmode/init.c |8 1 file changed, 8 insertions(+) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index cd4be19..d6ddc7e 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,13 @@ static void __init setup_real_mode(void) base = (unsigned char *)real_mode_header; + /* +* If SME is active, the trampoline area will need to be in +* decrypted memory in order to bring up other processors +* successfully. +*/ + set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT); + memcpy(base, real_mode_blob, size); phys_base = __pa(base);
[PATCH v7 30/36] kvm: x86: svm: Support Secure Memory Encryption within KVM
Update the KVM support to work with SME. The VMCB has a number of fields where physical addresses are used and these addresses must contain the memory encryption mask in order to properly access the encrypted memory. Also, use the memory encryption mask when creating and using the nested page tables. Reviewed-by: Borislav Petkov Signed-off-by: Tom Lendacky --- arch/x86/include/asm/kvm_host.h |2 +- arch/x86/kvm/mmu.c | 12 arch/x86/kvm/mmu.h |2 +- arch/x86/kvm/svm.c | 35 ++- arch/x86/kvm/vmx.c |3 ++- arch/x86/kvm/x86.c |3 ++- 6 files changed, 32 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 695605e..6d1267f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1069,7 +1069,7 @@ struct kvm_arch_async_pf { void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, - u64 acc_track_mask); + u64 acc_track_mask, u64 me_mask); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cb82259..e85888c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -107,7 +107,7 @@ enum { (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) -#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) +#define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))) #define PT64_DIR_BASE_ADDR_MASK \ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) #define PT64_LVL_ADDR_MASK(level) \ @@ -125,7 +125,7 @@ enum { * PT32_LEVEL_BITS))) - 1)) #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ - | shadow_x_mask | shadow_nx_mask) + | shadow_x_mask | shadow_nx_mask | shadow_me_mask) #define ACC_EXEC_MASK 1 #define ACC_WRITE_MASK PT_WRITABLE_MASK @@ -184,6 +184,7 @@ struct kvm_shadow_walk_iterator {
static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_present_mask; +static u64 __read_mostly shadow_me_mask; /* * The mask/value to distinguish a PTE that has been marked not-present for @@ -317,7 +318,7 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, - u64 acc_track_mask) + u64 acc_track_mask, u64 me_mask) { if (acc_track_mask != 0) acc_track_mask |= SPTE_SPECIAL_MASK; @@ -330,6 +331,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, shadow_present_mask = p_mask; shadow_acc_track_mask = acc_track_mask; WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0); + shadow_me_mask = me_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -2398,7 +2400,8 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask | shadow_accessed_mask; + shadow_user_mask | shadow_x_mask | shadow_accessed_mask | + shadow_me_mask; mmu_spte_set(sptep, spte); @@ -2700,6 +2703,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, pte_access &= ~ACC_WRITE_MASK; spte |= (u64)pfn << PAGE_SHIFT; + spte |= shadow_me_mask; if (pte_access & ACC_WRITE_MASK) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 330bf3a..08b779d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -48,7 +48,7 @@ static inline u64 rsvd_bits(int s, int e) { - return ((1ULL << (e - s + 1)) - 1) << s; + return __sme_clr(((1ULL << (e - s + 1)) - 1) << s); } void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ba9891a..d2e9fca 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1138,9 +1138,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = 
svm->vmcb; struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; - phys_addr_t bpa = page_to_phys(svm->avic_backing_page); - phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page); - phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page); + phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); + phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic
[PATCH v7 36/36] x86/mm: Add support to make use of Secure Memory Encryption
Add support to check if SME has been enabled and if memory encryption should be activated (checking of command line option based on the configuration of the default state). If memory encryption is to be activated, then the encryption mask is set and the kernel is encrypted "in place." Signed-off-by: Tom Lendacky --- arch/x86/include/asm/mem_encrypt.h |6 ++- arch/x86/kernel/head64.c |4 +- arch/x86/mm/mem_encrypt.c | 86 +++- 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 7da6de3..aac9ed9 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -17,6 +17,8 @@ #include +#include + #ifdef CONFIG_AMD_MEM_ENCRYPT extern unsigned long sme_me_mask; @@ -37,7 +39,7 @@ void __init sme_early_decrypt(resource_size_t paddr, void __init sme_early_init(void); -void __init sme_enable(void); +void __init sme_enable(struct boot_params *bp); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); @@ -58,7 +60,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_enable(void) { } +static inline void __init sme_enable(struct boot_params *bp) { } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9e94ed2..1ff2e98 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -43,7 +43,7 @@ static void __init *fixup_pointer(void *ptr, unsigned long physaddr) return ptr - (void *)_text + (void *)physaddr; } -void __init __startup_64(unsigned long physaddr) +void __init __startup_64(unsigned long physaddr, struct boot_params *bp) { unsigned long load_delta, *p; unsigned long pgtable_flags; @@ -68,7 +68,7 @@ void __init __startup_64(unsigned long physaddr) for (;;); /* Activate Secure Memory Encryption (SME) if supported and enabled */ - sme_enable(); + sme_enable(bp); /* 
Include the SME encryption mask in the fixup value */ load_delta += sme_get_me_mask(); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 6e87662..13f780e 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -13,19 +13,34 @@ #include #include +#include + #ifdef CONFIG_AMD_MEM_ENCRYPT #include #include #include +#include #include #include #include -#include #include #include #include +#include +#include +#include + +/* + * Some SME functions run very early causing issues with the stack-protector + * support. Provide a way to turn off this support on a per-function basis. + */ +#define SME_NOSTACKP __attribute__((__optimize__("no-stack-protector"))) + +static char sme_cmdline_arg[] __initdata = "mem_encrypt"; +static char sme_cmdline_on[] __initdata = "on"; +static char sme_cmdline_off[] __initdata = "off"; /* * Since SME related variables are set early in the boot process they must @@ -200,6 +215,8 @@ void __init mem_encrypt_init(void) /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ swiotlb_update_mem_attributes(); + + pr_info("AMD Secure Memory Encryption (SME) active\n"); } void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) @@ -527,8 +544,73 @@ void __init sme_encrypt_kernel(void) native_write_cr3(__native_read_cr3()); } -void __init sme_enable(void) +void __init SME_NOSTACKP sme_enable(struct boot_params *bp) { + const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; + unsigned int eax, ebx, ecx, edx; + bool active_by_default; + unsigned long me_mask; + char buffer[16]; + u64 msr; + + /* Check for the SME support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return; + + /* +* Check for the SME feature: +* CPUID Fn8000_001F[EAX] - Bit 0 +* Secure Memory Encryption support +* CPUID Fn8000_001F[EBX] - Bits 5:0 +* Pagetable bit position used to indicate encryption +*/ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if
(!(eax & 1)) + return; + + me_mask = 1UL << (ebx & 0x3f); + + /* Check if SME is enabled */ + msr = __rdmsr(MSR_K8_SYSCFG); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + /* +* Fixups have not been applied to phys_base yet and we're running +* identity mapped, so we must obtain the address to the SME command +* line argument data using rip-relative addressing. +*/ + asm ("lea sme_cmdline_arg(%%rip), %0" +: "=r" (cmdline_arg) +
[PATCH v7 34/36] x86/mm: Add support to encrypt the kernel in-place
Add the support to encrypt the kernel in-place. This is done by creating new page mappings for the kernel - a decrypted write-protected mapping and an encrypted mapping. The kernel is encrypted by copying it through a temporary buffer. Signed-off-by: Tom Lendacky --- arch/x86/include/asm/mem_encrypt.h |6 + arch/x86/mm/Makefile |2 arch/x86/mm/mem_encrypt.c | 314 arch/x86/mm/mem_encrypt_boot.S | 150 + 4 files changed, 472 insertions(+) create mode 100644 arch/x86/mm/mem_encrypt_boot.S diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index af835cf..7da6de3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -21,6 +21,12 @@ extern unsigned long sme_me_mask; +void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, +unsigned long decrypted_kernel_vaddr, +unsigned long kernel_len, +unsigned long encryption_wa, +unsigned long encryption_pgd); + void __init sme_early_encrypt(resource_size_t paddr, unsigned long size); void __init sme_early_decrypt(resource_size_t paddr, diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9e13841..0633142 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -38,3 +38,5 @@ obj-$(CONFIG_NUMA_EMU)+= numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX)+= mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o + +obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 842c8a6..6e87662 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include /* * Since SME related variables are set early in the boot process they must @@ -209,8 +211,320 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); } +static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, +unsigned long end) 
+{ + unsigned long pgd_start, pgd_end, pgd_size; + pgd_t *pgd_p; + + pgd_start = start & PGDIR_MASK; + pgd_end = end & PGDIR_MASK; + + pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); + pgd_size *= sizeof(pgd_t); + + pgd_p = pgd_base + pgd_index(start); + + memset(pgd_p, 0, pgd_size); +} + +#ifndef CONFIG_X86_5LEVEL +#define native_make_p4d(_x)(p4d_t) { .pgd = native_make_pgd(_x) } +#endif + +#define PGD_FLAGS _KERNPG_TABLE_NOENC +#define P4D_FLAGS _KERNPG_TABLE_NOENC +#define PUD_FLAGS _KERNPG_TABLE_NOENC +#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) + +static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, +unsigned long vaddr, pmdval_t pmd_val) +{ + pgd_t *pgd_p; + p4d_t *p4d_p; + pud_t *pud_p; + pmd_t *pmd_p; + + pgd_p = pgd_base + pgd_index(vaddr); + if (native_pgd_val(*pgd_p)) { + if (IS_ENABLED(CONFIG_X86_5LEVEL)) + p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); + else + pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); + } else { + pgd_t pgd; + + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + p4d_p = pgtable_area; + memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); + pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; + + pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); + } else { + pud_p = pgtable_area; + memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); + pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + + pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); + } + native_set_pgd(pgd_p, pgd); + } + + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + p4d_p += p4d_index(vaddr); + if (native_p4d_val(*p4d_p)) { + pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); + } else { + p4d_t p4d; + + pud_p = pgtable_area; + memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); + pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + + p4d = native_make_p4d((p4dval_t)pud_p + P4D_FLAGS); + native_set_p4d(p4d_p, p4d); + } + } + + pud_p += pud_index(vaddr); + if (native_pud_val(*pud_p)) { + if (n