date:20181128

[PATCH AUTOSEL 4.19 57/68] NFSv4.2 copy do not allocate memory under the lock

2018-11-28 Thread Sasha Levin

From: Olga Kornievskaia 

[ Upstream commit 99f2c55591fb5c1b536263970d98c2ebc2089906 ]

Bruce pointed out that we shouldn't allocate memory while holding
a lock in the nfs4_callback_offload() and handle_async_copy()
that deal with a racing CB_OFFLOAD and reply to COPY case.

Signed-off-by: Olga Kornievskaia 
Signed-off-by: Trond Myklebust 
Signed-off-by: Sasha Levin 
---
 fs/nfs/callback_proc.c | 22 +++---
 fs/nfs/nfs42proc.c | 19 ++-
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index fa515d5ea5ba..48b2e9063b0a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -686,20 +686,24 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 {
struct cb_offloadargs *args = data;
struct nfs_server *server;
-   struct nfs4_copy_state *copy;
+   struct nfs4_copy_state *copy, *tmp_copy;
bool found = false;
 
+   copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+   if (!copy)
+   return htonl(NFS4ERR_SERVERFAULT);
+
spin_lock(>clp->cl_lock);
rcu_read_lock();
list_for_each_entry_rcu(server, >clp->cl_superblocks,
client_link) {
-   list_for_each_entry(copy, >ss_copies, copies) {
+   list_for_each_entry(tmp_copy, >ss_copies, copies) {
if (memcmp(args->coa_stateid.other,
-   copy->stateid.other,
+   tmp_copy->stateid.other,
sizeof(args->coa_stateid.other)))
continue;
-   nfs4_copy_cb_args(copy, args);
-   complete(>completion);
+   nfs4_copy_cb_args(tmp_copy, args);
+   complete(_copy->completion);
found = true;
goto out;
}
@@ -707,15 +711,11 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 out:
rcu_read_unlock();
if (!found) {
-   copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-   if (!copy) {
-   spin_unlock(>clp->cl_lock);
-   return htonl(NFS4ERR_SERVERFAULT);
-   }
memcpy(>stateid, >coa_stateid, NFS4_STATEID_SIZE);
nfs4_copy_cb_args(copy, args);
list_add_tail(>copies, >clp->pending_cb_stateids);
-   }
+   } else
+   kfree(copy);
spin_unlock(>clp->cl_lock);
 
return 0;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index ac5b784a1de0..fed06fd9998d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -137,31 +137,32 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 struct file *dst,
 nfs4_stateid *src_stateid)
 {
-   struct nfs4_copy_state *copy;
+   struct nfs4_copy_state *copy, *tmp_copy;
int status = NFS4_OK;
bool found_pending = false;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
 
+   copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+   if (!copy)
+   return -ENOMEM;
+
spin_lock(>nfs_client->cl_lock);
-   list_for_each_entry(copy, >nfs_client->pending_cb_stateids,
+   list_for_each_entry(tmp_copy, >nfs_client->pending_cb_stateids,
copies) {
-   if (memcmp(>write_res.stateid, >stateid,
+   if (memcmp(>write_res.stateid, _copy->stateid,
NFS4_STATEID_SIZE))
continue;
found_pending = true;
-   list_del(>copies);
+   list_del(_copy->copies);
break;
}
if (found_pending) {
spin_unlock(>nfs_client->cl_lock);
+   kfree(copy);
+   copy = tmp_copy;
goto out;
}
 
-   copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-   if (!copy) {
-   spin_unlock(>nfs_client->cl_lock);
-   return -ENOMEM;
-   }
memcpy(>stateid, >write_res.stateid, NFS4_STATEID_SIZE);
init_completion(>completion);
copy->parent_state = ctx->state;
-- 
2.17.1

[PATCH AUTOSEL 4.19 52/68] iomap: sub-block dio needs to zeroout beyond EOF

2018-11-28 Thread Sasha Levin

From: Dave Chinner 

[ Upstream commit b450672fb66b4a991a5b55ee24209ac7ae7690ce ]

If we are doing sub-block dio that extends EOF, we need to zero
the unused tail of the block to initialise the data in it. If we
do not zero the tail of the block, then an immediate mmap read of
the EOF block will expose stale data beyond EOF to userspace. Found
with fsx running sub-block DIO sizes vs MAPREAD/MAPWRITE operations.

Fix this by detecting if the end of the DIO write is beyond EOF
and zeroing the tail if necessary.

Signed-off-by: Dave Chinner 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Darrick J. Wong 
Signed-off-by: Darrick J. Wong 
Signed-off-by: Sasha Levin 
---
 fs/iomap.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index fa46e3ed8f53..82e35265679d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1678,7 +1678,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, 
loff_t length,
dio->submit.cookie = submit_bio(bio);
} while (nr_pages);
 
-   if (need_zeroout) {
+   /*
+* We need to zeroout the tail of a sub-block write if the extent type
+* requires zeroing or the write extends beyond EOF. If we don't zero
+* the block tail in the latter case, we can expose stale data via mmap
+* reads of the EOF block.
+*/
+   if (need_zeroout ||
+   ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
-- 
2.17.1

[PATCH AUTOSEL 4.19 50/68] riscv: fix warning in arch/riscv/include/asm/module.h

2018-11-28 Thread Sasha Levin

From: David Abdurachmanov 

[ Upstream commit 0138ebb90c633f76bc71617f8f23635ce41c84fd ]

Fixes warning: 'struct module' declared inside parameter list will not be
visible outside of this definition or declaration

Signed-off-by: David Abdurachmanov 
Acked-by: Olof Johansson 
Signed-off-by: Palmer Dabbelt 
Signed-off-by: Sasha Levin 
---
 arch/riscv/include/asm/module.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h
index 349df33808c4..cd2af4b013e3 100644
--- a/arch/riscv/include/asm/module.h
+++ b/arch/riscv/include/asm/module.h
@@ -8,6 +8,7 @@
 
 #define MODULE_ARCH_VERMAGIC"riscv"
 
+struct module;
 u64 module_emit_got_entry(struct module *mod, u64 val);
 u64 module_emit_plt_entry(struct module *mod, u64 val);
 
-- 
2.17.1

[PATCH AUTOSEL 4.19 51/68] iomap: FUA is wrong for DIO O_DSYNC writes into unwritten extents

2018-11-28 Thread Sasha Levin

From: Dave Chinner 

[ Upstream commit 0929d8580071c6a1cec1a7916a8f674c243ceee1 ]

When we write into an unwritten extent via direct IO, we dirty
metadata on IO completion to convert the unwritten extent to
written. However, when we do the FUA optimisation checks, the inode
may be clean and so we issue a FUA write into the unwritten extent.
This means we then bypass the generic_write_sync() call after
unwritten extent conversion has been done and we don't force the
modified metadata to stable storage.

This violates O_DSYNC semantics. The window of exposure is a single
IO, as the next DIO write will see the inode has dirty metadata and
hence will not use the FUA optimisation. Calling
generic_write_sync() after completion of the second IO will also
sync the first write and its metadata.

Fix this by avoiding the FUA optimisation when writing to unwritten
extents.

Signed-off-by: Dave Chinner 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Darrick J. Wong 
Signed-off-by: Darrick J. Wong 
Signed-off-by: Sasha Levin 
---
 fs/iomap.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index ec15cf2ec696..fa46e3ed8f53 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1597,12 +1597,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, 
loff_t length,
 
if (iomap->flags & IOMAP_F_NEW) {
need_zeroout = true;
-   } else {
+   } else if (iomap->type == IOMAP_MAPPED) {
/*
-* Use a FUA write if we need datasync semantics, this
-* is a pure data IO that doesn't require any metadata
-* updates and the underlying device supports FUA. This
-* allows us to avoid cache flushes on IO completion.
+* Use a FUA write if we need datasync semantics, this is a pure
+* data IO that doesn't require any metadata updates (including
+* after IO completion such as unwritten extent conversion) and
+* the underlying device supports FUA. This allows us to avoid
+* cache flushes on IO completion.
 */
if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
(dio->flags & IOMAP_DIO_WRITE_FUA) &&
-- 
2.17.1

Re: [PATCH v3 0/3] Add Amlogic Meson GX SoC Clock Measure Driver

2018-11-28 Thread Kevin Hilman

Neil Armstrong  writes:

> The Amlogic Meson GX SoCs embed a clock measurer IP to measure the internal
> clocks frequencies.
> The precision is determined by stepping into the duration until the counter
> overflows.
> The debugfs shows a pretty summary and each clock can be measured
> individually as well.
>
> This patchset includes the dt-bindings, driver and the DT node added to the
> meson-gx dtsi.

Queued for v4.21...

[...]

> Neil Armstrong (3):
>   dt-bindings: amlogic: Add Internal Clock Measurer bindings
>   soc: amlogic: Add Meson Clock Measure driver

...on branch v4.21/drivers and...

>   ARM64: dts: meson-gx: Add Internal Clock Measurer node

...on branch v4.21/dt64,

Thanks,

Kevin

Re: [tip:timers/core] time: Add SPDX license identifiers

2018-11-28 Thread Joe Perches

On Fri, 2018-11-23 at 20:15 -0800, Joe Perches wrote:
> On Fri, 2018-11-23 at 03:19 -0800, tip-bot for Thomas Gleixner wrote:
> > The SPDX
> > identifier is a legally binding shorthand, which can be used instead of the
> > full boiler plate text.
> 
> Is the "legally binding shorthand" actually proven anywhere?
> I am not aware of any case law for this.

So is there any case law that supports this "legally binding"
statement or is it merely wishful thinking?

[PATCH V4 4/4] ARM64: dts: imx: add i.MX8QXP system controller RTC support

2018-11-28 Thread Anson Huang

Add i.MX8QXP system controller RTC support.

Signed-off-by: Anson Huang 
---
ChangeLog:
V3->V4:
  *move sc rtc to inside of SCU node.
 arch/arm64/boot/dts/freescale/imx8qxp.dtsi | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi 
b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
index 9155d45..b38730a 100644
--- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
@@ -84,6 +84,10 @@
compatible = "fsl,imx8qxp-scu-pd";
#power-domain-cells = <1>;
};
+
+   rtc: rtc {
+   compatible = "fsl,imx8qxp-sc-rtc";
+   };
};
 
tsens: thermal-sensor {
-- 
2.7.4

[PATCH V4 0/4] Add i.MX system controller RTC driver

2018-11-28 Thread Anson Huang

NXP i.MX8QXP is an ARMv8 SoC with a Cortex-M4 core inside as
system controller, the system controller is in charge of system
power, clock and secure RTC etc. management, Linux kernel
has to communicate with system controller via MU (message unit)
IPC to do RTC operation.

Since the RTC set time MUST be done in secure EL3 mode (required
by system controller firmware) and ALARM functions need to be done
with general MU IRQ handle, these are NOT ready NOW, so this patch
ONLY supports RTC read time for now.

Note that this patch set is based on [V4,5/5] defconfig: arm64: add imx8qxp 
support,
https://patchwork.kernel.org/patch/10677315/

Anson Huang (4):
  rtc: add i.MX system controller RTC support
  dt-bindings: rtc: add binding doc for i.MX system controller RTC
driver
  defconfig: arm64: add i.MX system controller RTC support
  ARM64: dts: imx: add i.MX8QXP system controller RTC support

 .../devicetree/bindings/rtc/rtc-imx-sc.txt |  10 ++
 arch/arm64/boot/dts/freescale/imx8qxp.dtsi |   4 +
 arch/arm64/configs/defconfig   |   1 +
 drivers/rtc/Kconfig|   6 ++
 drivers/rtc/Makefile   |   1 +
 drivers/rtc/rtc-imx-sc.c   | 104 +
 6 files changed, 126 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/rtc/rtc-imx-sc.txt
 create mode 100644 drivers/rtc/rtc-imx-sc.c

-- 
2.7.4

Re: [PATCH v7 0/4] clk: meson: add a sub EMMC clock controller support

2018-11-28 Thread Jianxin Pan

Hi Jerome,

I made some modifications as you suggested, could you please take a look?

On 2018/11/15 20:18, Jianxin Pan wrote:
> This driver will add a MMC clock controller driver support.
> The original idea about adding a clock controller is during the
> discussion in the NAND driver mainline effort[1].
> 
> This driver is tested in the S400 board (AXG platform) with NAND driver.
> 
> Changes since v6 [7]:
>  - add one based support for sclk divider
>  - alloc sclk in probe for multiple instance
>  - fix coding styles
> 
> Changes since v5 [6]:
>  - remove divider ops with .init and use sclk_div instead
>  - drop CLK_DIVIDER_ROUND_CLOSEST in mux and div
>  - drop the useless type cast 
> 
> Changes since v4 [5]:
>  - use struct parm in phase delay driver
>  - remove 0 delay related part in phase delay driver
>  - don't rebuild the parent name once again
>  - add divider ops with .init
> 
> Changes since v3 [4]:
>  - separate clk-phase-delay driver
>  - replace clk_get_rate() with clk_hw_get_rate()
>  - collect Rob's R-Y
>  - drop 'meson-' prefix from compatible string
> 
>  Changes since v2 [3]:
>  - squash dt-binding clock-id patch
>  - update license
>  - fix alignment
>  - construct a clk register helper() function
> 
> Changes since v1 [2]:
>  - implement phase clock
>  - update compatible name
>  - adjust file name
>  - divider probe() into small functions, and re-use them
> 
> [1] https://lkml.kernel.org/r/20180628090034.0637a062@xps13
> [2] https://lkml.kernel.org/r/20180703145716.31860-1-yixun@amlogic.com
> [3] https://lkml.kernel.org/r/20180710163658.6175-1-yixun@amlogic.com
> [4] https://lkml.kernel.org/r/20180712211244.11428-1-yixun@amlogic.com
> [5] https://lkml.kernel.org/r/20180809070724.11935-4-yixun@amlogic.com
> [6] 
> https://lkml.kernel.org/r/1539839245-13793-1-git-send-email-jianxin@amlogic.com
> [7] 
> https://lkml.kernel.org/r/1541089855-19356-1-git-send-email-jianxin@amlogic.com
> Yixun Lan (3):
>   clk: meson: add emmc sub clock phase delay driver
>   clk: meson: add DT documentation for emmc clock controller
>   clk: meson: add sub MMC clock controller driver
>   clk: meson: add one based divider support for sclk divider
> 
>  .../devicetree/bindings/clock/amlogic,mmc-clkc.txt |  39 +++
>  drivers/clk/meson/Kconfig  |  10 +
>  drivers/clk/meson/Makefile |   3 +-
>  drivers/clk/meson/clk-phase-delay.c|  64 +
>  drivers/clk/meson/clkc-audio.h |   1 +
>  drivers/clk/meson/clkc.h   |  13 +
>  drivers/clk/meson/mmc-clkc.c   | 313 
> +
>  drivers/clk/meson/sclk-div.c   |  28 +-
>  include/dt-bindings/clock/amlogic,mmc-clkc.h   |  17 ++
>  9 files changed, 477 insertions(+), 11 deletions(-)
>  create mode 100644 
> Documentation/devicetree/bindings/clock/amlogic,mmc-clkc.txt
>  create mode 100644 drivers/clk/meson/clk-phase-delay.c
>  create mode 100644 drivers/clk/meson/mmc-clkc.c
>  create mode 100644 include/dt-bindings/clock/amlogic,mmc-clkc.h
>

Re: [RFC PATCH 0/5] x86: dynamic indirect call promotion

2018-11-28 Thread Nadav Amit

> On Nov 28, 2018, at 5:40 PM, Andy Lutomirski  wrote:
> 
> On Wed, Nov 28, 2018 at 4:38 PM Josh Poimboeuf  wrote:
>> On Wed, Nov 28, 2018 at 07:34:52PM +, Nadav Amit wrote:
 On Nov 28, 2018, at 8:08 AM, Josh Poimboeuf  wrote:
 
 On Wed, Oct 17, 2018 at 05:54:15PM -0700, Nadav Amit wrote:
> This RFC introduces indirect call promotion in runtime, which for the
> matter of simplification (and branding) will be called here "relpolines"
> (relative call + trampoline). Relpolines are mainly intended as a way
> of reducing retpoline overheads due to Spectre v2.
> 
> Unlike indirect call promotion through profile guided optimization, the
> proposed approach does not require a profiling stage, works well with
> modules whose address is unknown and can adapt to changing workloads.
> 
> The main idea is simple: for every indirect call, we inject a piece of
> code with fast- and slow-path calls. The fast path is used if the target
> matches the expected (hot) target. The slow-path uses a retpoline.
> During training, the slow-path is set to call a function that saves the
> call source and target in a hash-table and keep count for call
> frequency. The most common target is then patched into the hot path.
> 
> The patching is done on-the-fly by patching the conditional branch
> (opcode and offset) that is used to compare the target to the hot
> target. This allows to direct all cores to the fast-path, while patching
> the slow-path and vice-versa. Patching follows 2 more rules: (1) Only
> patch a single byte when the code might be executed by any core. (2)
> When patching more than one byte, ensure that all cores do not run the
> to-be-patched-code by preventing this code from being preempted, and
> using synchronize_sched() after patching the branch that jumps over this
> code.
> 
> Changing all the indirect calls to use relpolines is done using assembly
> macro magic. There are alternative solutions, but this one is
> relatively simple and transparent. There is also logic to retrain the
> software predictor, but the policy it uses may need to be refined.
> 
> Eventually the results are not bad (2 VCPU VM, throughput reported):
> 
>   baserelpoline
>   -
> nginx  22898   25178 (+10%)
> redis-ycsb 24523   25486 (+4%)
> dbench 21442103 (+2%)
> 
> When retpolines are disabled, and if retraining is off, performance
> benefits are up to 2% (nginx), but are much less impressive.
 
 Hi Nadav,
 
 Peter pointed me to these patches during a discussion about retpoline
 profiling.  Personally, I think this is brilliant.  This could help
 networking and filesystem intensive workloads a lot.
>>> 
>>> Thanks! I was a bit held-back by the relatively limited number of responses.
>> 
>> It is a rather, erm, ambitious idea, maybe they were speechless :-)
>> 
>>> I finished another version two weeks ago, and every day I think: "should it
>>> be RFCv2 or v1”, ending up not sending it…
>>> 
>>> There is one issue that I realized while working on the new version: I’m not
>>> sure it is well-defined what an outline retpoline is allowed to do. The
>>> indirect branch promotion code can change rflags, which might cause
>>> correction issues. In practice, using gcc, it is not a problem.
>> 
>> Callees can clobber flags, so it seems fine to me.
> 
> Just to check I understand your approach right: you made a macro
> called "call", and you're therefore causing all instances of "call" to
> become magic?  This is... terrifying.  It's even plausibly worse than
> "#define if" :)  The scariest bit is that it will impact inline asm as
> well.  Maybe a gcc plugin would be less alarming?

It is likely to look less alarming. When I looked at the inline retpoline
implementation of gcc, it didn’t look much better than what I did - it
basically just emits assembly instructions.

Anyhow, I look (again) into using gcc-plugins.

>>> 1. An indirect branch inside the BP handler might be the one we patch
>> 
>> I _think_ nested INT3s should be doable, because they don't use IST.
>> Maybe Andy can clarify.
> 
> int3 should survive recursion these days.  Although I admit I'm
> currently wondering what happens if one thread puts a kprobe on an
> address that another thread tries to text_poke.

The issue I regarded is having an indirect call *inside* the handler.
For example, you try to patch the call to bp_int3_handler and then get an
int3. They can be annotated to prevent them from being patched. Then again,
I need to see how gcc plugins can get these annotations.

> 
> Also, this relpoline magic is likely to start patching text at runtime
> on a semi-regular basis.  This type of patching is *slow*.  Is it a
> problem?

It didn’t appear so. Although there are >1 indirect

[PATCH] ASoC: stm32: sai: Fix unsigned comparison with less than zero

2018-11-28 Thread Wen Yang

The return from the call to stm32_sai_get_clk_div can be a negative error
code however this is being assigned to an unsigned variable 'div'
hence the check is always false. Fix this by making 'div' an
int.

Detected by Coccinelle ("Unsigned expression compared with zero:stages
< 0")

Signed-off-by: Wen Yang 
CC: Mark Brown 
CC: Olivier Moysan 
CC: Arnaud Pouliquen 
CC: Liam Girdwood 
CC: Jaroslav Kysela 
CC: Takashi Iwai 
CC: Maxime Coquelin 
CC: Alexandre Torgue 
CC: Julia Lawall 
CC: Wen Yang 
CC: linux-st...@st-md-mailman.stormreply.com
CC: linux-kernel@vger.kernel.org
---
 sound/soc/stm/stm32_sai_sub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c
index 211589b0b2ef..6e9fff0f9ed7 100644
--- a/sound/soc/stm/stm32_sai_sub.c
+++ b/sound/soc/stm/stm32_sai_sub.c
@@ -336,7 +336,7 @@ static int stm32_sai_mclk_set_rate(struct clk_hw *hw, 
unsigned long rate,
 {
struct stm32_sai_mclk_data *mclk = to_mclk_data(hw);
struct stm32_sai_sub_data *sai = mclk->sai_data;
-   unsigned int div;
+   int div;
int ret;
 
div = stm32_sai_get_clk_div(sai, parent_rate, rate);
-- 
2.19.1

Re: [PATCH v3 7/7] zram: writeback throttle

2018-11-28 Thread Sergey Senozhatsky

On (11/27/18 14:54), Minchan Kim wrote:
> diff --git a/Documentation/ABI/testing/sysfs-block-zram 
> b/Documentation/ABI/testing/sysfs-block-zram
> index 65fc33b2f53b..9d2339a485c8 100644
> --- a/Documentation/ABI/testing/sysfs-block-zram
> +++ b/Documentation/ABI/testing/sysfs-block-zram
> @@ -121,3 +121,12 @@ Contact: Minchan Kim 
>   The bd_stat file is read-only and represents backing device's
>   statistics (bd_count, bd_reads, bd_writes) in a format
>   similar to block layer statistics file format.
> +
> +What:/sys/block/zram/writeback_limit
> +Date:November 2018
> +Contact: Minchan Kim 
> +Description:
> + The writeback_limit file is read-write and specifies the maximum
> + amount of writeback ZRAM can do. The limit could be changed
> + in run time and "0" means disable the limit.
> + No limit is the initial state.
> diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
> index 906df97527a7..64b61925e475 100644
> --- a/Documentation/blockdev/zram.txt
> +++ b/Documentation/blockdev/zram.txt
> @@ -164,6 +164,8 @@ reset WOtrigger device reset
>  mem_used_max  WOreset the `mem_used_max' counter (see later)
>  mem_limit WOspecifies the maximum amount of memory ZRAM can use
>  to store the compressed data
> +writeback_limit   WOspecifies the maximum amount of write IO zram can
> + write out to backing device as 4KB unit
   
page size units?

-ss

Re: [PATCH] mm: remove pte_lock_deinit()

2018-11-28 Thread Matthew Wilcox

On Wed, Nov 28, 2018 at 04:55:25PM -0700, Yu Zhao wrote:
> Pagetable page doesn't touch page->mapping or have any used field
> that overlaps with it. No need to clear mapping in dtor. In fact,
> doing so might mask problems that otherwise would be detected by
> bad_page().
> 
> Signed-off-by: Yu Zhao 

Reviewed-by: Matthew Wilcox 

I do have plans to use page->mapping for pt_mm, but this patch won't
get in my way when I find the round tuits to do that work.

[PATCH v27 1/2] arm64: dts: mt8173: Add GCE node

2018-11-28 Thread Houlong Wei

This patch adds the device node of the GCE hardware for CMDQ module.

Signed-off-by: Houlong Wei 
Signed-off-by: HS Liao 
---
 arch/arm64/boot/dts/mediatek/mt8173.dtsi |   10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index abd2f15..412ffd4 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "mt8173-pinfunc.h"
 
 / {
@@ -521,6 +522,15 @@
status = "disabled";
};
 
+   gce: mailbox@10212000 {
+   compatible = "mediatek,mt8173-gce";
+   reg = <0 0x10212000 0 0x1000>;
+   interrupts = ;
+   clocks = < CLK_INFRA_GCE>;
+   clock-names = "gce";
+   #mbox-cells = <3>;
+   };
+
mipi_tx0: mipi-dphy@10215000 {
compatible = "mediatek,mt8173-mipi-tx";
reg = <0 0x10215000 0 0x1000>;
-- 
1.7.9.5

[PATCH v27 2/2] soc: mediatek: Add Mediatek CMDQ helper

2018-11-28 Thread Houlong Wei

Add Mediatek CMDQ helper to create CMDQ packet and assemble GCE op code.

Signed-off-by: Houlong Wei 
Signed-off-by: HS Liao 
---
 drivers/soc/mediatek/Kconfig   |   12 ++
 drivers/soc/mediatek/Makefile  |1 +
 drivers/soc/mediatek/mtk-cmdq-helper.c |  300 
 include/linux/soc/mediatek/mtk-cmdq.h  |  133 ++
 4 files changed, 446 insertions(+)
 create mode 100644 drivers/soc/mediatek/mtk-cmdq-helper.c
 create mode 100644 include/linux/soc/mediatek/mtk-cmdq.h

diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index a7d0667..17bd759 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -4,6 +4,18 @@
 menu "MediaTek SoC drivers"
depends on ARCH_MEDIATEK || COMPILE_TEST
 
+config MTK_CMDQ
+   tristate "MediaTek CMDQ Support"
+   depends on ARCH_MEDIATEK || COMPILE_TEST
+   select MAILBOX
+   select MTK_CMDQ_MBOX
+   select MTK_INFRACFG
+   help
+ Say yes here to add support for the MediaTek Command Queue (CMDQ)
+ driver. The CMDQ is used to help read/write registers with critical
+ time limitation, such as updating display configuration during the
+ vblank.
+
 config MTK_INFRACFG
bool "MediaTek INFRACFG Support"
select REGMAP
diff --git a/drivers/soc/mediatek/Makefile b/drivers/soc/mediatek/Makefile
index 12998b0..64ce5ee 100644
--- a/drivers/soc/mediatek/Makefile
+++ b/drivers/soc/mediatek/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_MTK_CMDQ) += mtk-cmdq-helper.o
 obj-$(CONFIG_MTK_INFRACFG) += mtk-infracfg.o
 obj-$(CONFIG_MTK_PMIC_WRAP) += mtk-pmic-wrap.o
 obj-$(CONFIG_MTK_SCPSYS) += mtk-scpsys.o
diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c 
b/drivers/soc/mediatek/mtk-cmdq-helper.c
new file mode 100644
index 000..ff9fef5
--- /dev/null
+++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define CMDQ_ARG_A_WRITE_MASK  0x
+#define CMDQ_WRITE_ENABLE_MASK BIT(0)
+#define CMDQ_EOC_IRQ_ENBIT(0)
+#define CMDQ_EOC_CMD   ((u64)((CMDQ_CODE_EOC << CMDQ_OP_CODE_SHIFT)) \
+   << 32 | CMDQ_EOC_IRQ_EN)
+
+static void cmdq_client_timeout(struct timer_list *t)
+{
+   struct cmdq_client *client = from_timer(client, t, timer);
+
+   dev_err(client->client.dev, "cmdq timeout!\n");
+}
+
+struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 
timeout)
+{
+   struct cmdq_client *client;
+
+   client = kzalloc(sizeof(*client), GFP_KERNEL);
+   if (!client)
+   return (struct cmdq_client *)-ENOMEM;
+
+   client->timeout_ms = timeout;
+   if (timeout != CMDQ_NO_TIMEOUT) {
+   spin_lock_init(>lock);
+   timer_setup(>timer, cmdq_client_timeout, 0);
+   }
+   client->pkt_cnt = 0;
+   client->client.dev = dev;
+   client->client.tx_block = false;
+   client->chan = mbox_request_channel(>client, index);
+
+   if (IS_ERR(client->chan)) {
+   long err;
+
+   dev_err(dev, "failed to request channel\n");
+   err = PTR_ERR(client->chan);
+   kfree(client);
+
+   return ERR_PTR(err);
+   }
+
+   return client;
+}
+EXPORT_SYMBOL(cmdq_mbox_create);
+
+void cmdq_mbox_destroy(struct cmdq_client *client)
+{
+   if (client->timeout_ms != CMDQ_NO_TIMEOUT) {
+   spin_lock(>lock);
+   del_timer_sync(>timer);
+   spin_unlock(>lock);
+   }
+   mbox_free_channel(client->chan);
+   kfree(client);
+}
+EXPORT_SYMBOL(cmdq_mbox_destroy);
+
+struct cmdq_pkt *cmdq_pkt_create(struct cmdq_client *client, size_t size)
+{
+   struct cmdq_pkt *pkt;
+   struct device *dev;
+   dma_addr_t dma_addr;
+
+   pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+   if (!pkt)
+   return ERR_PTR(-ENOMEM);
+   pkt->va_base = kzalloc(size, GFP_KERNEL);
+   if (!pkt->va_base) {
+   kfree(pkt);
+   return ERR_PTR(-ENOMEM);
+   }
+   pkt->buf_size = size;
+   pkt->cl = (void *)client;
+
+   dev = client->chan->mbox->dev;
+   dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size,
+ DMA_TO_DEVICE);
+   if (dma_mapping_error(dev, dma_addr)) {
+   dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size);
+   kfree(pkt->va_base);
+   kfree(pkt);
+   return ERR_PTR(-ENOMEM);
+   }
+
+   pkt->pa_base = dma_addr;
+
+   return pkt;
+}
+EXPORT_SYMBOL(cmdq_pkt_create);
+
+void cmdq_pkt_destroy(struct cmdq_pkt *pkt)
+{
+   struct cmdq_client *client = (struct cmdq_client *)pkt->cl;
+
+   dma_unmap_single(client->chan->mbox->dev, pkt->pa_base, pkt->buf_size,
+

Re: [PATCH 0/8] HID: MS and Logitech high-resolution scroll wheel support

2018-11-28 Thread Peter Hutterer

On Wed, Nov 28, 2018 at 03:22:14PM -0800, Harry Cutts wrote:
> On Wed, 21 Nov 2018 at 22:34, Peter Hutterer  wrote:
> > [snip]
> > Devices tested:
> > - Microsoft Comfort Optical Mouse 3000
> > - Microsoft Sculpt Ergonomic Mouse
> > - Microsoft Surface mouse
> > - Logitech MX Anywhere 2S
> >
> > The following devices were tested for the HID feature and didn't have it:
> > - Logitech G500s, G303
> > - Roccat Kone XTD
> > - all the cheap Lenovo, HP, Dell, Logitech USB mice that come with a
> >   workstation that I could find in the local office
> > - Etekcity something something
> > - Razer Imperator
> > - Microsoft Classic IntelliMouse
> > - Microsoft Surface Mobile Mouse
> 
> I just tested the patches with the Microsoft Comfort Optical Mouse
> 3000. I also tested with the Microsoft Surface Precision mouse [0],
> and like the Surface Mobile mouse it didn't seem to report the HID
> feature (at least, it was only reporting REL_WHEEL_HI_RES changes of
> 120 in evtest).

IIRC that's the same mouse benjamin has and it does have the HID feature, it
just ends up reporting the same number of clicks anyway so there's no
visible effect. Which in itself is a good sign for the patch series, I
guess ;)

> For the series:
> Acked-by: Harry Cutts 
> Verified-by: Harry Cutts 

thanks, much appreciated.

Cheers,
   Peter

> Harry Cutts
> Chrome OS Touch/Input team
> 
> [0]: https://microsoft.com/en-us/p/surface-precision-mouse/8qc5p0d8ddjt

[PATCH v3 0/4] powerpc: system call table generation support

2018-11-28 Thread Firoz Khan

The purpose of this patch series is, we can easily
add/modify/delete system call table support by cha-
nging entry in syscall.tbl file instead of manually
changing many files. The other goal is to unify the 
system call table generation support implementation 
across all the architectures. 

The system call tables are in a different format in
each architecture. It is difficult to add, modify
or delete system calls in the respective files
manually. To make this easy, we keep a script which
generates the uapi header file and the syscall table
file.

syscall.tbl contains the list of available system 
calls along with system call number and correspond-
ing entry point. Add a new system call in this arch-
itecture will be possible by adding new entry in 
the syscall.tbl file.

Adding a new table entry consisting of:
- System call number.
- ABI.
- System call name.
- Entry point name.
- Compat entry name, if required.
- spu entry name, if required.

ARM, s390 and x86 architectures already have similar
support. I leverage their implementation to
come up with a generic solution.

I have done the same support for work for alpha, 
ia64, m68k, microblaze, mips, parisc, sh, sparc, 
and xtensa. Below mentioned git repository contains
more details about the workflow.

https://github.com/frzkhn/system_call_table_generator/

Finally, this is the ground work to solve the Y2038
issue. We need to add two dozen of system calls to 
solve Y2038 issue. So this patch series will help to
add new system calls easily by adding new entry in the
syscall.tbl.

changes since v2:
 - modified/optimized the syscall.tbl to avoid duplicate
   for the spu entries.
 - updated the syscalltbl.sh to meet the above point.

changes since v1:
 - optimized/updated the syscall table generation 
   scripts.
 - fixed all mixed indentation issues in syscall.tbl.
 - added "comments" in syscall_*.tbl.
 - changed from generic-y to generated-y in Kbuild.

Firoz Khan (4):
  powerpc: add __NR_syscalls along with NR_syscalls
  powerpc: move macro definition from asm/systbl.h
  powerpc: add system call table generation support
  powerpc: generate uapi header and system call table files

 arch/powerpc/Makefile   |   3 +
 arch/powerpc/include/asm/Kbuild |   4 +
 arch/powerpc/include/asm/systbl.h   | 396 --
 arch/powerpc/include/asm/unistd.h   |   3 +-
 arch/powerpc/include/uapi/asm/Kbuild|   2 +
 arch/powerpc/include/uapi/asm/unistd.h  | 389 +
 arch/powerpc/kernel/Makefile|  10 -
 arch/powerpc/kernel/syscalls/Makefile   |  63 
 arch/powerpc/kernel/syscalls/syscall.tbl| 427 
 arch/powerpc/kernel/syscalls/syscallhdr.sh  |  36 +++
 arch/powerpc/kernel/syscalls/syscalltbl.sh  |  36 +++
 arch/powerpc/kernel/systbl.S|  37 +--
 arch/powerpc/kernel/systbl_chk.c|  60 
 arch/powerpc/platforms/cell/spu_callbacks.c |  17 +-
 14 files changed, 591 insertions(+), 892 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/systbl.h
 create mode 100644 arch/powerpc/kernel/syscalls/Makefile
 create mode 100644 arch/powerpc/kernel/syscalls/syscall.tbl
 create mode 100644 arch/powerpc/kernel/syscalls/syscallhdr.sh
 create mode 100644 arch/powerpc/kernel/syscalls/syscalltbl.sh
 delete mode 100644 arch/powerpc/kernel/systbl_chk.c

-- 
1.9.1

[PATCH AUTOSEL 4.14 19/35] mtd: rawnand: qcom: Namespace prefix some commands

2018-11-28 Thread Sasha Levin

From: Olof Johansson 

[ Upstream commit 33bf5519ae5dd356b182a94e3622f42860274a38 ]

PAGE_READ is used by RISC-V arch code included through mm headers,
and it makes sense to bring in a prefix on these in the driver.

drivers/mtd/nand/raw/qcom_nandc.c:153: warning: "PAGE_READ" redefined
 #define PAGE_READ   0x2
In file included from include/linux/memremap.h:7,
 from include/linux/mm.h:27,
 from include/linux/scatterlist.h:8,
 from include/linux/dma-mapping.h:11,
 from drivers/mtd/nand/raw/qcom_nandc.c:17:
arch/riscv/include/asm/pgtable.h:48: note: this is the location of the previous 
definition

Caught by riscv allmodconfig.

Signed-off-by: Olof Johansson 
Reviewed-by: Miquel Raynal 
Signed-off-by: Boris Brezillon 
Signed-off-by: Sasha Levin 
---
 drivers/mtd/nand/qcom_nandc.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index b49ca02b399d..09d5f7df6023 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -149,15 +149,15 @@
 #defineNAND_VERSION_MINOR_SHIFT16
 
 /* NAND OP_CMDs */
-#definePAGE_READ   0x2
-#definePAGE_READ_WITH_ECC  0x3
-#definePAGE_READ_WITH_ECC_SPARE0x4
-#definePROGRAM_PAGE0x6
-#definePAGE_PROGRAM_WITH_ECC   0x7
-#definePROGRAM_PAGE_SPARE  0x9
-#defineBLOCK_ERASE 0xa
-#defineFETCH_ID0xb
-#defineRESET_DEVICE0xd
+#defineOP_PAGE_READ0x2
+#defineOP_PAGE_READ_WITH_ECC   0x3
+#defineOP_PAGE_READ_WITH_ECC_SPARE 0x4
+#defineOP_PROGRAM_PAGE 0x6
+#defineOP_PAGE_PROGRAM_WITH_ECC0x7
+#defineOP_PROGRAM_PAGE_SPARE   0x9
+#defineOP_BLOCK_ERASE  0xa
+#defineOP_FETCH_ID 0xb
+#defineOP_RESET_DEVICE 0xd
 
 /* Default Value for NAND_DEV_CMD_VLD */
 #define NAND_DEV_CMD_VLD_VAL   (READ_START_VLD | WRITE_START_VLD | \
@@ -629,11 +629,11 @@ static void update_rw_regs(struct qcom_nand_host *host, 
int num_cw, bool read)
 
if (read) {
if (host->use_ecc)
-   cmd = PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE;
+   cmd = OP_PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE;
else
-   cmd = PAGE_READ | PAGE_ACC | LAST_PAGE;
+   cmd = OP_PAGE_READ | PAGE_ACC | LAST_PAGE;
} else {
-   cmd = PROGRAM_PAGE | PAGE_ACC | LAST_PAGE;
+   cmd = OP_PROGRAM_PAGE | PAGE_ACC | LAST_PAGE;
}
 
if (host->use_ecc) {
@@ -1030,7 +1030,7 @@ static int nandc_param(struct qcom_nand_host *host)
 * in use. we configure the controller to perform a raw read of 512
 * bytes to read onfi params
 */
-   nandc_set_reg(nandc, NAND_FLASH_CMD, PAGE_READ | PAGE_ACC | LAST_PAGE);
+   nandc_set_reg(nandc, NAND_FLASH_CMD, OP_PAGE_READ | PAGE_ACC | 
LAST_PAGE);
nandc_set_reg(nandc, NAND_ADDR0, 0);
nandc_set_reg(nandc, NAND_ADDR1, 0);
nandc_set_reg(nandc, NAND_DEV0_CFG0, 0 << CW_PER_PAGE
@@ -1084,7 +1084,7 @@ static int erase_block(struct qcom_nand_host *host, int 
page_addr)
struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 
nandc_set_reg(nandc, NAND_FLASH_CMD,
- BLOCK_ERASE | PAGE_ACC | LAST_PAGE);
+ OP_BLOCK_ERASE | PAGE_ACC | LAST_PAGE);
nandc_set_reg(nandc, NAND_ADDR0, page_addr);
nandc_set_reg(nandc, NAND_ADDR1, 0);
nandc_set_reg(nandc, NAND_DEV0_CFG0,
@@ -1115,7 +1115,7 @@ static int read_id(struct qcom_nand_host *host, int 
column)
if (column == -1)
return 0;
 
-   nandc_set_reg(nandc, NAND_FLASH_CMD, FETCH_ID);
+   nandc_set_reg(nandc, NAND_FLASH_CMD, OP_FETCH_ID);
nandc_set_reg(nandc, NAND_ADDR0, column);
nandc_set_reg(nandc, NAND_ADDR1, 0);
nandc_set_reg(nandc, NAND_FLASH_CHIP_SELECT,
@@ -1136,7 +1136,7 @@ static int reset(struct qcom_nand_host *host)
struct nand_chip *chip = >chip;
struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 
-   nandc_set_reg(nandc, NAND_FLASH_CMD, RESET_DEVICE);
+   nandc_set_reg(nandc, NAND_FLASH_CMD, OP_RESET_DEVICE);
nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
 
write_reg_dma(nandc, NAND_FLASH_CMD, 1, NAND_BAM_NEXT_SGL);
-- 
2.17.1

[PATCH AUTOSEL 4.14 26/35] net: faraday: ftmac100: remove netif_running(netdev) check before disabling interrupts

2018-11-28 Thread Sasha Levin

From: Vincent Chen 

[ Upstream commit 426a593e641ebf0d9288f0a2fcab644a86820220 ]

In the original ftmac100_interrupt(), the interrupts are only disabled when
the condition "netif_running(netdev)" is true. However, this condition
causes a kernel hang in the following case. When the user requests to
disable the network device, kernel will clear the bit __LINK_STATE_START
from the dev->state and then call the driver's ndo_stop function. Network
device interrupts are not blocked during this process. If an interrupt
occurs between clearing __LINK_STATE_START and stopping network device,
kernel cannot disable the interrupts due to the condition
"netif_running(netdev)" in the ISR. Hence, kernel will hang due to the
continuous interruption of the network device.

In order to solve the above problem, the interrupts of the network device
should always be disabled in the ISR without being restricted by the
condition "netif_running(netdev)".

[V2]
Remove unnecessary curly braces.

Signed-off-by: Vincent Chen 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/faraday/ftmac100.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftmac100.c 
b/drivers/net/ethernet/faraday/ftmac100.c
index 66928a922824..415fd93e9930 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -870,11 +870,10 @@ static irqreturn_t ftmac100_interrupt(int irq, void 
*dev_id)
struct net_device *netdev = dev_id;
struct ftmac100 *priv = netdev_priv(netdev);
 
-   if (likely(netif_running(netdev))) {
-   /* Disable interrupts for polling */
-   ftmac100_disable_all_int(priv);
+   /* Disable interrupts for polling */
+   ftmac100_disable_all_int(priv);
+   if (likely(netif_running(netdev)))
napi_schedule(>napi);
-   }
 
return IRQ_HANDLED;
 }
-- 
2.17.1

[PATCH AUTOSEL 4.14 16/35] net/mlx4_core: Fix uninitialized variable compilation warning

2018-11-28 Thread Sasha Levin

From: Tariq Toukan 

[ Upstream commit 3ea7e7ea53c9f6ee41cb69a29c375fe9dd9a56a7 ]

Initialize the uid variable to zero to avoid the compilation warning.

Fixes: 7a89399ffad7 ("net/mlx4: Add mlx4_bitmap zone allocator")
Signed-off-by: Tariq Toukan 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx4/alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c 
b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 6dabd983e7e0..94f4dc4a77e9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -337,7 +337,7 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator 
*zone_alloc)
 static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count,
  int align, u32 skip_mask, u32 *puid)
 {
-   u32 uid;
+   u32 uid = 0;
u32 res;
struct mlx4_zone_allocator *zone_alloc = zone->allocator;
struct mlx4_zone_entry *curr_node;
-- 
2.17.1

[PATCH AUTOSEL 4.14 07/35] HID: input: Ignore battery reported by Symbol DS4308

2018-11-28 Thread Sasha Levin

From: Benson Leung 

[ Upstream commit 0fd791841a6d67af1155a9c3de54dea51220721e ]

The Motorola/Zebra Symbol DS4308-HD is a handheld USB barcode scanner
which does not have a battery, but reports one anyway that always has
capacity 2.

Let's apply the IGNORE quirk to prevent it from being treated like a
power supply so that userspaces don't get confused that this
accessory is almost out of power and warn the user that they need to charge
their wired barcode scanner.

Reported here: https://bugs.chromium.org/p/chromium/issues/detail?id=804720

Signed-off-by: Benson Leung 
Reviewed-by: Benjamin Tissoires 
Signed-off-by: Benjamin Tissoires 
Signed-off-by: Sasha Levin 
---
 drivers/hid/hid-ids.h   | 1 +
 drivers/hid/hid-input.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 3fc8c0d67592..87904d2adadb 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1001,6 +1001,7 @@
 #define USB_VENDOR_ID_SYMBOL   0x05e0
 #define USB_DEVICE_ID_SYMBOL_SCANNER_1 0x0800
 #define USB_DEVICE_ID_SYMBOL_SCANNER_2 0x1300
+#define USB_DEVICE_ID_SYMBOL_SCANNER_3 0x1200
 
 #define USB_VENDOR_ID_SYNAPTICS0x06cb
 #define USB_DEVICE_ID_SYNAPTICS_TP 0x0001
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index bb984cc9753b..d146a9b545ee 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -325,6 +325,9 @@ static const struct hid_device_id hid_battery_quirks[] = {
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM,
USB_DEVICE_ID_ELECOM_BM084),
  HID_BATTERY_QUIRK_IGNORE },
+   { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL,
+   USB_DEVICE_ID_SYMBOL_SCANNER_3),
+ HID_BATTERY_QUIRK_IGNORE },
{}
 };
 
-- 
2.17.1

[PATCH AUTOSEL 4.14 10/35] amd/iommu: Fix Guest Virtual APIC Log Tail Address Register

2018-11-28 Thread Sasha Levin

From: Filippo Sironi 

[ Upstream commit ab99be4683d9db33b100497d463274ebd23bd67e ]

This register should have been programmed with the physical address
of the memory location containing the shadow tail pointer for
the guest virtual APIC log instead of the base address.

Fixes: 8bda0cfbdc1a  ('iommu/amd: Detect and initialize guest vAPIC log')
Signed-off-by: Filippo Sironi 
Signed-off-by: Wei Wang 
Signed-off-by: Suravee Suthikulpanit 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/amd_iommu_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 6fe2d0346073..b97984a5ddad 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -796,7 +796,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
, sizeof(entry));
-   entry = (iommu_virt_to_phys(iommu->ga_log) & 0xFULL) & 
~7ULL;
+   entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
+(BIT_ULL(52)-1)) & ~7ULL;
memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
, sizeof(entry));
writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
-- 
2.17.1

[PATCH AUTOSEL 4.14 24/35] qed: Fix QM getters to always return a valid pq

2018-11-28 Thread Sasha Levin

From: Denis Bolotin 

[ Upstream commit eb62cca9bee842e5b23bd0ddfb1f271ca95e8759 ]

The getter callers don't know the valid Physical Queue (PQ) values.
This patch makes sure that a valid PQ will always be returned.

The patch consists of 3 fixes:

 - When qed_init_qm_get_idx_from_flags() receives a disabled flag, it
   returned PQ 0, which can potentially be another function's pq. Verify
   that flag is enabled, otherwise return default start_pq.

 - When qed_init_qm_get_idx_from_flags() receives an unknown flag, it
   returned NULL and could lead to a segmentation fault. Return default
   start_pq instead.

 - A modulo operation was added to MCOS/VFS PQ getters to make sure the
   PQ returned is in range of the required flag.

Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration")
Signed-off-by: Denis Bolotin 
Signed-off-by: Michal Kalderon 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 24 +++
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c 
b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index a51cd1028ecb..16953c4ebd71 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -446,6 +446,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn 
*p_hwfn,
goto err;
}
 
+   if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) {
+   DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags);
+   goto err;
+   }
+
switch (pq_flags) {
case PQ_FLAGS_RLS:
return _info->first_rl_pq;
@@ -468,8 +473,7 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn 
*p_hwfn,
}
 
 err:
-   DP_ERR(p_hwfn, "BAD pq flags %d\n", pq_flags);
-   return NULL;
+   return _info->start_pq;
 }
 
 /* save pq index in qm info */
@@ -493,20 +497,32 @@ u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc)
 {
u8 max_tc = qed_init_qm_get_num_tcs(p_hwfn);
 
+   if (max_tc == 0) {
+   DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n",
+  PQ_FLAGS_MCOS);
+   return p_hwfn->qm_info.start_pq;
+   }
+
if (tc > max_tc)
DP_ERR(p_hwfn, "tc %d must be smaller than %d\n", tc, max_tc);
 
-   return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + tc;
+   return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + (tc % max_tc);
 }
 
 u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf)
 {
u16 max_vf = qed_init_qm_get_num_vfs(p_hwfn);
 
+   if (max_vf == 0) {
+   DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n",
+  PQ_FLAGS_VFS);
+   return p_hwfn->qm_info.start_pq;
+   }
+
if (vf > max_vf)
DP_ERR(p_hwfn, "vf %d must be smaller than %d\n", vf, max_vf);
 
-   return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf;
+   return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + (vf % max_vf);
 }
 
 u16 qed_get_cm_pq_idx_rl(struct qed_hwfn *p_hwfn, u8 rl)
-- 
2.17.1

[PATCH AUTOSEL 4.14 18/35] gpio: mockup: fix indicated direction

2018-11-28 Thread Sasha Levin

From: Bartosz Golaszewski 

[ Upstream commit bff466bac59994cfcceabe4d0be5fdc1c20cd5b8 ]

Commit 3edfb7bd76bd ("gpiolib: Show correct direction from the
beginning") fixed an existing issue but broke libgpiod tests by
changing the default direction of dummy lines to output.

We don't break user-space so make gpio-mockup behave as before.

Signed-off-by: Bartosz Golaszewski 
Signed-off-by: Linus Walleij 
Signed-off-by: Sasha Levin 
---
 drivers/gpio/gpio-mockup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 9532d86a82f7..d99c8d8da9a0 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -35,8 +35,8 @@
 #define GPIO_MOCKUP_MAX_RANGES (GPIO_MOCKUP_MAX_GC * 2)
 
 enum {
-   GPIO_MOCKUP_DIR_OUT = 0,
-   GPIO_MOCKUP_DIR_IN = 1,
+   GPIO_MOCKUP_DIR_IN = 0,
+   GPIO_MOCKUP_DIR_OUT = 1,
 };
 
 /*
@@ -112,7 +112,7 @@ static int gpio_mockup_get_direction(struct gpio_chip *gc, 
unsigned int offset)
 {
struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
 
-   return chip->lines[offset].dir;
+   return !chip->lines[offset].dir;
 }
 
 static int gpio_mockup_name_lines(struct device *dev,
-- 
2.17.1

[PATCH AUTOSEL 4.14 15/35] net/mlx4_core: Zero out lkey field in SW2HW_MPT fw command

2018-11-28 Thread Sasha Levin

From: Jack Morgenstein 

[ Upstream commit bd85fbc2038a1bbe84990b23ff69b6fc81a32b2c ]

When re-registering a user mr, the mpt information for the
existing mr when running SRIOV is obtained via the QUERY_MPT
fw command. The returned information includes the mpt's lkey.

This retrieved mpt information is used to move the mpt back
to hardware ownership in the rereg flow (via the SW2HW_MPT
fw command when running SRIOV).

The fw API spec states that for SW2HW_MPT, the lkey field
must be zero. Any ConnectX-3 PF driver which checks for strict spec
adherence will return failure for SW2HW_MPT if the lkey field is not
zero (although the fw in practice ignores this field for SW2HW_MPT).

Thus, in order to conform to the fw API spec, set the lkey field to zero
before invoking SW2HW_MPT when running SRIOV.

Fixes: e630664c8383 ("mlx4_core: Add helper functions to support MR 
re-registration")
Signed-off-by: Jack Morgenstein 
Signed-off-by: Tariq Toukan 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx4/mr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c 
b/drivers/net/ethernet/mellanox/mlx4/mr.c
index c7c0764991c9..20043f82c1d8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -363,6 +363,7 @@ int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct 
mlx4_mr *mmr,
container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
 buf);
 
+   (*mpt_entry)->lkey = 0;
err = mlx4_SW2HW_MPT(dev, mailbox, key);
}
 
-- 
2.17.1

[PATCH AUTOSEL 4.14 09/35] batman-adv: Expand merged fragment buffer for full packet

2018-11-28 Thread Sasha Levin

From: Sven Eckelmann 

[ Upstream commit d7d8bbb40a5b1f682ee6589e212934f4c6b8ad60 ]

The complete size ("total_size") of the fragmented packet is stored in the
fragment header and in the size of the fragment chain. When the fragments
are ready for merge, the skbuff's tail of the first fragment is expanded to
have enough room after the data pointer for at least total_size. This means
that it gets expanded by total_size - first_skb->len.

But this is ignoring the fact that after expanding the buffer, the fragment
header is pulled from this buffer. Assuming that the tailroom of the
buffer was already 0, the buffer after the data pointer of the skbuff is
now only total_size - len(fragment_header) large. When the merge function
is then processing the remaining fragments, the code to copy the data over
to the merged skbuff will cause an skb_over_panic when it tries to actually
put enough data to fill the total_size bytes of the packet.

The size of the skb_pull must therefore also be taken into account when the
buffer's tailroom is expanded.

Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge")
Reported-by: Martin Weinelt 
Co-authored-by: Linus Lüssing 
Signed-off-by: Sven Eckelmann 
Signed-off-by: Simon Wunderlich 
Signed-off-by: Sasha Levin 
---
 net/batman-adv/fragmentation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index b6abd19ab23e..c6d37d22bd12 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -274,7 +274,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
kfree(entry);
 
packet = (struct batadv_frag_packet *)skb_out->data;
-   size = ntohs(packet->total_size);
+   size = ntohs(packet->total_size) + hdr_size;
 
/* Make room for the rest of the fragments. */
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
-- 
2.17.1

[PATCH AUTOSEL 4.14 22/35] mtd: spi-nor: Fix Cadence QSPI page fault kernel panic

2018-11-28 Thread Sasha Levin

From: Thor Thayer 

[ Upstream commit a6a66f80c85e8e20573ca03fabf32445954a88d5 ]

The current Cadence QSPI driver caused a kernel panic sporadically
when writing to QSPI. The problem was caused by writing more bytes
than needed because the QSPI operated on 4 bytes at a time.

[   11.202044] Unable to handle kernel paging request at virtual address 
bffd3000
[   11.209254] pgd = e463054d
[   11.211948] [bffd3000] *pgd=2fffb811, *pte=00000000, *ppte=00000000
[   11.218202] Internal error: Oops: 7 [#1] SMP ARM
[   11.222797] Modules linked in:
[   11.225844] CPU: 1 PID: 1317 Comm: systemd-hwdb Not tainted 
4.17.7-d0c45cd44a8f
[   11.235796] Hardware name: Altera SOCFPGA Arria10
[   11.240487] PC is at __raw_writesl+0x70/0xd4
[   11.244741] LR is at cqspi_write+0x1a0/0x2cc

On a page boundary limit the number of bytes copied from the tx buffer
to remain within the page.

This patch uses a temporary buffer to hold the 4 bytes to write and then
copies only the bytes required from the tx buffer.

Reported-by: Adrian Amborzewicz 
Signed-off-by: Thor Thayer 
Signed-off-by: Boris Brezillon 
Signed-off-by: Sasha Levin 
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c 
b/drivers/mtd/spi-nor/cadence-quadspi.c
index 8d89204b90d2..f22dd34f4f83 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -625,9 +625,23 @@ static int cqspi_indirect_write_execute(struct spi_nor 
*nor,
   reg_base + CQSPI_REG_INDIRECTWR);
 
while (remaining > 0) {
+   size_t write_words, mod_bytes;
+
write_bytes = remaining > page_size ? page_size : remaining;
-   iowrite32_rep(cqspi->ahb_base, txbuf,
- DIV_ROUND_UP(write_bytes, 4));
+   write_words = write_bytes / 4;
+   mod_bytes = write_bytes % 4;
+   /* Write 4 bytes at a time then single bytes. */
+   if (write_words) {
+   iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
+   txbuf += (write_words * 4);
+   }
+   if (mod_bytes) {
+   unsigned int temp = 0x;
+
+   memcpy(, txbuf, mod_bytes);
+   iowrite32(temp, cqspi->ahb_base);
+   txbuf += mod_bytes;
+   }
 
ret = wait_for_completion_timeout(>transfer_complete,
  msecs_to_jiffies
@@ -638,7 +652,6 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor,
goto failwr;
}
 
-   txbuf += write_bytes;
remaining -= write_bytes;
 
if (remaining > 0)
-- 
2.17.1

[PATCH AUTOSEL 4.14 21/35] HID: multitouch: Add pointstick support for Cirque Touchpad

2018-11-28 Thread Sasha Levin

From: Kai-Heng Feng 

[ Upstream commit 12d43aacf9a74d0eb66fd0ea54ebeb79ca28940f ]

Cirque Touchpad/Pointstick combo is similar to Alps devices, it requires
MT_CLS_WIN_8_DUAL to expose its pointstick as a mouse.

Signed-off-by: Kai-Heng Feng 
Signed-off-by: Jiri Kosina 
Signed-off-by: Sasha Levin 
---
 drivers/hid/hid-ids.h| 3 +++
 drivers/hid/hid-multitouch.c | 6 ++
 2 files changed, 9 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 87904d2adadb..fcc688df694c 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -266,6 +266,9 @@
 
 #define USB_VENDOR_ID_CIDC 0x1677
 
+#define I2C_VENDOR_ID_CIRQUE   0x0488
+#define I2C_PRODUCT_ID_CIRQUE_121F 0x121F
+
 #define USB_VENDOR_ID_CJTOUCH  0x24b8
 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020
 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index c3b9bd5dba75..07d92d4a9f7c 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1474,6 +1474,12 @@ static const struct hid_device_id mt_devices[] = {
MT_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT,
USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) },
 
+   /* Cirque devices */
+   { .driver_data = MT_CLS_WIN_8_DUAL,
+   HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+   I2C_VENDOR_ID_CIRQUE,
+   I2C_PRODUCT_ID_CIRQUE_121F) },
+
/* CJTouch panels */
{ .driver_data = MT_CLS_NSMU,
MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH,
-- 
2.17.1

[PATCH AUTOSEL 4.14 20/35] exec: make de_thread() freezable

2018-11-28 Thread Sasha Levin

From: Chanho Min 

[ Upstream commit c22397888f1eed98cd59f0a88f2a5f6925f80e15 ]

Suspend fails due to the exec family of functions blocking the freezer.
The cause is that de_thread() sleeps in TASK_UNINTERRUPTIBLE waiting for
all sub-threads to die, and we have the deadlock if one of them is frozen.
This also can occur with the schedule() waiting for the group thread leader
to exit if it is frozen.

On our machine, it causes a freeze timeout as shown below.

Freezing of tasks failed after 20.010 seconds (1 tasks refusing to freeze, 
wq_busy=0):
setcpushares-ls D ffc8ed70 0  5817   1483 0x004d
 Call trace:
[] __switch_to+0x88/0xa0
[] __schedule+0x1bc/0x720
[] schedule+0x40/0xa8
[] flush_old_exec+0xdc/0x640
[] load_elf_binary+0x2a8/0x1090
[] search_binary_handler+0x9c/0x240
[] load_script+0x20c/0x228
[] search_binary_handler+0x9c/0x240
[] do_execveat_common.isra.14+0x4f8/0x6e8
[] compat_SyS_execve+0x38/0x48
[] el0_svc_naked+0x24/0x28

To fix this, make de_thread() freezable. It looks safe and works fine.

Suggested-by: Oleg Nesterov 
Signed-off-by: Chanho Min 
Acked-by: Oleg Nesterov 
Acked-by: Pavel Machek 
Acked-by: Michal Hocko 
Signed-off-by: Rafael J. Wysocki 
Signed-off-by: Sasha Levin 
---
 fs/exec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0da4d748b4e6..25c529f46aaa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1079,7 +1080,7 @@ static int de_thread(struct task_struct *tsk)
while (sig->notify_count) {
__set_current_state(TASK_KILLABLE);
spin_unlock_irq(lock);
-   schedule();
+   freezable_schedule();
if (unlikely(__fatal_signal_pending(tsk)))
goto killed;
spin_lock_irq(lock);
@@ -1107,7 +1108,7 @@ static int de_thread(struct task_struct *tsk)
__set_current_state(TASK_KILLABLE);
write_unlock_irq(_lock);
cgroup_threadgroup_change_end(tsk);
-   schedule();
+   freezable_schedule();
if (unlikely(__fatal_signal_pending(tsk)))
goto killed;
}
-- 
2.17.1

[PATCH AUTOSEL 4.14 17/35] net/mlx4: Fix UBSAN warning of signed integer overflow

2018-11-28 Thread Sasha Levin

From: Aya Levin 

[ Upstream commit a463146e67c848cbab5ce706d6528281b7cded08 ]

UBSAN: Undefined behavior in
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:626:29
signed integer overflow: 1802201963 + 1802201963 cannot be represented
in type 'int'

The union of res_reserved and res_port_rsvd[MLX4_MAX_PORTS] monitors
granting of reserved resources. The grant operation is calculated and
protected, thus both members of the union cannot be negative.  Changed
type of res_reserved and of res_port_rsvd[MLX4_MAX_PORTS] from signed
int to unsigned int, allowing large value.

Fixes: 5a0d0a6161ae ("mlx4: Structures and init/teardown for VF resource 
quotas")
Signed-off-by: Aya Levin 
Signed-off-by: Tariq Toukan 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h 
b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index c68da1986e51..aaeb446bba62 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -541,8 +541,8 @@ struct slave_list {
 struct resource_allocator {
spinlock_t alloc_lock; /* protect quotas */
union {
-   int res_reserved;
-   int res_port_rsvd[MLX4_MAX_PORTS];
+   unsigned int res_reserved;
+   unsigned int res_port_rsvd[MLX4_MAX_PORTS];
};
union {
int res_free;
-- 
2.17.1

[PATCH AUTOSEL 4.14 12/35] qed: Fix PTT leak in qed_drain()

2018-11-28 Thread Sasha Levin

From: Denis Bolotin 

[ Upstream commit 9aaa4e8ba12972d674caeefbc5f88d83235dd697 ]

Release PTT before entering error flow.

Signed-off-by: Denis Bolotin 
Signed-off-by: Michal Kalderon 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c 
b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 954f7ce4cf28..ecc2d4296526 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1561,9 +1561,9 @@ static int qed_drain(struct qed_dev *cdev)
return -EBUSY;
}
rc = qed_mcp_drain(hwfn, ptt);
+   qed_ptt_release(hwfn, ptt);
if (rc)
return rc;
-   qed_ptt_release(hwfn, ptt);
}
 
return 0;
-- 
2.17.1

[PATCH AUTOSEL 4.14 25/35] iomap: sub-block dio needs to zeroout beyond EOF

2018-11-28 Thread Sasha Levin

From: Dave Chinner 

[ Upstream commit b450672fb66b4a991a5b55ee24209ac7ae7690ce ]

If we are doing sub-block dio that extends EOF, we need to zero
the unused tail of the block to initialise the data in it. If we
do not zero the tail of the block, then an immediate mmap read of
the EOF block will expose stale data beyond EOF to userspace. Found
with fsx running sub-block DIO sizes vs MAPREAD/MAPWRITE operations.

Fix this by detecting if the end of the DIO write is beyond EOF
and zeroing the tail if necessary.

Signed-off-by: Dave Chinner 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Darrick J. Wong 
Signed-off-by: Darrick J. Wong 
Signed-off-by: Sasha Levin 
---
 fs/iomap.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 8f7673a69273..407efdae3978 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -940,7 +940,14 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t 
length,
dio->submit.cookie = submit_bio(bio);
} while (nr_pages);
 
-   if (need_zeroout) {
+   /*
+* We need to zeroout the tail of a sub-block write if the extent type
+* requires zeroing or the write extends beyond EOF. If we don't zero
+* the block tail in the latter case, we can expose stale data via mmap
+* reads of the EOF block.
+*/
+   if (need_zeroout ||
+   ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
-- 
2.17.1

[PATCH AUTOSEL 4.14 27/35] iommu/vt-d: Use memunmap to free memremap

2018-11-28 Thread Sasha Levin

From: Pan Bian 

[ Upstream commit 829383e183728dec7ed9150b949cd6de64127809 ]

memunmap() should be used to free the return of memremap(), not
iounmap().

Fixes: dfddb969edf0 ('iommu/vt-d: Switch from ioremap_cache to memremap')
Signed-off-by: Pan Bian 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index aaf3fed97477..e86c1c8ec7f6 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3086,7 +3086,7 @@ static int copy_context_table(struct intel_iommu *iommu,
}
 
if (old_ce)
-   iounmap(old_ce);
+   memunmap(old_ce);
 
ret = 0;
if (devfn < 0x80)
-- 
2.17.1

[PATCH AUTOSEL 4.19 58/68] flexfiles: use per-mirror specified stateid for IO

2018-11-28 Thread Sasha Levin

From: Tigran Mkrtchyan 

[ Upstream commit bb21ce0ad227b69ec0f83279297ee44232105d96 ]

rfc8435 says:

  For tight coupling, ffds_stateid provides the stateid to be used by
  the client to access the file.

However, the current implementation replaces the per-mirror provided
stateid with the open or lock stateid.

Ensure that per-mirror stateid is used by ff_layout_write_prepare_v4 and
nfs4_ff_layout_prepare_ds.

Signed-off-by: Tigran Mkrtchyan 
Signed-off-by: Rick Macklem 
Signed-off-by: Trond Myklebust 
Signed-off-by: Sasha Levin 
---
 fs/nfs/flexfilelayout/flexfilelayout.c| 21 +
 fs/nfs/flexfilelayout/flexfilelayout.h|  4 
 fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 +++
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c 
b/fs/nfs/flexfilelayout/flexfilelayout.c
index cae4ef16..86ac2c5b93fe 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1361,12 +1361,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task 
*task, void *data)
task))
return;
 
-   if (ff_layout_read_prepare_common(task, hdr))
-   return;
-
-   if (nfs4_set_rw_stateid(>args.stateid, hdr->args.context,
-   hdr->args.lock_context, FMODE_READ) == -EIO)
-   rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+   ff_layout_read_prepare_common(task, hdr);
 }
 
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
@@ -1542,12 +1537,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task 
*task, void *data)
task))
return;
 
-   if (ff_layout_write_prepare_common(task, hdr))
-   return;
-
-   if (nfs4_set_rw_stateid(>args.stateid, hdr->args.context,
-   hdr->args.lock_context, FMODE_WRITE) == -EIO)
-   rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+   ff_layout_write_prepare_common(task, hdr);
 }
 
 static void ff_layout_write_call_done(struct rpc_task *task, void *data)
@@ -1742,6 +1732,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
hdr->args.fh = fh;
+
+   if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, >args.stateid))
+   goto out_failed;
+
/*
 * Note that if we ever decide to split across DSes,
 * then we may need to handle dense-like offsets.
@@ -1804,6 +1798,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int 
sync)
if (fh)
hdr->args.fh = fh;
 
+   if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, >args.stateid))
+   goto out_failed;
+
/*
 * Note that if we ever decide to split across DSes,
 * then we may need to handle dense-like offsets.
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h 
b/fs/nfs/flexfilelayout/flexfilelayout.h
index 411798346e48..de50a342d5a5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -215,6 +215,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct 
pnfs_layout_hdr *lo,
unsigned int maxnum);
 struct nfs_fh *
 nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+   u32 mirror_idx,
+   nfs4_stateid *stateid);
 
 struct nfs4_pnfs_ds *
 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c 
b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 59aa04976331..a8df2f496898 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -370,6 +370,25 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment 
*lseg, u32 mirror_idx)
return fh;
 }
 
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+   u32 mirror_idx,
+   nfs4_stateid *stateid)
+{
+   struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
+
+   if (!ff_layout_mirror_valid(lseg, mirror, false)) {
+   pr_err_ratelimited("NFS: %s: No data server for mirror offset 
index %d\n",
+   __func__, mirror_idx);
+   goto out;
+   }
+
+   nfs4_stateid_copy(stateid, &mirror->stateid);
+   return 1;
+out:
+   return 0;
+}
+
 /**
  * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
  * @lseg: the layout segment we're operating on
-- 
2.17.1

[PATCH AUTOSEL 4.19 60/68] net: thunderx: set xdp_prog to NULL if bpf_prog_add fails

2018-11-28 Thread Sasha Levin

From: Lorenzo Bianconi 

[ Upstream commit 6d0f60b0f8588fd4380ea5df9601e12fddd55ce2 ]

Set xdp_prog pointer to NULL if bpf_prog_add fails since that routine
reports the error code instead of NULL in case of failure and xdp_prog
pointer value is used in the driver to verify if XDP is currently
enabled.
Moreover report the error code to userspace if nicvf_xdp_setup fails

Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support")
Signed-off-by: Lorenzo Bianconi 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 768f584f8392..88f8a8fa93cd 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1784,6 +1784,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct 
bpf_prog *prog)
bool if_up = netif_running(nic->netdev);
struct bpf_prog *old_prog;
bool bpf_attached = false;
+   int ret = 0;
 
/* For now just support only the usual MTU sized frames */
if (prog && (dev->mtu > 1500)) {
@@ -1817,8 +1818,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct 
bpf_prog *prog)
if (nic->xdp_prog) {
/* Attach BPF program */
nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
-   if (!IS_ERR(nic->xdp_prog))
+   if (!IS_ERR(nic->xdp_prog)) {
bpf_attached = true;
+   } else {
+   ret = PTR_ERR(nic->xdp_prog);
+   nic->xdp_prog = NULL;
+   }
}
 
/* Calculate Tx queues needed for XDP and network stack */
@@ -1830,7 +1835,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct 
bpf_prog *prog)
netif_trans_update(nic->netdev);
}
 
-   return 0;
+   return ret;
 }
 
 static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
-- 
2.17.1

[PATCH AUTOSEL 4.19 59/68] net/dim: Update DIM start sample after each DIM iteration

2018-11-28 Thread Sasha Levin

From: Tal Gilboa 

[ Upstream commit 0211dda68a4f6531923a2f72d8e8959207f59fba ]

On every iteration of net_dim, the algorithm may choose to
check for the system state by comparing current data sample
with the previous data sample. After each of these comparisons,
regardless of the action taken, the sample used as the baseline
needs to be updated.

This patch fixes a bug that causes DIM to take wrong decisions,
due to never updating the baseline sample for comparison between
iterations. This way, DIM always compares current sample with
zeros.

Although this is a functional fix, it also improves and stabilizes
performance as the algorithm works properly now.

Performance:
Tested single UDP TX stream with pktgen:
samples/pktgen/pktgen_sample03_burst_single_flow.sh -i p4p2 -d 1.1.1.1
-m 24:8a:07:88:26:8b -f 3 -b 128

ConnectX-5 100GbE packet rate improved from 15-19Mpps to 19-20Mpps.
Also, toggling between profiles is less frequent with the fix.

Fixes: 8115b750dbcb ("net/dim: use struct net_dim_sample as arg to net_dim")
Signed-off-by: Tal Gilboa 
Reviewed-by: Tariq Toukan 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 include/linux/net_dim.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index c79e859408e6..fd458389f7d1 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim,
}
/* fall through */
case NET_DIM_START_MEASURE:
+   net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, 
end_sample.byte_ctr,
+  &dim->start_sample);
dim->state = NET_DIM_MEASURE_IN_PROGRESS;
break;
case NET_DIM_APPLY_NEW_PROFILE:
-- 
2.17.1

[PATCH AUTOSEL 4.14 06/35] test_firmware: fix error return getting clobbered

2018-11-28 Thread Sasha Levin

From: Colin Ian King 

[ Upstream commit 8bb0a88600f0267cfcc245d34f8c4abe8c282713 ]

In the case where req->fw->size > PAGE_SIZE the error return rc
is being set to EINVAL however this is being overwritten to
rc = req->fw->size because the error exit path via label 'out' is
not being taken.  Fix this by adding the jump to the error exit
path 'out'.

Detected by CoverityScan, CID#1453465 ("Unused value")

Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests")
Signed-off-by: Colin Ian King 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Sasha Levin 
---
 lib/test_firmware.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index e7008688769b..71d371f97138 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -838,6 +838,7 @@ static ssize_t read_firmware_show(struct device *dev,
if (req->fw->size > PAGE_SIZE) {
pr_err("Testing interface must use PAGE_SIZE firmware for 
now\n");
rc = -EINVAL;
+   goto out;
}
memcpy(buf, req->fw->data, req->fw->size);
 
-- 
2.17.1

[PATCH AUTOSEL 4.14 08/35] batman-adv: Use explicit tvlv padding for ELP packets

2018-11-28 Thread Sasha Levin

From: Sven Eckelmann 

[ Upstream commit f4156f9656feac21f4de712fac94fae964c5d402 ]

The announcement messages of batman-adv COMPAT_VERSION 15 have the
possibility to announce additional information via a dynamic TVLV part.
This part is optional for the ELP packets and currently not parsed by the
Linux implementation. Still out-of-tree versions are using it to transport
things like neighbor hashes to optimize the rebroadcast behavior.

Since the ELP broadcast packets are smaller than the minimal ethernet
packet, it often has to be padded. This is often done (as specified in
RFC894) with octets of zero and thus work perfectly fine with the TVLV
part (making it a zero length and thus empty). But not all ethernet
compatible hardware seems to follow this advice. To avoid ambiguous
situations when parsing the TVLV header, just force the 4 bytes (TVLV
length + padding) after the required ELP header to zero.

Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure")
Reported-by: Linus Lüssing 
Signed-off-by: Sven Eckelmann 
Signed-off-by: Simon Wunderlich 
Signed-off-by: Sasha Levin 
---
 net/batman-adv/bat_v_elp.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index e92dfedccc16..fbc132f4670e 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -338,19 +338,21 @@ static void batadv_v_elp_periodic_work(struct work_struct 
*work)
  */
 int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
 {
+   static const size_t tvlv_padding = sizeof(__be32);
struct batadv_elp_packet *elp_packet;
unsigned char *elp_buff;
u32 random_seqno;
size_t size;
int res = -ENOMEM;
 
-   size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
+   size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding;
hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
if (!hard_iface->bat_v.elp_skb)
goto out;
 
skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
-   elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+   elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb,
+   BATADV_ELP_HLEN + tvlv_padding);
elp_packet = (struct batadv_elp_packet *)elp_buff;
 
elp_packet->packet_type = BATADV_ELP;
-- 
2.17.1

[PATCH v5 05/15] i2c: acpi: Return error pointers from i2c_acpi_new_device()

2018-11-28 Thread Andy Shevchenko

The caller would like to know the reason why i2c_acpi_new_device() fails.
For example, if the adapter is not available yet, it might become available later, and we
would like to re-probe the clients again. But at the same time we would like to
bail out if the error seems unrecoverable, such as invalid argument supplied.
To achieve this, return error pointer in some cases.

Signed-off-by: Andy Shevchenko 
---
 drivers/i2c/i2c-core-acpi.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 32affd3fa8bd..689c0c467e97 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -386,20 +386,22 @@ struct notifier_block i2c_acpi_notifier = {
  *
  * Also see i2c_new_device, which this function calls to create the i2c-client.
  *
- * Returns a pointer to the new i2c-client, or NULL if the adapter is not 
found.
+ * Returns a pointer to the new i2c-client, or error pointer in case of 
failure.
+ * Specifically, -EPROBE_DEFER is returned if the adapter is not found.
  */
 struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
   struct i2c_board_info *info)
 {
struct i2c_acpi_lookup lookup;
struct i2c_adapter *adapter;
+   struct i2c_client *client;
struct acpi_device *adev;
LIST_HEAD(resource_list);
int ret;
 
adev = ACPI_COMPANION(dev);
if (!adev)
-   return NULL;
+   return ERR_PTR(-EINVAL);
 
memset(&lookup, 0, sizeof(lookup));
lookup.info = info;
@@ -408,16 +410,23 @@ struct i2c_client *i2c_acpi_new_device(struct device 
*dev, int index,
 
ret = acpi_dev_get_resources(adev, &resource_list,
 i2c_acpi_fill_info, &lookup);
+   if (ret < 0)
+   return ERR_PTR(ret);
+
acpi_dev_free_resource_list(&resource_list);
 
-   if (ret < 0 || !info->addr)
-   return NULL;
+   if (!info->addr)
+   return ERR_PTR(-EADDRNOTAVAIL);
 
adapter = i2c_acpi_find_adapter_by_handle(lookup.adapter_handle);
if (!adapter)
-   return NULL;
+   return ERR_PTR(-EPROBE_DEFER);
+
+   client = i2c_new_device(adapter, info);
+   if (!client)
+   return ERR_PTR(-ENODEV);
 
-   return i2c_new_device(adapter, info);
+   return client;
 }
 EXPORT_SYMBOL_GPL(i2c_acpi_new_device);
 
-- 
2.19.2

[PATCH v5 08/15] i2c: acpi: Use ACPI_FAILURE instead of !ACPI_SUCCESS

2018-11-28 Thread Andy Shevchenko

Convert to use ACPI_FAILURE instead of !ACPI_SUCCESS.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Hans de Goede 
Acked-by: Mika Westerberg 
---
 drivers/i2c/i2c-core-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 689c0c467e97..8a88586e0902 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -65,7 +65,7 @@ static int i2c_acpi_fill_info(struct acpi_resource *ares, 
void *data)
status = acpi_get_handle(lookup->device_handle,
 sb->resource_source.string_ptr,
 &lookup->adapter_handle);
-   if (!ACPI_SUCCESS(status))
+   if (ACPI_FAILURE(status))
return 1;
 
info->addr = sb->slave_address;
-- 
2.19.2

Re: [PATCH V2 2/4] thermal: imx_sc: add i.MX system controller thermal support

2018-11-28 Thread Lothar Waßmann

Hi,

On Wed, 28 Nov 2018 05:58:22 + Anson Huang wrote:
> i.MX8QXP is an ARMv8 SoC which has a Cortex-M4 system controller
> inside, the system controller is in charge of controlling power,
> clock and thermal sensors etc..
> 
> This patch adds i.MX system controller thermal driver support,
> Linux kernel has to communicate with system controller via MU
> (message unit) IPC to get each thermal sensor's temperature,
> it supports multiple sensors which are passed from device tree,
> please see the binding doc for details.
> 
> Signed-off-by: Anson Huang 
> ---
> changes since V1:
>   remove boiler plate license text;
>   remove unnecessary kfree during probe;
>   remove build warning of comparing unsigned int with < 0;
>  drivers/thermal/Kconfig  |  11 +++
>  drivers/thermal/Makefile |   1 +
>  drivers/thermal/imx_sc_thermal.c | 201 
> +++
>  3 files changed, 213 insertions(+)
>  create mode 100644 drivers/thermal/imx_sc_thermal.c
> 
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index 0e69edc..84e850c 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -222,6 +222,17 @@ config IMX_THERMAL
> cpufreq is used as the cooling device to throttle CPUs when the
> passive trip is crossed.
>  
> +config IMX_SC_THERMAL
> + tristate "Temperature sensor driver for NXP i.MX SoCs with System 
> Controller"
> + depends on ARCH_MXC || COMPILE_TEST
> + depends on OF
> + help
> +   Support for Temperature Monitor (TEMPMON) found on NXP i.MX SoCs with
> +   system controller inside, Linux kernel has to communicate with system
> +   controller via MU (message unit) IPC to get temperature from thermal
> +   sensor. It supports one critical trip point and one
> +   passive trip point for each thermal sensor.
> +
>  config MAX77620_THERMAL
>   tristate "Temperature sensor driver for Maxim MAX77620 PMIC"
>   depends on MFD_MAX77620
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 610344e..1b13f6a 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -41,6 +41,7 @@ obj-$(CONFIG_DB8500_THERMAL)+= db8500_thermal.o
>  obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o
>  obj-$(CONFIG_TANGO_THERMAL)  += tango_thermal.o
>  obj-$(CONFIG_IMX_THERMAL)+= imx_thermal.o
> +obj-$(CONFIG_IMX_SC_THERMAL) += imx_sc_thermal.o
>  obj-$(CONFIG_MAX77620_THERMAL)   += max77620_thermal.o
>  obj-$(CONFIG_QORIQ_THERMAL)  += qoriq_thermal.o
>  obj-$(CONFIG_DA9062_THERMAL) += da9062-thermal.o
> diff --git a/drivers/thermal/imx_sc_thermal.c 
> b/drivers/thermal/imx_sc_thermal.c
> new file mode 100644
> index 000..890537f
> --- /dev/null
> +++ b/drivers/thermal/imx_sc_thermal.c
> @@ -0,0 +1,201 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright 2018 NXP.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "thermal_core.h"
> +
> +#define IMX_SC_MISC_FUNC_GET_TEMP 13
> +#define IMX_SC_C_TEMP 0
> +
> +struct imx_sc_ipc *thermal_ipc_handle;
> +
static?

> +struct imx_sc_sensor {
> + struct thermal_zone_device *tzd;
> + u32 resource_id;
> +};
> +
> +struct imx_sc_thermal_data {
> + struct imx_sc_sensor *sensor;
> +};
> +
> +struct imx_sc_msg_req_misc_get_temp {
> + struct imx_sc_rpc_msg hdr;
> + u16 resource_id;
> + u8 type;
> +} __packed;
> +
> +struct imx_sc_msg_resp_misc_get_temp {
> + struct imx_sc_rpc_msg hdr;
> + u16 celsius;
> + u8 tenths;
> +} __packed;
> +
> +static int imx_sc_thermal_get_temp(void *data, int *temp)
> +{
> + struct imx_sc_msg_resp_misc_get_temp *resp;
> + struct imx_sc_msg_req_misc_get_temp msg;
> + struct imx_sc_rpc_msg *hdr = &msg.hdr;
> + struct imx_sc_sensor *sensor = data;
> + int ret;
> +
> + msg.resource_id = sensor->resource_id;
> + msg.type = IMX_SC_C_TEMP;
> +
> + hdr->ver = IMX_SC_RPC_VERSION;
> + hdr->svc = IMX_SC_RPC_SVC_MISC;
> + hdr->func = IMX_SC_MISC_FUNC_GET_TEMP;
> + hdr->size = 2;
> +
> + ret = imx_scu_call_rpc(thermal_ipc_handle, &msg, true);
> + if (ret) {
> + pr_err("read temp sensor %d failed, ret %d\n",
> + sensor->resource_id, ret);
> + return ret;
> + }
> +
> + resp = (struct imx_sc_msg_resp_misc_get_temp *)&msg;
> + *temp = resp->celsius * 1000 + resp->tenths * 100;
> +
> + return 0;
> +}
> +
> +static const struct thermal_zone_of_device_ops imx_sc_thermal_ops = {
> + .get_temp = imx_sc_thermal_get_temp,
> +};
> +
> +static int imx_sc_thermal_register_sensor(struct platform_device *pdev,
> +   struct imx_sc_sensor *sensor)
> +{
> + struct thermal_zone_device *tzd;
> +
> + tzd = devm_thermal_zone_of_sensor_register(&pdev->dev,
> +

[PATCH v5 07/15] platform/x86: i2c-multi-instantiate: Get rid of obsolete conditional

2018-11-28 Thread Andy Shevchenko

Now, when i2c_acpi_new_device() never returns NULL, there is no point to check
for it. Besides that, it is better that i2c_acpi_new_device() returns
-EPROBE_DEFER directly, so that the caller doesn't need to guess.

Signed-off-by: Andy Shevchenko 
---
 drivers/platform/x86/i2c-multi-instantiate.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/x86/i2c-multi-instantiate.c 
b/drivers/platform/x86/i2c-multi-instantiate.c
index 16a0eabe1e31..724535673089 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -72,14 +72,10 @@ static int i2c_multi_inst_probe(struct platform_device 
*pdev)
board_info.irq = ret;
}
multi->clients[i] = i2c_acpi_new_device(dev, i, &board_info);
-   if (IS_ERR(multi->clients[i]))
+   if (IS_ERR(multi->clients[i])) {
ret = PTR_ERR(multi->clients[i]);
-   else if (!multi->clients[i])
-   ret = -EPROBE_DEFER; /* Wait for i2c-adapter to load */
-   else
-   ret = 0;
-   if (ret) {
-   dev_err(dev, "Error creating i2c-client, idx %d\n", i);
+   if (ret != -EPROBE_DEFER)
+   dev_err(dev, "Error creating i2c-client, idx 
%d\n", i);
goto error;
}
}
-- 
2.19.2

[PATCH v5 03/15] platform/x86: i2c-multi-instantiate: Accept errors of i2c_acpi_new_device()

2018-11-28 Thread Andy Shevchenko

In the future i2c_acpi_new_device() will return error pointer in some cases.
Prepare i2c-multi-instantiate driver to support that.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Hans de Goede 
---
 drivers/platform/x86/i2c-multi-instantiate.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/i2c-multi-instantiate.c 
b/drivers/platform/x86/i2c-multi-instantiate.c
index 5456581b473c..e3345da82c84 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -72,9 +72,14 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
board_info.irq = ret;
}
multi->clients[i] = i2c_acpi_new_device(dev, i, &board_info);
-   if (!multi->clients[i]) {
-   dev_err(dev, "Error creating i2c-client, idx %d\n", i);
+   if (IS_ERR(multi->clients[i]))
+   ret = PTR_ERR(multi->clients[i]);
+   else if (!multi->clients[i])
ret = -ENODEV;
+   else
+   ret = 0;
+   if (ret) {
+   dev_err(dev, "Error creating i2c-client, idx %d\n", i);
goto error;
}
}
-- 
2.19.2

[PATCH v5 12/15] platform/x86: i2c-multi-instantiate: Introduce IOAPIC IRQ support

2018-11-28 Thread Andy Shevchenko

If ACPI table provides an Interrupt() resource we may consider to use it
instead of GpioInt() one.

Here we leave an error condition, when getting IRQ resource, to the driver
to decide how to proceed, because some drivers may consider IRQ resource
optional.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Hans de Goede 
---
 drivers/platform/x86/i2c-multi-instantiate.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/platform/x86/i2c-multi-instantiate.c 
b/drivers/platform/x86/i2c-multi-instantiate.c
index 99db3e336718..365457f9c424 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -18,6 +18,7 @@
 #define IRQ_RESOURCE_TYPE  GENMASK(1, 0)
 #define IRQ_RESOURCE_NONE  0
 #define IRQ_RESOURCE_GPIO  1
+#define IRQ_RESOURCE_APIC  2
 
 struct i2c_inst_data {
const char *type;
@@ -104,6 +105,14 @@ static int i2c_multi_inst_probe(struct platform_device 
*pdev)
}
board_info.irq = ret;
break;
+   case IRQ_RESOURCE_APIC:
+   ret = platform_get_irq(pdev, inst_data[i].irq_idx);
+   if (ret < 0) {
+   dev_dbg(dev, "Error requesting irq at index %d: 
%d\n",
+   inst_data[i].irq_idx, ret);
+   }
+   board_info.irq = ret;
+   break;
default:
board_info.irq = 0;
break;
-- 
2.19.2

[PATCH v5 15/15] iio: inv_mpu6050: Use i2c_acpi_get_i2c_resource() helper

2018-11-28 Thread Andy Shevchenko

ACPI provides a generic helper to get I2C Serial Bus resources.
Use it instead of open coded variant.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Hans de Goede 
---
 drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c 
b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
index d78a10403bac..a961b5a06fe6 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
@@ -91,18 +91,14 @@ static int asus_acpi_get_sensor_info(struct acpi_device 
*adev,
 
 static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data)
 {
+   struct acpi_resource_i2c_serialbus *sb;
u32 *addr = data;
 
-   if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-   struct acpi_resource_i2c_serialbus *sb;
-
-   sb = &ares->data.i2c_serial_bus;
-   if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
-   if (*addr)
-   *addr |= (sb->slave_address << 16);
-   else
-   *addr = sb->slave_address;
-   }
+   if (i2c_acpi_get_i2c_resource(ares, &sb)) {
+   if (*addr)
+   *addr |= (sb->slave_address << 16);
+   else
+   *addr = sb->slave_address;
}
 
/* Tell the ACPI core that we already copied this address */
-- 
2.19.2

[PATCH v5 04/15] platform/x86: i2c-multi-instantiate: Defer probe when no adapter found

2018-11-28 Thread Andy Shevchenko

Like the rest of the i2c_acpi_new_device() users, defer the probe
of the i2c-multi-instantiate driver in case the adapter is not yet found.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Hans de Goede 
---
 drivers/platform/x86/i2c-multi-instantiate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/i2c-multi-instantiate.c 
b/drivers/platform/x86/i2c-multi-instantiate.c
index e3345da82c84..16a0eabe1e31 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -75,7 +75,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
if (IS_ERR(multi->clients[i]))
ret = PTR_ERR(multi->clients[i]);
else if (!multi->clients[i])
-   ret = -ENODEV;
+   ret = -EPROBE_DEFER; /* Wait for i2c-adapter to load */
else
ret = 0;
if (ret) {
-- 
2.19.2

[PATCH v5 02/15] platform/x86: intel_cht_int33fe: Accept errors of i2c_acpi_new_device()

2018-11-28 Thread Andy Shevchenko

In the future i2c_acpi_new_device() will return error pointer in some cases.
Prepare intel_cht_int33fe driver to support that.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Hans de Goede 
---
 drivers/platform/x86/intel_cht_int33fe.c | 28 +++-
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/intel_cht_int33fe.c 
b/drivers/platform/x86/intel_cht_int33fe.c
index 431151d4e611..367d6e304ade 100644
--- a/drivers/platform/x86/intel_cht_int33fe.c
+++ b/drivers/platform/x86/intel_cht_int33fe.c
@@ -168,8 +168,14 @@ static int cht_int33fe_probe(struct platform_device *pdev)
board_info.dev_name = "max17047";
board_info.properties = max17047_props;
data->max17047 = i2c_acpi_new_device(dev, 1, &board_info);
-   if (!data->max17047)
-   return -EPROBE_DEFER; /* Wait for i2c-adapter to load */
+   if (IS_ERR(data->max17047))
+   ret = PTR_ERR(data->max17047);
+   else if (!data->max17047)
+   ret = -EPROBE_DEFER; /* Wait for i2c-adapter to load */
+   else
+   ret = 0;
+   if (ret)
+   return ret;
}
 
data->connections[0].endpoint[0] = "port0";
@@ -194,7 +200,13 @@ static int cht_int33fe_probe(struct platform_device *pdev)
board_info.irq = fusb302_irq;
 
data->fusb302 = i2c_acpi_new_device(dev, 2, &board_info);
-   if (!data->fusb302)
+   if (IS_ERR(data->fusb302))
+   ret = PTR_ERR(data->fusb302);
+   else if (!data->fusb302)
+   ret = -EPROBE_DEFER; /* Wait for the i2c-adapter to load */
+   else
+   ret = 0;
+   if (ret)
goto out_unregister_max17047;
 
memset(&board_info, 0, sizeof(board_info));
@@ -202,7 +214,13 @@ static int cht_int33fe_probe(struct platform_device *pdev)
strlcpy(board_info.type, "pi3usb30532", I2C_NAME_SIZE);
 
data->pi3usb30532 = i2c_acpi_new_device(dev, 3, &board_info);
-   if (!data->pi3usb30532)
+   if (IS_ERR(data->pi3usb30532))
+   ret = PTR_ERR(data->pi3usb30532);
+   else if (!data->pi3usb30532)
+   ret = -EPROBE_DEFER; /* Wait for the i2c-adapter to load */
+   else
+   ret = 0;
+   if (ret)
goto out_unregister_fusb302;
 
platform_set_drvdata(pdev, data);
@@ -217,7 +235,7 @@ static int cht_int33fe_probe(struct platform_device *pdev)
 
device_connections_remove(data->connections);
 
-   return -EPROBE_DEFER; /* Wait for the i2c-adapter to load */
+   return ret;
 }
 
 static int cht_int33fe_remove(struct platform_device *pdev)
-- 
2.19.2

Re: [PATCH] mtd: nand: spi: Add initial support for Toshiba TC58CVG2S0H

2018-11-28 Thread Schrempf Frieder

Hi Miquèl,

On 18.11.18 21:47, Miquel Raynal wrote:
> Hi Schrempf,
> 
> Schrempf Frieder  wrote on Thu, 8 Nov 2018
> 08:32:11 +:
> 
>> Add minimal support for the Toshiba TC58CVG2S0H SPI NAND chip.
>>
>> Signed-off-by: Frieder Schrempf 
>> ---
> 
> With "mtd: spinand:" as prefix, applied to nand/next.

Clément suggested some fixes/improvements for this patch. You can find 
them in this patch: https://patchwork.ozlabs.org/patch/1004501/.

When these changes are approved, you can decide if you apply this on top 
of the initial patch or if you can/want to squash both patches.

Thanks,
Frieder

Re: [PATCH] Fix invalid use of sizeof in stm32_sai_add_mclk_provider()

2018-11-28 Thread Mark Brown

On Tue, Nov 27, 2018 at 08:35:20PM +0800, Wen Yang wrote:
> sizeof(mclk) is 4 or 8 as it is the size of a pointer,
> but we want to reserve space for the pointed data.
> This issue was detected by using the Coccinelle software.

Please use subject lines matching the style for the subsystem.  This
makes it easier for people to identify relevant patches.

signature.asc
Description: PGP signature

Re: [PATCH v3 0/3] perf report/annotate: Support average IPC and IPC coverage for function

2018-11-28 Thread Jin, Yao





On 11/28/2018 5:10 PM, Ingo Molnar wrote:


* Jin Yao  wrote:


Add support for displaying the average IPC and IPC coverage
percentage per function.

For example,

$ perf record -b ...
$ perf report -s symbol or
   perf report -s symbol --stdio

Overhead  Symbol   IPC   [IPC Coverage]
   39.60%  [.] __random 2.30  [ 54.8%]
   18.02%  [.] main 0.43  [ 54.3%]
   14.21%  [.] compute_flag 2.29  [100.0%]
   14.16%  [.] rand 0.36  [100.0%]
7.06%  [.] __random_r   2.57  [ 70.5%]
6.85%  [.] rand@plt 0.00  [  0.0%]
   ...

$ perf annotate --stdio2

Percent  IPC Cycle (Average IPC: 2.30, IPC Coverage: 54.8%)

 Disassembly of section .text:

 0003aac0 :
   8.32  3.28  sub$0x18,%rsp
 3.28  mov$0x1,%esi
 3.28  xor%eax,%eax
 3.28  cmpl   
$0x0,argp_program_version_hook@@GLIBC_2.2.5+0x1e0
  11.57  3.28 1  ↓ je 20
   lock   cmpxchg %esi,__abort_msg@@GLIBC_PRIVATE+0x8a0
 ↓ jne29
 ↓ jmp43
  11.57  1.1020:   cmpxchg %esi,__abort_msg@@GLIBC_PRIVATE+0x8a0


That's a nice feature: please add meaningful documentation, accessible
via the perf help system preferably, that outlines how the IPC metrics
should be interpreted and how they are useful when optimizing programs.

Thanks,

Ingo



Hi Ingo,

Thanks so much for your comments! I think I will add some explanations 
in perf/Documentation/perf-report.txt, maybe somewhere around the 
sort_key section (-s::).


Thanks
Jin Yao

Re: [PATCH v2 1/4] x86/hyper-v: move synic/stimer control structures definitions to hyperv-tlfs.h

2018-11-28 Thread Thomas Gleixner

On Wed, 28 Nov 2018, Vitaly Kuznetsov wrote:

> Nadav Amit  writes:
> 
> >
> > On a different note: how come all of the hyper-v structs are not marked
> > with the “packed" attribute?
> 
> "packed" should not be needed with proper padding; I vaguely remember
> someone (from x86@?) arguing _against_ "packed".

Packed needs to be used, when describing fixed format data structures in
hardware or other ABIs, so the compiler cannot put alignment holes into
them.

Using packed for generic data structures might result in suboptimal layouts
and prevents layout randomization.

Thanks,

tglx

Re: [PATCH v7 2/2] sched/fair: update scale invariance of PELT

2018-11-28 Thread Vincent Guittot

On Wed, 28 Nov 2018 at 14:33, Vincent Guittot
 wrote:
>
> On Wed, 28 Nov 2018 at 12:53, Patrick Bellasi  wrote:
> >
> > On 28-Nov 11:02, Peter Zijlstra wrote:
> > > On Wed, Nov 28, 2018 at 10:54:13AM +0100, Vincent Guittot wrote:
> > >
> > > > Is there anything else that I should do for these patches ?
> > >
> > > IIRC, Morten mention they break util_est; Patrick was going to explain.
> >
> > I guess the problem is that, once we cross the current capacity,
> > strictly speaking util_avg does not represent anymore a utilization.
> >
> > With the new signal this could happen and we end up storing estimated
> > utilization samples which will overestimate the task requirements.
> >
> > We will have a spike in estimated utilization at next wakeup, since we
> > use MAX(util_avg@dequeue_time, ewma). Potentially we also inflate the EWMA 
> > in
> > case we collect multiple samples above the current capacity.
>
> TBH I don't see how it's different from current implementation with a
> task that was scheduled on big core and now wakes up on little core.
> The util_est is overestimated as well.
>
> But I'm fine with adding your proposal on to on the patchset
s/on to on/on top of/

>
> >
> > So, a possible fix could be to avoid storing util_est samples if we
> > end up with a utilization above the current capacity.
> >
> > Something like:
> >
> > 8<---
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index ac855b2f4774..93e0cf5d8a76 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -3661,6 +3661,10 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct 
> > task_struct *p, bool task_sleep)
> > if (!task_sleep)
> > return;
> >
> > +   /* Skip samples which do not represent an actual utilization */
> > +   if (unlikely(task_util(p) > capacity_of(task_cpu(p))))
> > +   return;
> > +
> > /*
> >  * If the PELT values haven't changed since enqueue time,
> >  * skip the util_est update.
> > ---8<---
> >
> > Could that work ?
> >
> > Maybe using a new utility function to wrap the new check.
> >
> > --
> > #include 
> >
> > Patrick Bellasi

Re: [PATCH v4 1/2] ptrace: save the type of syscall-stop in ptrace_message

2018-11-28 Thread Oleg Nesterov

On 11/28, Dmitry V. Levin wrote:
>
> +/*
> + * These values are stored in task->ptrace_message by 
> tracehook_report_syscall_*
> + * to describe current syscall-stop.
> + *
> + * Values for these constants are chosen so that they do not appear
> + * in task->ptrace_message by other means.
> + */
> +#define PTRACE_EVENTMSG_SYSCALL_ENTRY 0x8000U
> +#define PTRACE_EVENTMSG_SYSCALL_EXIT 0x9000U

Again, I do not really understand the comment... Why should we care about
"do not appear in task->ptrace_message by other means" ?

2/2 should detect ptrace_report_syscall() case correctly, so we can use any
numbers, say, 1 and 2?

If debugger does PTRACE_GETEVENTMSG it should know how to interpret the value
anyway after wait(status).

Oleg.

[PATCH] Compiler Attributes: move kernel-only attributes into __KERNEL__

2018-11-28 Thread Xiaozhou Liu

Attributes such as `__gnu_inline' are meant to be used within the
kernel. When userspace somehow includes 
(eg. tools/bpf), compilation errors would be shown:

"error: unknown type name ‘__gnu_inline’"

So just move these things into __KERNEL__ and the behavior is kept
as before.

Fixes: a3f8a30f3f00 ("Compiler Attributes: use feature checks instead of 
version checks")
Signed-off-by: Xiaozhou Liu 
---
 include/linux/compiler_types.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 4a3f9c09c92d..2fb2c311e5d6 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -161,6 +161,8 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
__diag_ ## compiler(version, error, option)
 
+#ifdef __KERNEL__
+
 #ifdef CONFIG_ENABLE_MUST_CHECK
 #define __must_check   __attribute__((__warn_unused_result__))
 #else
@@ -215,4 +217,6 @@ struct ftrace_likely_data {
  */
 #define noinline_for_stack noinline
 
+#endif /* __KERNEL__ */
+
 #endif /* __LINUX_COMPILER_TYPES_H */
-- 
2.11.0

Re: [PATCH 2/2] kernel/trace: fix watchdog soft lockup

2018-11-28 Thread Steven Rostedt

On Wed, 28 Nov 2018 09:13:34 +0100
Anders Roxell  wrote:

> When building an allmodconfig kernel for arm64 and booting it in qemu,
> CONFIG_FTRACE_STARTUP_TEST gets enabled and that takes time so the
> watchdog expires and prints out a message like this:
> 'watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:1]'
> Each time the function ftrace_replace_code gets called it stays in that
> function's loop for 41424 iterations.
> Rework so that function cond_resched() gets called in the
> ftrace_replace_code loop.
> 
> Co-developed-by: Arnd Bergmann 
> Signed-off-by: Arnd Bergmann 
> Signed-off-by: Anders Roxell 
> ---
>  kernel/trace/ftrace.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 5b4f73e4fd56..3f456921dedf 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -2426,6 +2426,10 @@ void __weak ftrace_replace_code(int enable)
>  
>   do_for_each_ftrace_rec(pg, rec) {
>  
> + /* This loop can take minutes when sanitizers are enabled, so
> +  * lets make sure we allow RCU processing.
> +  */
> + cond_resched();
>   if (rec->flags & FTRACE_FL_DISABLED)
>   continue;
>  

NACK.  On some architectures this code is run from stop machine. We
can't call cond_resched() because it may be called with interrupts
disabled.

This is a weak function. If arm64 has special needs, just copy it in
the arm64 code.

-- Steve

[tip:x86/pti] x86/speculation: Move STIBP/IBPB string conditionals out of cpu_show_common()

2018-11-28 Thread tip-bot for Tim Chen

Commit-ID:  a8f76ae41cd633ac00be1b3019b1eb4741be3828
Gitweb: https://git.kernel.org/tip/a8f76ae41cd633ac00be1b3019b1eb4741be3828
Author: Tim Chen 
AuthorDate: Sun, 25 Nov 2018 19:33:32 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Nov 2018 11:57:05 +0100

x86/speculation: Move STIBP/IBPB string conditionals out of cpu_show_common()

The Spectre V2 printout in cpu_show_common() handles conditionals for the
various mitigation methods directly in the sprintf() argument list. That's
hard to read and will become unreadable if more complex decisions need to
be made for a particular method.

Move the conditionals for STIBP and IBPB string selection into helper
functions, so they can be extended later on.

Signed-off-by: Tim Chen 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Tom Lendacky 
Cc: Josh Poimboeuf 
Cc: Andrea Arcangeli 
Cc: David Woodhouse 
Cc: Andi Kleen 
Cc: Dave Hansen 
Cc: Casey Schaufler 
Cc: Asit Mallick 
Cc: Arjan van de Ven 
Cc: Jon Masters 
Cc: Waiman Long 
Cc: Greg KH 
Cc: Dave Stewart 
Cc: Kees Cook 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.874479...@linutronix.de


---
 arch/x86/kernel/cpu/bugs.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b52a48966e01..a1502bce9eb8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -844,6 +844,22 @@ static ssize_t l1tf_show_state(char *buf)
 }
 #endif
 
+static char *stibp_state(void)
+{
+   if (x86_spec_ctrl_base & SPEC_CTRL_STIBP)
+   return ", STIBP";
+   else
+   return "";
+}
+
+static char *ibpb_state(void)
+{
+   if (boot_cpu_has(X86_FEATURE_USE_IBPB))
+   return ", IBPB";
+   else
+   return "";
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute 
*attr,
   char *buf, unsigned int bug)
 {
@@ -865,9 +881,9 @@ static ssize_t cpu_show_common(struct device *dev, struct 
device_attribute *attr
 
case X86_BUG_SPECTRE_V2:
return sprintf(buf, "%s%s%s%s%s%s\n", 
spectre_v2_strings[spectre_v2_enabled],
-  boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : 
"",
+  ibpb_state(),
   boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", 
IBRS_FW" : "",
-  (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", 
STIBP" : "",
+  stibp_state(),
   boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB 
filling" : "",
   spectre_v2_module_string());

Re: [PATCH] mtd: spi-nor: parse SFDP 4-byte Address Instruction Table

2018-11-28 Thread Boris Brezillon

On Wed, 28 Nov 2018 14:17:12 +
 wrote:

> On 11/28/2018 09:57 AM, Boris Brezillon wrote:
> > On Tue, 20 Nov 2018 11:55:21 +
> >  wrote:
> >   
> >> +
> >> +  /*
> >> +   * We set nor->addr_width here to skip spi_nor_set_4byte_opcodes()
> >> +   * later because this latest function implements a legacy quirk for
> >> +   * the erase size of Spansion memory. However this quirk is no longer
> >> +   * needed with new SFDP compliant memories.
> >> +   */
> >> +  nor->addr_width = 4;
> >> +  nor->flags |= SPI_NOR_4B_OPCODES;  
> > 
> > You mean SNOR_F_4B_OPCODES (the one introduced here [1]), because
> > SPI_NOR_4B_OPCODES should only be used for flash_info->flags and might
> > soon conflict with another SNOR_F_ flag?
> >   
> 
> yes, you're right.
> 
> > [1]http://patchwork.ozlabs.org/patch/991476/
> >   
> 
> Can you apply your patch? Will submit a new version afterwards.

Actually, I realized setting SNOR_F_4B_OPCODES when the BFPT advertises
4_BYTES_ONLY is incorrect as 4bytes only can mean "use the 3B opcodes
but pass address on 4 bytes". Here is a new version of this patch [1].
Feel free to pick it up and send it along with your "SFDP 4-byte Address
Instruction Table" patch (I have not reason to send it alone since the
problem I was trying to solve is no longer fixed by [1]).

[1]https://github.com/bbrezillon/linux-0day/commit/a953b6b435ec67bca00df472db5f6dca4f63

[tip:x86/pti] x86/speculation: Clean up spectre_v2_parse_cmdline()

2018-11-28 Thread tip-bot for Tim Chen

Commit-ID:  24848509aa55eac39d524b587b051f4e86df3c12
Gitweb: https://git.kernel.org/tip/24848509aa55eac39d524b587b051f4e86df3c12
Author: Tim Chen 
AuthorDate: Sun, 25 Nov 2018 19:33:30 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Nov 2018 11:57:04 +0100

x86/speculation: Clean up spectre_v2_parse_cmdline()

Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline()
to save an indentation level.

Signed-off-by: Tim Chen 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Tom Lendacky 
Cc: Josh Poimboeuf 
Cc: Andrea Arcangeli 
Cc: David Woodhouse 
Cc: Andi Kleen 
Cc: Dave Hansen 
Cc: Casey Schaufler 
Cc: Asit Mallick 
Cc: Arjan van de Ven 
Cc: Jon Masters 
Cc: Waiman Long 
Cc: Greg KH 
Cc: Dave Stewart 
Cc: Kees Cook 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.688010...@linutronix.de


---
 arch/x86/kernel/cpu/bugs.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7f6d8159398e..839ab4103e89 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -276,22 +276,21 @@ static enum spectre_v2_mitigation_cmd __init 
spectre_v2_parse_cmdline(void)
 
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
-   else {
-   ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, 
sizeof(arg));
-   if (ret < 0)
-   return SPECTRE_V2_CMD_AUTO;
 
-   for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
-   if (!match_option(arg, ret, 
mitigation_options[i].option))
-   continue;
-   cmd = mitigation_options[i].cmd;
-   break;
-   }
+   ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, 
sizeof(arg));
+   if (ret < 0)
+   return SPECTRE_V2_CMD_AUTO;
 
-   if (i >= ARRAY_SIZE(mitigation_options)) {
-   pr_err("unknown option (%s). Switching to AUTO 
select\n", arg);
-   return SPECTRE_V2_CMD_AUTO;
-   }
+   for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+   if (!match_option(arg, ret, mitigation_options[i].option))
+   continue;
+   cmd = mitigation_options[i].cmd;
+   break;
+   }
+
+   if (i >= ARRAY_SIZE(mitigation_options)) {
+   pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+   return SPECTRE_V2_CMD_AUTO;
}
 
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||

[tip:x86/pti] x86/speculation: Remove unnecessary ret variable in cpu_show_common()

2018-11-28 Thread tip-bot for Tim Chen

Commit-ID:  b86bda0426853bfe8a3506c7d2a5b332760ae46b
Gitweb: https://git.kernel.org/tip/b86bda0426853bfe8a3506c7d2a5b332760ae46b
Author: Tim Chen 
AuthorDate: Sun, 25 Nov 2018 19:33:31 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Nov 2018 11:57:05 +0100

x86/speculation: Remove unnecessary ret variable in cpu_show_common()

Signed-off-by: Tim Chen 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Tom Lendacky 
Cc: Josh Poimboeuf 
Cc: Andrea Arcangeli 
Cc: David Woodhouse 
Cc: Andi Kleen 
Cc: Dave Hansen 
Cc: Casey Schaufler 
Cc: Asit Mallick 
Cc: Arjan van de Ven 
Cc: Jon Masters 
Cc: Waiman Long 
Cc: Greg KH 
Cc: Dave Stewart 
Cc: Kees Cook 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.783903...@linutronix.de


---
 arch/x86/kernel/cpu/bugs.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 839ab4103e89..b52a48966e01 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -847,8 +847,6 @@ static ssize_t l1tf_show_state(char *buf)
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute 
*attr,
   char *buf, unsigned int bug)
 {
-   int ret;
-
if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
 
@@ -866,13 +864,12 @@ static ssize_t cpu_show_common(struct device *dev, struct 
device_attribute *attr
return sprintf(buf, "Mitigation: __user pointer 
sanitization\n");
 
case X86_BUG_SPECTRE_V2:
-   ret = sprintf(buf, "%s%s%s%s%s%s\n", 
spectre_v2_strings[spectre_v2_enabled],
+   return sprintf(buf, "%s%s%s%s%s%s\n", 
spectre_v2_strings[spectre_v2_enabled],
   boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : 
"",
   boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", 
IBRS_FW" : "",
   (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", 
STIBP" : "",
   boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB 
filling" : "",
   spectre_v2_module_string());
-   return ret;
 
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);

Re: [PATCH] arm64: ftrace: Fix to enable syscall events on arm64

2018-11-28 Thread Steven Rostedt

On Wed, 28 Nov 2018 12:05:02 +
Will Deacon  wrote:

> Ok! Then please add a comment to arch_syscall_match_sym_name() along those
> lines, and you can add my ack:
> 
> Acked-by: Will Deacon 

Shouldn't this go through your tree?

-- Steve

[tip:x86/pti] x86/speculation: Prevent stale SPEC_CTRL msr content

2018-11-28 Thread tip-bot for Thomas Gleixner

Commit-ID:  6d991ba509ebcfcc908e009d1db51972a4f7a064
Gitweb: https://git.kernel.org/tip/6d991ba509ebcfcc908e009d1db51972a4f7a064
Author: Thomas Gleixner 
AuthorDate: Wed, 28 Nov 2018 10:56:57 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Nov 2018 11:57:12 +0100

x86/speculation: Prevent stale SPEC_CTRL msr content

The seccomp speculation control operates on all tasks of a process, but
only the current task of a process can update the MSR immediately. For the
other threads the update is deferred to the next context switch.

This creates the following situation with Process A and B:

Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2
does not have the speculation control TIF bit set. Process B task 1 has the
speculation control TIF bit set.

CPU0CPU1
MSR bit is set
ProcB.T1 schedules out
ProcA.T2 schedules in
MSR bit is cleared
ProcA.T1
  seccomp_update()
  set TIF bit on ProcA.T2
ProcB.T1 schedules in
MSR is not updated  <-- FAIL

This happens because the context switch code tries to avoid the MSR update
if the speculation control TIF bits of the incoming and the outgoing task
are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks
scheduling back and forth on CPU1, which keeps the MSR stale forever.

In theory this could be remedied by IPIs, but chasing the remote task which
could be migrated is complex and full of races.

The straightforward solution is to avoid the asynchronous update of the TIF
bit and defer it to the next context switch. The speculation control state
is stored in task_struct::atomic_flags by the prctl and seccomp updates
already.

Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the
atomic_flags. Check the bit on context switch and force a synchronous
update of the speculation control if set. Use the same mechanism for
updating the current task.

Reported-by: Tim Chen 
Signed-off-by: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Tom Lendacky 
Cc: Josh Poimboeuf 
Cc: Andrea Arcangeli 
Cc: David Woodhouse 
Cc: Tim Chen 
Cc: Andi Kleen 
Cc: Dave Hansen 
Cc: Casey Schaufler 
Cc: Asit Mallick 
Cc: Arjan van de Ven 
Cc: Jon Masters 
Cc: Waiman Long 
Cc: Greg KH 
Cc: Dave Stewart 
Cc: Kees Cook 
Cc: sta...@vger.kernel.org
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1811272247140.1...@nanos.tec.linutronix.de

---
 arch/x86/include/asm/spec-ctrl.h   |  6 +-
 arch/x86/include/asm/thread_info.h |  4 +++-
 arch/x86/kernel/cpu/bugs.c | 18 +++---
 arch/x86/kernel/process.c  | 30 +-
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 27b0bce3933b..5393babc0598 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { }
 #endif
 
 extern void speculation_ctrl_update(unsigned long tif);
-
-static inline void speculation_ctrl_update_current(void)
-{
-   speculation_ctrl_update(current_thread_info()->flags);
-}
+extern void speculation_ctrl_update_current(void);
 
 #endif
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 6d201699c651..82b73b75d67c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -84,6 +84,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT  7   /* syscall auditing active */
 #define TIF_SECCOMP8   /* secure computing */
 #define TIF_SPEC_IB9   /* Indirect branch speculation 
mitigation */
+#define TIF_SPEC_FORCE_UPDATE  10  /* Force speculation MSR update in 
context switch */
 #define TIF_USER_RETURN_NOTIFY 11  /* notify kernel of userspace return */
 #define TIF_UPROBE 12  /* breakpointed or singlestepping */
 #define TIF_PATCH_PENDING  13  /* pending live patching update */
@@ -112,6 +113,7 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_SPEC_IB   (1 << TIF_SPEC_IB)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
 #define _TIF_USER_RETURN_NOTIFY(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE(1 << TIF_UPROBE)
 #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -149,7 +151,7 @@ struct thread_info {
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE   \
(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
-_TIF_SSBD)
+_TIF_SSBD |

[tip:x86/pti] x86/speculation: Prepare arch_smt_update() for PRCTL mode

2018-11-28 Thread tip-bot for Thomas Gleixner

Commit-ID:  6893a959d7fdebbab5f5aa112c277d5a44435ba1
Gitweb: https://git.kernel.org/tip/6893a959d7fdebbab5f5aa112c277d5a44435ba1
Author: Thomas Gleixner 
AuthorDate: Sun, 25 Nov 2018 19:33:52 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Nov 2018 11:57:13 +0100

x86/speculation: Prepare arch_smt_update() for PRCTL mode

The upcoming fine grained per task STIBP control needs to be updated on CPU
hotplug as well.

Split out the code which controls the strict mode so the prctl control code
can be added later. Mark the SMP function call argument __unused while at it.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Tom Lendacky 
Cc: Josh Poimboeuf 
Cc: Andrea Arcangeli 
Cc: David Woodhouse 
Cc: Tim Chen 
Cc: Andi Kleen 
Cc: Dave Hansen 
Cc: Casey Schaufler 
Cc: Asit Mallick 
Cc: Arjan van de Ven 
Cc: Jon Masters 
Cc: Waiman Long 
Cc: Greg KH 
Cc: Dave Stewart 
Cc: Kees Cook 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.759457...@linutronix.de


---
 arch/x86/kernel/cpu/bugs.c | 46 +-
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 29f40a92f5a8..9cab538e10f1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -530,40 +530,44 @@ specv2_set_mode:
arch_smt_update();
 }
 
-static bool stibp_needed(void)
+static void update_stibp_msr(void * __unused)
 {
-   /* Enhanced IBRS makes using STIBP unnecessary. */
-   if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
-   return false;
-
-   /* Check for strict user mitigation mode */
-   return spectre_v2_user == SPECTRE_V2_USER_STRICT;
+   wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 }
 
-static void update_stibp_msr(void *info)
+/* Update x86_spec_ctrl_base in case SMT state changed. */
+static void update_stibp_strict(void)
 {
-   wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+   u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+
+   if (sched_smt_active())
+   mask |= SPEC_CTRL_STIBP;
+
+   if (mask == x86_spec_ctrl_base)
+   return;
+
+   pr_info("Update user space SMT mitigation: STIBP %s\n",
+   mask & SPEC_CTRL_STIBP ? "always-on" : "off");
+   x86_spec_ctrl_base = mask;
+   on_each_cpu(update_stibp_msr, NULL, 1);
 }
 
 void arch_smt_update(void)
 {
-   u64 mask;
-
-   if (!stibp_needed())
+   /* Enhanced IBRS implies STIBP. No update required. */
+   if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
 
mutex_lock(&spec_ctrl_mutex);
 
-   mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
-   if (sched_smt_active())
-   mask |= SPEC_CTRL_STIBP;
-
-   if (mask != x86_spec_ctrl_base) {
-   pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
-   mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
-   x86_spec_ctrl_base = mask;
-   on_each_cpu(update_stibp_msr, NULL, 1);
+   switch (spectre_v2_user) {
+   case SPECTRE_V2_USER_NONE:
+   break;
+   case SPECTRE_V2_USER_STRICT:
+   update_stibp_strict();
+   break;
}
+
mutex_unlock(&spec_ctrl_mutex);
 }

[PATCH V2 2/5] mm: update ptep_modify_prot_commit to take old pte value as arg

2018-11-28 Thread Aneesh Kumar K.V

Architectures like ppc64 need to do a conditional TLB flush based on the old
and new values of the pte. Enable that by passing the old pte value as an arg.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/s390/include/asm/pgtable.h | 3 ++-
 arch/s390/mm/pgtable.c  | 2 +-
 arch/x86/include/asm/paravirt.h | 2 +-
 fs/proc/task_mmu.c  | 8 +---
 include/asm-generic/pgtable.h   | 2 +-
 mm/memory.c | 8 
 mm/mprotect.c   | 6 +++---
 7 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5d730199e37b..76dc344edb8c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1070,7 +1070,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct 
*mm,
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, pte_t *, 
pte_t);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+pte_t *, pte_t, pte_t);
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 29c0a21cd34a..b283b92722cc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -322,7 +322,7 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, 
unsigned long addr,
 EXPORT_SYMBOL(ptep_modify_prot_start);
 
 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
-pte_t *ptep, pte_t pte)
+pte_t *ptep, pte_t old_pte, pte_t pte)
 {
pgste_t pgste;
struct mm_struct *mm = vma->vm_mm;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 1154f154025d..0d75a4f60500 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -429,7 +429,7 @@ static inline pte_t ptep_modify_prot_start(struct 
vm_area_struct *vma, unsigned
 }
 
 static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, 
unsigned long addr,
-  pte_t *ptep, pte_t pte)
+  pte_t *ptep, pte_t old_pte, pte_t 
pte)
 {
struct mm_struct *mm = vma->vm_mm;
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9952d7185170..8d62891d38a8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -940,10 +940,12 @@ static inline void clear_soft_dirty(struct vm_area_struct 
*vma,
pte_t ptent = *pte;
 
if (pte_present(ptent)) {
-   ptent = ptep_modify_prot_start(vma, addr, pte);
-   ptent = pte_wrprotect(ptent);
+   pte_t old_pte;
+
+   old_pte = ptep_modify_prot_start(vma, addr, pte);
+   ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
-   ptep_modify_prot_commit(vma, addr, pte, ptent);
+   ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index c9897dcc46c4..37039e918f17 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -619,7 +619,7 @@ static inline pte_t ptep_modify_prot_start(struct 
vm_area_struct *vma,
  */
 static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
   unsigned long addr,
-  pte_t *ptep, pte_t pte)
+  pte_t *ptep, pte_t old_pte, pte_t 
pte)
 {
__ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
 }
diff --git a/mm/memory.c b/mm/memory.c
index d36b0eaa7862..4f3ddaedc764 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3568,7 +3568,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
int last_cpupid;
int target_nid;
bool migrated = false;
-   pte_t pte;
+   pte_t pte, old_pte;
bool was_writable = pte_savedwrite(vmf->orig_pte);
int flags = 0;
 
@@ -3588,12 +3588,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 * Make it present again, Depending on how arch implementes non
 * accessible ptes, some can allow access by kernel mode.
 */
-   pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
-   pte = pte_modify(pte, vma->vm_page_prot);
+   old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+   pte = pte_modify(old_pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
-   ptep_modify_prot_commit(vma, vmf->address, vmf->pte, pte);

Re: [PATCH v2] mtd: change len type from signed to unsigned type

2018-11-28 Thread Huijin Park

Hi Miquèl,

On Thu, Nov 15, 2018 at 6:29 PM Miquel Raynal  wrote:
>
> Hi Huijin,
>
> Huijin Park  wrote on Thu, 15 Nov 2018
> 00:07:10 -0500:
>
> > From: "huijin.park" 
> >
> > This patch casts the "len" parameter to an unsigned int.
> > The callers of erase_write() pass the "len" parameter as unsigned int.
>
> Indeed. Perhaps it is worth backporting this patch to stable releases?
>

It doesn't need backporting,
because this patch is only for enforcing code correctness.

> >
> > Signed-off-by: huijin.park 
> > ---
> >  drivers/mtd/mtdblock.c |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
> > index a5b1933..b2d5ed1 100644
> > --- a/drivers/mtd/mtdblock.c
> > +++ b/drivers/mtd/mtdblock.c
> > @@ -56,7 +56,7 @@ struct mtdblk_dev {
> >   */
> >
> >  static int erase_write (struct mtd_info *mtd, unsigned long pos,
> > - int len, const char *buf)
> > + unsigned int len, const char *buf)
> >  {
> >   struct erase_info erase;
> >   size_t retlen;
>
> Reviewed-by: Miquel Raynal 
>
>
> Thanks,
> Miquèl

Thanks,
Huijin

Re: [PATCH v7 2/2] sched/fair: update scale invariance of PELT

2018-11-28 Thread Vincent Guittot

On Wed, 28 Nov 2018 at 15:40, Patrick Bellasi  wrote:
>
> On 28-Nov 14:33, Vincent Guittot wrote:
> > On Wed, 28 Nov 2018 at 12:53, Patrick Bellasi  
> > wrote:
> > >
> > > On 28-Nov 11:02, Peter Zijlstra wrote:
> > > > On Wed, Nov 28, 2018 at 10:54:13AM +0100, Vincent Guittot wrote:
> > > >
> > > > > Is there anything else that I should do for these patches ?
> > > >
> > > > IIRC, Morten mention they break util_est; Patrick was going to explain.
> > >
> > > I guess the problem is that, once we cross the current capacity,
> > > strictly speaking util_avg does not represent anymore a utilization.
> > >
> > > With the new signal this could happen and we end up storing estimated
> > > utilization samples which will overestimate the task requirements.
> > >
> > > We will have a spike in estimated utilization at next wakeup, since we
> > > use MAX(util_avg@dequeue_time, ewma). Potentially we also inflate the 
> > > EWMA in
> > > case we collect multiple samples above the current capacity.
> >
> > TBH I don't see how it's different from current implementation with a
> > task that was scheduled on big core and now wakes up on little core.
> > The util_est is overestimated as well.
>
> While running below the capacity of a CPU, either big or LITTLE, we
> can still measure the actual used bandwidth as long as we have idle
> time. If the task is then moved into a lower capacity core, I think
> it's still safe to assume that, likely, it would need more capacity.
>
> Why do you say it's the same ?

In the example of a task that runs 39ms in a period of 80ms, which we used
for the previous version,
the utilization on the big core will reach 709, and so will util_est.
When the task migrates to the little core (512), util_est is higher than
the current cpu capacity.

>
> With your new signal instead, once we cross the current capacity,
> utilization is just not utilization anymore. Thus, IMHO it makes sense
> to avoid accumulating a sample for what we call "estimated utilization".
>
> I would also say that, with the current implementation which caps
> utilization to the current capacity, we get better estimation in
> general. At least we can say with absolute precision:
>
>"the task needs _at least_ that amount of capacity".
>
> Potentially we can also flag the task as being under-provisioned, in
> case there was not idle time, and _let a policy_ decide what to do
> with it and the granted information we have.
>
> While, with your new signal, once we are over the current capacity,
> the "utilization" is just a sort of "random" number at best useful to
> drive some conclusions about how long the task has been delayed.
>
> IOW, I fear that we are embedding a policy within a signal which is
> currently representing something very well defined: how much cpu
> bandwidth a task used. While, latency/under-provisioning policies
> perhaps should be better placed somewhere else.
>
> Perhaps I've missed it in some of the previous discussions:
> have we considered/discussed this signal-vs-policy aspect ?
>
> --
> #include 
>
> Patrick Bellasi

Re: [PATCH] mtd: nand: Fix memory allocation in nanddev_bbt_init()

2018-11-28 Thread Schrempf Frieder

Hi Boris,

On 28.11.18 15:41, Boris Brezillon wrote:
> On Tue, 27 Nov 2018 07:44:52 +
> Schrempf Frieder  wrote:
> 
>> Fix the size of the buffer allocated to store the in-memory BBT.
>> This bug was previously hidden by a different bug, that was fixed in
>> d098093ba06e.
>>
>> Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND 
>> devices")
>> Cc: 
>> Signed-off-by: Frieder Schrempf 
> 
> Looks like your From header does not match the SoB tag
> ('Frieder Schrempf' vs 'Schrempf Frieder') and checkpatch does not like
> that. I'll fix it when applying, but maybe you should fix
> your .gitconfig to make them match.

Actually the From header in my local patch is correct ( 
, Frieder Schrempf) as it comes from my git config. But since 
our company was renamed and our mail servers were transferred, our 
e-mails are sent with From= .

It seems like git send-email or patchwork or whatever uses the 
information from the e-mail header instead of what is in the patch.

I will try to raise this issue with our IT department as this would be 
best fixed on their side.

Thanks,
Frieder

> 
>> ---
>>   drivers/mtd/nand/bbt.c | 3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c
>> index 56cde38..c12497f 100644
>> --- a/drivers/mtd/nand/bbt.c
>> +++ b/drivers/mtd/nand/bbt.c
>> @@ -27,7 +27,8 @@ int nanddev_bbt_init(struct nand_device *nand)
>>  unsigned int nwords = DIV_ROUND_UP(nblocks * bits_per_block,
>> BITS_PER_LONG);
>>   
>> -nand->bbt.cache = kzalloc(nwords, GFP_KERNEL);
>> +nand->bbt.cache = kzalloc(nwords * (BITS_PER_LONG / BITS_PER_BYTE),
>> +  GFP_KERNEL);
>>  if (!nand->bbt.cache)
>>  return -ENOMEM;
>>   
>

Re: [PATCH 07/10] regulator: da9211: Let core handle GPIO descriptors

2018-11-28 Thread Charles Keepax

On Wed, Nov 28, 2018 at 11:43:47AM +0100, Linus Walleij wrote:
> Use the gpiod_get_from_of_node() rather than the devm_*
> version so that the regulator core can handle the lifecycle
> of these descriptors.
> 
> This patch requires "gpio: Export gpiod_get_from_of_node()"
> to be applied first.
> 
> Fixes: 11da04af0d3b ("regulator: da9211: Pass descriptors instead of GPIO 
> numbers")
> Signed-off-by: Linus Walleij 
> ---
>  drivers/regulator/da9211-regulator.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/regulator/da9211-regulator.c 
> b/drivers/regulator/da9211-regulator.c
> index 8f68c7a05d27..bfdead356526 100644
> --- a/drivers/regulator/da9211-regulator.c
> +++ b/drivers/regulator/da9211-regulator.c
> @@ -293,8 +293,8 @@ static struct da9211_pdata *da9211_parse_regulators_dt(
>  
>   pdata->init_data[n] = da9211_matches[i].init_data;
>   pdata->reg_node[n] = da9211_matches[i].of_node;
> - pdata->gpiod_ren[n] = devm_gpiod_get_from_of_node(dev,
> -   da9211_matches[i].of_node,
> + pdata->gpiod_ren[n] =
> + gpiod_get_from_of_node(da9211_matches[i].of_node,

This driver has a lot of error paths that will leak the GPIO with
this change.

Thanks,
Charles

> "enable",
> 0,
> GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE,
> -- 
> 2.19.1

[PATCH] RDMA/drivers: fix spelling mistake "initalize" -> "initialize"

2018-11-28 Thread Colin King

From: Colin Ian King 

Fix spelling mistake in usnic_err error message

Signed-off-by: Colin Ian King 
---
 drivers/infiniband/hw/usnic/usnic_ib_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c 
b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 73bd00f8d2c8..413fa5732e2b 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -649,7 +649,7 @@ static int __init usnic_ib_init(void)
 
err = usnic_uiom_init(DRV_NAME);
if (err) {
-   usnic_err("Unable to initalize umem with err %d\n", err);
+   usnic_err("Unable to initialize umem with err %d\n", err);
return err;
}
 
-- 
2.19.1

Re: [PATCH 10/10] regulator: tps65090: Let core handle GPIO descriptors

2018-11-28 Thread Charles Keepax

On Wed, Nov 28, 2018 at 11:43:50AM +0100, Linus Walleij wrote:
> Use the gpiod_get_from_of_node() rather than the devm_*
> version so that the regulator core can handle the lifecycle
> of these descriptors.
> 
> This patch requires "gpio: Export gpiod_get_from_of_node()"
> to be applied first.
> 
> Fixes: 3012e81446d0 ("regulator: tps65090: Pass descriptor instead of GPIO 
> number")
> Signed-off-by: Linus Walleij 
> ---
>  drivers/regulator/tps65090-regulator.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/regulator/tps65090-regulator.c 
> b/drivers/regulator/tps65090-regulator.c
> index db714d5edafc..223f6974a9f3 100644
> --- a/drivers/regulator/tps65090-regulator.c
> +++ b/drivers/regulator/tps65090-regulator.c
> @@ -376,11 +376,11 @@ static struct tps65090_platform_data 
> *tps65090_parse_dt_reg_data(
>   gflags = GPIOD_OUT_LOW;
>   gflags |= GPIOD_FLAGS_BIT_NONEXCLUSIVE;
>  
> - rpdata->gpiod = devm_gpiod_get_from_of_node(>dev,
> - 
> tps65090_matches[idx].of_node,
> - 
> "dcdc-ext-control-gpios", 0,
> - gflags,
> - "tps65090");
> + rpdata->gpiod = gpiod_get_from_of_node(
> + tps65090_matches[idx].of_node,
> + "dcdc-ext-control-gpios", 0,
> + gflags,
> + "tps65090");
>   if (IS_ERR(rpdata->gpiod))
>   return ERR_CAST(rpdata->gpiod);
>   if (!rpdata->gpiod)

This one needs some handling to avoid leaking the gpio too.

Thanks,
Charles

Re: [PATCH] mtd: nand: Fix memory allocation in nanddev_bbt_init()

2018-11-28 Thread Schrempf Frieder

On 28.11.18 16:02, Boris Brezillon wrote:
> On Wed, 28 Nov 2018 14:55:45 +
> Schrempf Frieder  wrote:
> 
>> Hi Boris,
>>
>> On 28.11.18 15:41, Boris Brezillon wrote:
>>> On Tue, 27 Nov 2018 07:44:52 +
>>> Schrempf Frieder  wrote:
>>>
>>>> Fix the size of the buffer allocated to store the in-memory BBT.
>>>> This bug was previously hidden by a different bug, that was fixed in
>>>> d098093ba06e.
>>>>
>>>> Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND 
>>>> devices")
>>>> Cc: 
>>>> Signed-off-by: Frieder Schrempf 
>>>
>>> Looks like your From header does not match the SoB tag
>>> ('Frieder Schrempf' vs 'Schrempf Frieder') and checkpatch does not like
>>> that. I'll fix it when applying, but maybe you should fix
>>> your .gitconfig to make them match.
>>
>> Actually the From header in my local patch is correct (
>> , Frieder Schrempf) as it comes from my git config. But since
>> our company was renamed and our mail servers were transferred, our
>> e-mails are sent with From= .
>>
>> It seems like git send-email or patchwork or whatever uses the
>> information from the e-mail header instead of what is in the patch.
>>
>> I will try to raise this issue with our IT department as this would be
>> best fixed on their side.
> 
> There's another solution: make git send-email add a From header in the
> message body.
> 
> git config --global sendemail.from "Schrempf Frieder 
> "

I don't get it. How would that change things? My From still wouldn't 
match my SoB tags.

Re: [PATCH 4/9] drm/rockchip/rockchip_drm_gem.c: Convert to use vm_insert_range

2018-11-28 Thread Heiko Stübner

Hi Souptick,

Am Montag, 26. November 2018, 06:36:42 CET schrieb Souptick Joarder:
> On Thu, Nov 15, 2018 at 9:14 PM Souptick Joarder  
wrote:
> > Convert to use vm_insert_range() to map range of kernel
> > memory to user vma.
> > 
> > Signed-off-by: Souptick Joarder 
> 
> Any feedback for this patch ?

sorry, took a bit longer to find time for a test-run.

Except for the missing EXPORT_SYMBOL already pointed out in patch 1,
my displays are still working on modern (with iommu) and the older
(without iommu) Rockchip SoCs, so

On rk3188, rk3288, rk3328 and rk3399
Tested-by: Heiko Stuebner 
and in general
Acked-by: Heiko Stuebner 


Heiko

Re: [PATCH v5 1/2] kernel/signal: Signal-based pre-coredump notification

2018-11-28 Thread Dave Martin

On Tue, Nov 27, 2018 at 10:54:41PM +, Enke Chen wrote:
> [Repost as a series, as suggested by Andrew Morton]
> 
> For simplicity and consistency, this patch provides an implementation
> for signal-based fault notification prior to the coredump of a child
> process. A new prctl command, PR_SET_PREDUMP_SIG, is defined that can
> be used by an application to express its interest and to specify the
> signal for such a notification.
> 
> Changes to prctl(2):
> 
>PR_SET_PREDUMP_SIG (since Linux 4.20.x)
>   Set the child pre-coredump signal of the calling process to
>   arg2 (either a signal value in the range 1..maxsig, or 0 to
>   clear). This is the signal that the calling process will get
>   prior to the coredump of a child process. This value is
>   cleared across execve(2), or for the child of a fork(2).
> 
>PR_GET_PREDUMP_SIG (since Linux 4.20.x)
>   Return the current value of the child pre-coredump signal,
>   in the location pointed to by (int *) arg2.
> 
> Background:
> 
> As the coredump of a process may take time, in certain time-sensitive
> applications it is necessary for a parent process (e.g., a process
> manager) to be notified of a child's imminent death before the coredump
> so that the parent process can act sooner, such as re-spawning an
> application process, or initiating a control-plane fail-over.
> 
> One application is BFD. The early fault notification is a critical
> component for maintaining BFD sessions (with a timeout value of
> 50 msec or 100 msec) across a control-plane failure.
> 
> Currently there are two ways for a parent process to be notified of a
> child process's state change. One is to use the POSIX signal, and
> another is to use the kernel connector module. The specific events and
> actions are summarized as follows:
> 
> Process Event    POSIX Signal                Connector-based
> ----------------------------------------------------------------------
> ptrace_attach()  do_notify_parent_cldstop()  proc_ptrace_connector()
>                  SIGCHLD / CLD_STOPPED
> 
> ptrace_detach()  do_notify_parent_cldstop()  proc_ptrace_connector()
>                  SIGCHLD / CLD_CONTINUED
> 
> pre_coredump/    N/A                         proc_coredump_connector()
> get_signal()
> 
> post_coredump/   do_notify_parent()          proc_exit_connector()
> do_exit()        SIGCHLD / exit_signal
> ----------------------------------------------------------------------
> 
> As shown in the table, the signal-based pre-coredump notification is not
> currently available. In some cases using a connector-based notification
> can be quite complicated (e.g., when a process manager is written in shell
> scripts and thus is subject to certain inherent limitations), and a
> signal-based notification would be simpler and better suited.

Since this is a notification of a change of process status, would it be
more natural to send it through SIGCHLD?

As with other supplementary child status events, a flag could be added
for wait and sigaction.sa_flags to indicate whether the parent wants
this event to be reported or not.

Then a suitable CLD_XXX could be defined for this, and we could
piggyback on PR_{SET,GET}_PDEATHSIG rather than having to have something
new.

(I hadn't been watching this thread closely, so apologies if this has
been discussed already.)

> 
> Signed-off-by: Enke Chen 
> Reviewed-by: Oleg Nesterov 
> ---
> v4 -> v5:
> Addressed review comments from Oleg Nesterov:
> o use rcu_read_lock instead.
> o revert back to notify the real_parent.
> 
>  fs/coredump.c| 23 +++
>  fs/exec.c|  3 +++
>  include/linux/sched/signal.h |  3 +++
>  include/uapi/linux/prctl.h   |  4 
>  kernel/sys.c | 13 +
>  5 files changed, 46 insertions(+)
> 
> diff --git a/fs/coredump.c b/fs/coredump.c
> index e42e17e..740b1bb 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -536,6 +536,24 @@ static int umh_pipe_setup(struct subprocess_info *info, 
> struct cred *new)
>   return err;
>  }
>  
> +/*
> + * While do_notify_parent() notifies the parent of a child's death post
> + * its coredump, this function lets the parent (if so desired) know about
> + * the imminent death of a child just prior to its coredump.
> + */
> +static void do_notify_parent_predump(void)
> +{
> + struct task_struct *parent;
> + int sig;
> +
> + rcu_read_lock();
> + parent = rcu_dereference(current->real_parent);
> + sig = parent->signal->predump_signal;
> + if (sig != 0)
> + do_send_sig_info(sig, SEND_SIG_NOINFO, parent, PIDTYPE_TGID);

Doesn't this send si_code == SI_USER?  That seems wrong: the receiving
process wouldn't be able to distinguish a real pre-coredump
notification from a bogus one sent by kill(2) etc.

SEND_SIG_PRIV also looks wrong, because it assumes that the sender is
"the kernel" so there is no si_pid.

This may be another

[tip:x86/pti] sched/smt: Make sched_smt_present track topology

2018-11-28 Thread tip-bot for Peter Zijlstra (Intel)

Commit-ID:  c5511d03ec090980732e929c318a7a6374b5550e
Gitweb: https://git.kernel.org/tip/c5511d03ec090980732e929c318a7a6374b5550e
Author: Peter Zijlstra (Intel) 
AuthorDate: Sun, 25 Nov 2018 19:33:36 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Nov 2018 11:57:06 +0100

sched/smt: Make sched_smt_present track topology

Currently the 'sched_smt_present' static key is enabled when at CPU bringup
SMT topology is observed, but it is never disabled. However there is demand
to also disable the key when the topology changes such that there is no SMT
present anymore.

Implement this by making the key count the number of cores that have SMT
enabled.

In particular, the SMT topology bits are set before interrupts are enabled
and similarly, are cleared after interrupts are disabled for the last time
and the CPU dies.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Tom Lendacky 
Cc: Josh Poimboeuf 
Cc: Andrea Arcangeli 
Cc: David Woodhouse 
Cc: Tim Chen 
Cc: Andi Kleen 
Cc: Dave Hansen 
Cc: Casey Schaufler 
Cc: Asit Mallick 
Cc: Arjan van de Ven 
Cc: Jon Masters 
Cc: Waiman Long 
Cc: Greg KH 
Cc: Dave Stewart 
Cc: Kees Cook 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.246110...@linutronix.de


---
 kernel/sched/core.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 091e089063be..6fedf3a98581 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5738,15 +5738,10 @@ int sched_cpu_activate(unsigned int cpu)
 
 #ifdef CONFIG_SCHED_SMT
/*
-* The sched_smt_present static key needs to be evaluated on every
-* hotplug event because at boot time SMT might be disabled when
-* the number of booted CPUs is limited.
-*
-* If then later a sibling gets hotplugged, then the key would stay
-* off and SMT scheduling would never be functional.
+* When going up, increment the number of cores with SMT present.
 */
-   if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
-   static_branch_enable_cpuslocked(_smt_present);
+   if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+   static_branch_inc_cpuslocked(_smt_present);
 #endif
set_cpu_active(cpu, true);
 
@@ -5790,6 +5785,14 @@ int sched_cpu_deactivate(unsigned int cpu)
 */
synchronize_rcu_mult(call_rcu, call_rcu_sched);
 
+#ifdef CONFIG_SCHED_SMT
+   /*
+* When going down, decrement the number of cores with SMT present.
+*/
+   if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+   static_branch_dec_cpuslocked(_smt_present);
+#endif
+
if (!sched_smp_initialized)
return 0;

Re: [PATCH 2/2] arm64: preempt: Provide our own implementation of asm/preempt.h

2018-11-28 Thread Ard Biesheuvel

On Tue, 27 Nov 2018 at 20:44, Will Deacon  wrote:
>
> The asm-generic/preempt.h implementation doesn't make use of the
> PREEMPT_NEED_RESCHED flag, since this can interact badly with load/store
> architectures which rely on the preempt_count word being unchanged across
> an interrupt.
>
> However, since we're a 64-bit architecture and the preempt count is
> only 32 bits wide, we can simply pack it next to the resched flag and
> load the whole thing in one go, so that a dec-and-test operation doesn't
> need to load twice.
>

Since the actual preempt count is a lot narrower than 32 bits, x86
just uses bit 31.

So what is the reason for using two different words?


> Signed-off-by: Will Deacon 
> ---
>  arch/arm64/include/asm/Kbuild|  1 -
>  arch/arm64/include/asm/preempt.h | 78 
> 
>  arch/arm64/include/asm/thread_info.h | 13 +-
>  3 files changed, 90 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arm64/include/asm/preempt.h
>
> diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
> index 6cd5d77b6b44..33498f900390 100644
> --- a/arch/arm64/include/asm/Kbuild
> +++ b/arch/arm64/include/asm/Kbuild
> @@ -14,7 +14,6 @@ generic-y += local64.h
>  generic-y += mcs_spinlock.h
>  generic-y += mm-arch-hooks.h
>  generic-y += msi.h
> -generic-y += preempt.h
>  generic-y += qrwlock.h
>  generic-y += qspinlock.h
>  generic-y += rwsem.h
> diff --git a/arch/arm64/include/asm/preempt.h 
> b/arch/arm64/include/asm/preempt.h
> new file mode 100644
> index ..832227d5ebc0
> --- /dev/null
> +++ b/arch/arm64/include/asm/preempt.h
> @@ -0,0 +1,78 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_PREEMPT_H
> +#define __ASM_PREEMPT_H
> +
> +#include 
> +
> +#define PREEMPT_NEED_RESCHED   BIT(32)
> +#define PREEMPT_ENABLED(PREEMPT_NEED_RESCHED)
> +
> +static inline int preempt_count(void)
> +{
> +   return READ_ONCE(current_thread_info()->preempt.count);
> +}
> +
> +static inline void preempt_count_set(u64 pc)
> +{
> +   /* Preserve existing value of PREEMPT_NEED_RESCHED */
> +   WRITE_ONCE(current_thread_info()->preempt.count, pc);
> +}
> +
> +#define init_task_preempt_count(p) do { \
> +   task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
> +} while (0)
> +
> +#define init_idle_preempt_count(p, cpu) do { \
> +   task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
> +} while (0)
> +
> +static inline void set_preempt_need_resched(void)
> +{
> +   current_thread_info()->preempt.need_resched = 0;
> +}
> +
> +static inline void clear_preempt_need_resched(void)
> +{
> +   current_thread_info()->preempt.need_resched = 1;
> +}
> +
> +static inline bool test_preempt_need_resched(void)
> +{
> +   return !current_thread_info()->preempt.need_resched;
> +}
> +
> +static inline void __preempt_count_add(int val)
> +{
> +   u32 pc = READ_ONCE(current_thread_info()->preempt.count);
> +   pc += val;
> +   WRITE_ONCE(current_thread_info()->preempt.count, pc);
> +}
> +
> +static inline void __preempt_count_sub(int val)
> +{
> +   u32 pc = READ_ONCE(current_thread_info()->preempt.count);
> +   pc -= val;
> +   WRITE_ONCE(current_thread_info()->preempt.count, pc);
> +}
> +
> +static inline bool __preempt_count_dec_and_test(void)
> +{
> +   u64 pc = READ_ONCE(current_thread_info()->preempt_count);
> +   WRITE_ONCE(current_thread_info()->preempt.count, --pc);
> +   return !pc;
> +}
> +
> +static inline bool should_resched(int preempt_offset)
> +{
> +   u64 pc = READ_ONCE(current_thread_info()->preempt_count);
> +   return pc == preempt_offset;
> +}
> +
> +#ifdef CONFIG_PREEMPT
> +void preempt_schedule(void);
> +#define __preempt_schedule() preempt_schedule()
> +void preempt_schedule_notrace(void);
> +#define __preempt_schedule_notrace() preempt_schedule_notrace()
> +#endif /* CONFIG_PREEMPT */
> +
> +#endif /* __ASM_PREEMPT_H */
> diff --git a/arch/arm64/include/asm/thread_info.h 
> b/arch/arm64/include/asm/thread_info.h
> index cb2c10a8f0a8..bbca68b54732 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -42,7 +42,18 @@ struct thread_info {
>  #ifdef CONFIG_ARM64_SW_TTBR0_PAN
> u64 ttbr0;  /* saved TTBR0_EL1 */
>  #endif
> -   int preempt_count;  /* 0 => preemptable, <0 => 
> bug */
> +   union {
> +   u64 preempt_count;  /* 0 => preemptible, <0 => 
> bug */
> +   struct {
> +#ifdef CONFIG_CPU_BIG_ENDIAN
> +   u32 need_resched;
> +   u32 count;
> +#else
> +   u32 count;
> +   u32 need_resched;
> +#endif
> +   } preempt;
> +   };
>  };
>
>  #define thread_saved_pc(tsk)   \
> --
> 2.1.4
>

Re: [PATCH 08/17] soc: ti: pruss: Add a PRUSS irqchip driver for PRUSS interrupts

2018-11-28 Thread David Lechner


On 11/27/18 9:39 AM, Roger Quadros wrote:


On 26/11/18 23:17, David Lechner wrote:

On 11/22/18 5:39 AM, Roger Quadros wrote:

From: Suman Anna 

The Programmable Real-Time Unit Subsystem (PRUSS) contains an
interrupt controller (INTC) that can handle various system input
events and post interrupts back to the device-level initiators.
The INTC can support up to 64 input events with individual control
configuration and hardware prioritization. These events are mapped
onto 10 interrupt signals through two levels of many-to-one mapping
support. Different interrupt signals are routed to the individual
PRU cores or to the host CPU.

The PRUSS INTC platform driver manages this PRUSS interrupt
controller and implements an irqchip driver to provide a Linux
standard way for the PRU client users to enable/disable/ack/
re-trigger a PRUSS system event. The system events to interrupt
channels and host interrupts relies on the mapping configuration
provided through a firmware resource table for now. This will be
revisited and enhanced in the future for a better interface. The
mappings will currently be programmed during the boot/shutdown
of the PRU.


Does this mapping table take up space in the PRU IRAM or DRAM? If
so, that can be a problem on the AM18xx because it has such limited
resources - every byte counts.


Currently the entire resource table is being placed in DRAM.
But that is only because the current rpmsg vdev implementation depends on the
rpmsg channel information and vring buffers to be in DRAM.

I think the right way is to split up the 2 things.
i.e. separate out rpmsg channel DRAM allocation from resource table
and don't copy the resource table to DRAM.

This way if there is no rpmsg channel in the resource table we won't eat
any DRAM.

I'm not sure if there are any bottlenecks. I will only know when I work on it.


Sounds good to me.

RE: [PATCH V3 4/4] ARM64: dts: imx: add i.MX8QXP system controller RTC support

2018-11-28 Thread Aisheng DONG

[...]
> Subject: [PATCH V3 4/4] ARM64: dts: imx: add i.MX8QXP system controller RTC
> support
> 
> Add i.MX8QXP system controller RTC support.
> 
> Signed-off-by: Anson Huang 
> ---
>  arch/arm64/boot/dts/freescale/imx8qxp.dtsi | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
> b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
> index da99b6f..5e8b554 100644
> --- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
> +++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi
> @@ -406,4 +406,8 @@
>   #size-cells = <1>;
>   ranges = <0x5f00 0x0 0x5f00 0x100>;
>   };
> +
> + rtc: rtc {
> + compatible = "nxp,imx8qxp-sc-rtc";

As I replied in patch 1, this belongs to scu node as well.

Regards
Dong Aisheng

> + };
>  };
> --
> 2.7.4

RE: [PATCH V3 2/4] rtc: add i.MX system controller RTC support

2018-11-28 Thread Aisheng DONG

> -Original Message-
> From: Anson Huang
> Sent: Wednesday, November 28, 2018 5:57 PM
[...]
> Subject: [PATCH V3 2/4] rtc: add i.MX system controller RTC support
> 
> i.MX8QXP is an ARMv8 SoC which has a Cortex-M4 system controller inside,
> the system controller is in charge of controlling power, clock and secure rtc
> etc..
> 
> This patch adds i.MX system controller RTC driver support, Linux kernel has to
> communicate with system controller via MU (message unit) IPC to set/get RTC
> time and other alarm functions, since the RTC set time needs to be done in
> secure EL3 mode (required by system controller firmware) and alarm functions
> needs to be done with general MU IRQ handle, these depend on other
> components which are NOT ready, so this patch ONLY enables the RTC time
> read.
> 
> Signed-off-by: Anson Huang 
> ---
> changes since V2:
>   - make rtc_ipc_handle/imx_sc_rtc static;
>   - remove comma in last entry of imx_sc_dt_ids;
>   - rename rtc_device.
>  drivers/rtc/Kconfig  |   6 +++
>  drivers/rtc/Makefile |   1 +
>  drivers/rtc/rtc-imx-sc.c | 107

This change log format seems strange.

> +++
>  3 files changed, 114 insertions(+)
>  create mode 100644 drivers/rtc/rtc-imx-sc.c
> 
> diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a819ef0..3b9642e
> 100644
> --- a/drivers/rtc/Kconfig
> +++ b/drivers/rtc/Kconfig
> @@ -1677,6 +1677,12 @@ config RTC_DRV_SNVS
>  This driver can also be built as a module, if so, the module
>  will be called "rtc-snvs".
> 
> +config RTC_DRV_IMX_SC
> + tristate "NXP i.MX System Controller RTC support"
> + help
> +If you say yes here you get support for the NXP i.MX System
> +Controller RTC module.
> +
>  config RTC_DRV_SIRFSOC
>   tristate "SiRFSOC RTC"
>   depends on ARCH_SIRF
> diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 
> 290c173..f97c05e
> 100644
> --- a/drivers/rtc/Makefile
> +++ b/drivers/rtc/Makefile
> @@ -75,6 +75,7 @@ obj-$(CONFIG_RTC_DRV_GOLDFISH)  += rtc-goldfish.o
>  obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
>  obj-$(CONFIG_RTC_DRV_HYM8563)+= rtc-hym8563.o
>  obj-$(CONFIG_RTC_DRV_IMXDI)  += rtc-imxdi.o
> +obj-$(CONFIG_RTC_DRV_IMX_SC) += rtc-imx-sc.o
>  obj-$(CONFIG_RTC_DRV_ISL12022)   += rtc-isl12022.o
>  obj-$(CONFIG_RTC_DRV_ISL12026)   += rtc-isl12026.o
>  obj-$(CONFIG_RTC_DRV_ISL1208)+= rtc-isl1208.o
> diff --git a/drivers/rtc/rtc-imx-sc.c b/drivers/rtc/rtc-imx-sc.c new file mode
> 100644 index 000..b8e331e
> --- /dev/null
> +++ b/drivers/rtc/rtc-imx-sc.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright 2018 NXP.
> + */
> +
> +#include 
> +#include 
> +#include 

Do we need them all?

> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define IMX_SC_TIMER_FUNC_GET_RTC_SEC19709
> +#define IMX_SC_TIMER_FUNC_SET_RTC_TIME   6
> +
> +static struct imx_sc_ipc *rtc_ipc_handle; static struct rtc_device
> +*imx_sc_rtc;
> +
> +struct imx_sc_msg_req_timer_get_rtc_time {
> + struct imx_sc_rpc_msg hdr;
> +} __packed;
> +
> +struct imx_sc_msg_resp_timer_get_rtc_time {
> + struct imx_sc_rpc_msg hdr;
> + u32 time;
> +} __packed;
> +
> +static int imx_sc_rtc_read_time(struct device *dev, struct rtc_time
> +*tm) {
> + struct imx_sc_msg_resp_timer_get_rtc_time *resp;
> + struct imx_sc_msg_req_timer_get_rtc_time msg;
> + struct imx_sc_rpc_msg *hdr = 
> + int ret;
> +
> + hdr->ver = IMX_SC_RPC_VERSION;
> + hdr->svc = IMX_SC_RPC_SVC_TIMER;
> + hdr->func = IMX_SC_TIMER_FUNC_GET_RTC_SEC1970;
> + hdr->size = 1;
> +
> + ret = imx_scu_call_rpc(rtc_ipc_handle, , true);
> + if (ret) {
> + pr_err("read rtc time failed, ret %d\n", ret);
> + return ret;
> + }
> +
> + resp = (struct imx_sc_msg_resp_timer_get_rtc_time *)

This is wrong and may result in memory corruption.
Please refer to how the scu-clk driver does it.

Regards
Dong Aisheng

> + rtc_time_to_tm(resp->time, tm);
> +
> + return 0;
> +}
> +
> +static const struct rtc_class_ops imx_sc_rtc_ops = {
> + .read_time = imx_sc_rtc_read_time,
> +};
> +
> +static int imx_sc_rtc_probe(struct platform_device *pdev) {
> + int ret;
> +
> + ret = imx_scu_get_handle(_ipc_handle);
> + if (ret) {
> + if (ret == -EPROBE_DEFER)
> + return ret;
> +
> + dev_err(>dev, "failed to get ipc handle: %d!\n", ret);
> + return ret;
> + }
> +
> + imx_sc_rtc = devm_rtc_allocate_device(>dev);
> + if (IS_ERR(imx_sc_rtc)) {
> + ret = PTR_ERR(imx_sc_rtc);
> + return ret;
> + }
> +
> + imx_sc_rtc->ops = _sc_rtc_ops;
> + imx_sc_rtc->range_min = 0;
> + imx_sc_rtc->range_max = U32_MAX;
> +
> + ret = rtc_register_device(imx_sc_rtc);
> + if (ret) {
> + dev_err(>dev,

Re: ext4 file system corruption with v4.19.3 / v4.19.4

2018-11-28 Thread Theodore Y. Ts'o

On Wed, Nov 28, 2018 at 04:56:51PM +0100, Rainer Fiebig wrote:
> 
> If you still see the errors, at least the Ubuntu-kernel could be ruled out.

My impression is that some of the people reporting problems have been
using stock upstream kernels, so I wasn't really worried about the
Ubuntu kernel (although it could be something about the default
configs that Ubuntu sets up).  What I was more wondering was whether
there was something about userspace or default configs of Ubuntu.
This isn't necessarily a *problem* per se; for example, not that long
ago some users were getting surprised when a problem showed up with an
older version of the LVM2 userspace with newer upstream kernels.
After a while, you learn to get super paranoid about making sure to
rule out all possibilities when trying to debug problems that are only
hitting a set of users.

- Ted

[PATCH v3] perf symbols: Cannot disassemble some routines when debuginfo present

2018-11-28 Thread Eric Saint-Etienne

When the kernel is compiled with -ffunction-sections and perf uses the
kernel debuginfo, perf fails the very first symbol lookup and ends up with
a hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps.

Indeed only .text gets loaded by map_groups__find() into al->map.
Consequently al->map address range encompasses the whole kernel image.
But then map__load() loads many function maps by splitting al->map,
which reduces al->map range drastically. Very likely the target address is
then in one of those newly created function maps, so we need to lookup the
map again to find that new map.

I'm not sure if this issue is only specific to the kernel but at least it
occurs with the kernel dso, and when we're not using the kernel debuginfo,
perf will fallback to using kallsyms and then the first lookup will work.

The split of .text section happens in dso_process_kernel_symbol() where we
call map_groups__find_by_name() to find an existing map, but with
-ffunction-sections and a symbol belonging to a new (function) map, such
map doesn't exist yet so we end up creating one and adjusting existing maps
accordingly because adjust_kernel_syms is set there.

This patch makes sure that the event address we're looking-up is indeed
within the map we've found, otherwise we lookup another map again.
Only one extra lookup at most is required for the proper map to be found,
if it exists.

Signed-off-by: Eric Saint-Etienne 
Reviewed-by: Darren Kenny 
---
 tools/perf/util/event.c | 53 +++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index e9c108a..f7cad1a 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1569,9 +1569,58 @@ struct map *thread__find_map(struct thread *thread, u8 
cpumode, u64 addr,
 * Kernel maps might be changed when loading symbols so loading
 * must be done prior to using kernel maps.
 */
-   if (load_map)
+   if (load_map) {
+   /*
+* Note when using -ffunction-sections on the kernel:
+*
+* Only .text got loaded into al->map at this point.
+* Consequently al->map address range encompass the
+* whole image.
+*
+* map__load() will split this map into many function
+* maps by shrinking al->map accordingly.
+*
+* The split happens in dso_process_kernel_symbol()
+* where we call map_groups__find_by_name() to find an
+* existing map, but with -ffunction-sections and a
+* symbol belonging to a new (function) map, such map
+* doesn't exist yet so we end up creating one and
+* adjusting existing maps accordingly because
+* adjust_kernel_syms is set there.
+*/
+
map__load(al->map);
-   al->addr = al->map->map_ip(al->map, al->addr);
+
+   /*
+* Note when using -ffunction-sections on the kernel:
+*
+* Very likely the target address will now be in one of
+* the newly created function maps but al->map still
+* points to .text which has been drastically shrank by
+* the split done in map__load()
+*/
+   if (al->addr < al->map->start ||
+   al->addr >= al->map->end) {
+   al->map = map_groups__find(mg, al->addr);
+
+   /*
+* map_groups__find() should always find a map
+* because the target address was initially
+* found in .text which got split by map__load()
+* *WITHOUT INTRODUCING ANY GAP*
+*/
+   WARN_ONCE(al->map == NULL,
+ "map__load created unexpected gaps!");
+   }
+   }
+
+   /*
+* In case the later call to map_groups__find() didn't find a
+* suitable map (it should always, but better be safe) we make
+* sure that al->map is still valid before deferencing it.
+*/
+   if (al->map != NULL)
+   al->addr = al->map->map_ip(al->map, al->addr);
}
 
return al->map;
-- 
1.8.3.1

Re: ext4 file system corruption with v4.19.3 / v4.19.4

2018-11-28 Thread Marek Habersack

On 28/11/2018 17:10, Theodore Y. Ts'o wrote:
> On Wed, Nov 28, 2018 at 04:56:51PM +0100, Rainer Fiebig wrote:
>>
>> If you still see the errors, at least the Ubuntu-kernel could be ruled out.
> 
> My impression is that some of the people reporting problems have been
> using stock upstream kernels, so I wasn't really worried about the
Also, the Ubuntu mainline kernel doesn't patch the kernel code, it merely uses 
Ubuntu configs to build the stock kernel
(you can find the patches in e.g. 
http://kernel.ubuntu.com/~kernel-ppa/mainline/v4.19.5/ at the top of the 
directory)

> Ubuntu kernel (although it could be something about the default
> configs that Ubuntu sets up).  What I was more wondering was whether
> there was something about userspace or default configs of Ubuntu.
> This isn't necessarily a *problem* per se; for example, not that long
> ago some users were getting surprised when a problem showed up with an
> older version of the LVM2 userspace with newer upstream kernels.
> After a while, you learn to get super paranoid about making sure to
> rule out all possibilities when trying to debug problems that are only
> hitting a set of users.
> 
>   - Ted
> 

marek

Re: [PATCH] KVM: VMX: re-add ple_gap module parameter

2018-11-28 Thread Paolo Bonzini

On 28/11/18 17:11, Sasha Levin wrote:
> On Wed, Nov 28, 2018 at 03:57:53PM +, Moger, Babu wrote:
>> My bad.. Sorry about this. I think this should also go to
>> sta...@vger.kernel.org
> 
> Please actually add the stable tag in the commit message if you intend
> for this patch to go in stable.

No worries, the patch is already on its way to Linus with the stable tag.

Paolo

Re: [LKP] [mm] ac5b2c1891: vm-scalability.throughput -61.3% regression

2018-11-28 Thread Linus Torvalds

On Tue, Nov 27, 2018 at 7:20 PM Huang, Ying  wrote:
>
> From the above data, for the parent commit 3 processes exited within
> 14s, another 3 exited within 100s.  For this commit, the first process
> exited at 203s.  That is, this commit makes memory allocation more fair
> among processes, so that processes proceeded at more similar speed.  But
> this raises system memory footprint too, so triggered much more swap,
> thus lower benchmark score.
>
> In general, memory allocation fairness among processes should be a good
> thing.  So I think the report should have been a "performance
> improvement" instead of "performance regression".

Hey, when you put it that way...

Let's ignore this issue for now, and see if it shows up in some real
workload and people complain.

 Linus

Re: [PATCH] timers: Make the lower-level timer function first call than higher-level

2018-11-28 Thread Thomas Gleixner

Song,

On Wed, 21 Nov 2018, Thomas Gleixner wrote:
> On Tue, 20 Nov 2018, Muchun Song wrote:
> > Follow the current code logic, the timer0 function is called until the
> > function call of timer1-5 is completed. So the delay of timer0 is the time
> > spent by other timer function calls. If we can call the timer function in
> > the following order, this should be more friendly to lower-level timers.
> > 
> > timer0->timer1->->timer2->->timer3->->timer4->->timer5
> > 
> > Although not friendly to higher-level timers, higher-level has larger
> > granularity. Therefore the delay has less impact on higher-level.
> 
> Well yes, that's clear. But is it a problem in practice and if so, what is
> the measurable benefit.

Polite reminder. Can you please describe what the practical relevance is of
that and what real world problem you are solving? Ideally with numbers
backing it up.

Thanks,

tglx

Re: [PATCH] prctl: add PR_{GET,SET}_KILL_DESCENDANTS_ON_EXIT

2018-11-28 Thread Eric W. Biederman

Oleg Nesterov  writes:

> On 11/27, Jürg Billeter wrote:
>>
>> @@ -704,6 +713,9 @@ static void exit_notify(struct task_struct *tsk, int 
>> group_dead)
>>  struct task_struct *p, *n;
>>  LIST_HEAD(dead);
>>  
>> +if (group_dead && tsk->signal->kill_descendants_on_exit)
>> +walk_process_tree(tsk, kill_descendant_visitor, NULL);
>
> Well, this is not exactly right, at least this is suboptimal in that
> other sub-threads can too call walk_process_tree(kill_descendant_visitor)
> later for no reason.

Oleg I think I am missing something.

Reading kernel/exit.c I see "group_dead = 
atomic_dec_and_test(&tsk->signal->live)".
Which seems like enough to ensure exactly one task/thread calls 
walk_process_tree.

Can you explain what I am missing?

Eric

Re: [PATCH v4 1/2] ptrace: save the type of syscall-stop in ptrace_message

2018-11-28 Thread Dmitry V. Levin

On Wed, Nov 28, 2018 at 03:20:06PM +0100, Oleg Nesterov wrote:
> On 11/28, Dmitry V. Levin wrote:
> > On Wed, Nov 28, 2018 at 02:49:14PM +0100, Oleg Nesterov wrote:
> > > On 11/28, Dmitry V. Levin wrote:
> > > >
> > > > +/*
> > > > + * These values are stored in task->ptrace_message by 
> > > > tracehook_report_syscall_*
> > > > + * to describe current syscall-stop.
> > > > + *
> > > > + * Values for these constants are chosen so that they do not appear
> > > > + * in task->ptrace_message by other means.
> > > > + */
> > > > +#define PTRACE_EVENTMSG_SYSCALL_ENTRY  0x8000U
> > > > +#define PTRACE_EVENTMSG_SYSCALL_EXIT   0x9000U
> > > 
> > > Again, I do not really understand the comment... Why should we care about
> > > "do not appear in task->ptrace_message by other means" ?
> > > 
> > > 2/2 should detect ptrace_report_syscall() case correctly, so we can use 
> > > any
> > > numbers, say, 1 and 2?
> > > 
> > > > If debugger does PTRACE_GETEVENTMSG it should know how to interpret the 
> > > value
> > > anyway after wait(status).
> > 
> > Given that without this patch the value returned by PTRACE_GETEVENTMSG
> > during syscall stop is undefined, we need two different ptrace_message
> > values that cannot be set by other ptrace events to enable reliable
> > identification of syscall-enter-stop and syscall-exit-stop in userspace:
> > if we make PTRACE_GETEVENTMSG return 0 or any other value routinely set by
> > other ptrace events, it would be hard for userspace to find out whether
> > the kernel implements new semantics or not.
> 
> Hmm, why? Debugger can just do ptrace(PTRACE_GET_SYSCALL_INFO, NULL), if it
> returns EIO then it is not implemented?

The debugger that uses PTRACE_GET_SYSCALL_INFO does not need to call
PTRACE_GETEVENTMSG for syscall stops.
My concern here is the PTRACE_GETEVENTMSG interface itself.  If we use
ptrace_message to implement PTRACE_GET_SYSCALL_INFO and expose
PTRACE_EVENTMSG_SYSCALL_{ENTRY,EXIT} for regular PTRACE_GETEVENTMSG users,
it should have clear semantics.


-- 
ldv


signature.asc
Description: PGP signature

Re: [PATCH 08/10] regulator: max77686: Let core handle GPIO descriptor

2018-11-28 Thread Charles Keepax

On Wed, Nov 28, 2018 at 11:43:48AM +0100, Linus Walleij wrote:
> Use the gpiod_get_from_of_node() rather than the devm_*
> version so that the regulator core can handle the lifecycle
> of these descriptors.
> 
> Fixes: 96392c3d8ca4 ("regulator: max77686: Pass descriptor instead of GPIO 
> number")
> Signed-off-by: Linus Walleij 
> ---
>  drivers/regulator/max77686-regulator.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/regulator/max77686-regulator.c 
> b/drivers/regulator/max77686-regulator.c
> index f5cee1775905..236cd42002f0 100644
> --- a/drivers/regulator/max77686-regulator.c
> +++ b/drivers/regulator/max77686-regulator.c
> @@ -255,8 +255,7 @@ static int max77686_of_parse_cb(struct device_node *np,
>   case MAX77686_BUCK8:
>   case MAX77686_BUCK9:
>   case MAX77686_LDO20 ... MAX77686_LDO22:
> - config->ena_gpiod = devm_gpiod_get_from_of_node(max77686->dev,
> - np,
> + config->ena_gpiod = gpiod_get_from_of_node(np,

As this is inside the of_parse_cb, it probably needs some thought
on where the GPIO would need to be freed on which error paths, I
am not sure it is immediately obvious to me but I suspect it will
need to be freed in some cases.

Thanks,
Charles

[PATCH] sysctl: clean up nr_pdflush_threads leftover

2018-11-28 Thread Rafael Aquini

nr_pdflush_threads has been long deprecated and
removed, but a remnant of its glorious past is
still around in CTL_VM names enum. This patch
is a minor clean-up to that case.

Signed-off-by: Rafael Aquini 
---
 include/uapi/linux/sysctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index d71013fffaf6..dad5a8f93343 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -174,7 +174,7 @@ enum
VM_DIRTY_RATIO=12,  /* dirty_ratio */
VM_DIRTY_WB_CS=13,  /* dirty_writeback_centisecs */
VM_DIRTY_EXPIRE_CS=14,  /* dirty_expire_centisecs */
-   VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */
+   VM_UNUSED15=15, /* was: int nr_pdflush_threads */
VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
VM_PAGEBUF=17,  /* struct: Control pagebuf parameters */
VM_HUGETLB_PAGES=18,/* int: Number of available Huge Pages */
-- 
2.17.2

[PATCH] sched/debug: fix spelling mistake "logaritmic" -> "logarithmic"

2018-11-28 Thread Colin King

From: Colin Ian King 

There is a spelling mistake in one of the scaling names in array
sched_tunable_scaling_names, fix it.

Signed-off-by: Colin Ian King 
---
 kernel/sched/debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 02bd5f969b21..ac3b616d2a6f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -698,7 +698,7 @@ do {
\
 
 static const char *sched_tunable_scaling_names[] = {
"none",
-   "logaritmic",
+   "logarithmic",
"linear"
 };
 
-- 
2.19.1

Re: [PATCH] x86/fpu: XRSTOR is expected to raise #GP

2018-11-28 Thread Sebastian Andrzej Siewior

On 2018-11-28 15:27:28 [+0000], David Laight wrote:
> Better still note it in the code.

I'm in favour of adding something to tools/testing/selftests/x86/.

>   David

Sebastian

Re: [PATCH 01/17] dt-bindings: remoteproc: Add TI PRUSS bindings

2018-11-28 Thread David Lechner


On 11/27/18 9:15 AM, Roger Quadros wrote:


On 26/11/18 23:14, David Lechner wrote:

On 11/22/18 5:38 AM, Roger Quadros wrote:

From: Suman Anna 

This patch adds the bindings for the Programmable Real-Time Unit
and Industrial Communication Subsystem (PRU-ICSS) present on various
TI SoCs. The IP is present on multiple TI SoC architecture families
including the OMAP architecture SoCs such as AM33xx, AM437x and
AM57xx; and on a Keystone 2 architecture based 66AK2G SoC. It is
also present on the Davinci based OMAPL138 SoCs and K3 architecture
based AM65x SoCs as well (not covered for now). Details have been
added to include bindings for various core sub-modules like the PRU
Cores, the PRUSS Interrupt Controller, and other sub-modules used
for Industrial Communication purposes, covering the MDIO, MII_RT
and the IEP sub-modules. The binding mostly uses standard DT
properties.

Signed-off-by: Suman Anna 
Signed-off-by: Roger Quadros 
---
   .../devicetree/bindings/soc/ti/ti,pruss.txt| 360 
+
   1 file changed, 360 insertions(+)
   create mode 100644 Documentation/devicetree/bindings/soc/ti/ti,pruss.txt

diff --git a/Documentation/devicetree/bindings/soc/ti/ti,pruss.txt 
b/Documentation/devicetree/bindings/soc/ti/ti,pruss.txt
new file mode 100644
index 0000000..24fedad
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/ti/ti,pruss.txt


...


+
+PRU-ICSS SoC Bus Parent Node
+=
+This node represents the integration of the PRU-ICSS IP into a SoC, and is
+required for all SoCs. The PRU-ICSS parent nodes need to be defined as child
+nodes of this node.
+
+Required Properties:
+
+- compatible : should be one of,
+   "ti,am3356-pruss-soc-bus" for AM335x family of SoCs
+   "ti,am4376-pruss-soc-bus" for AM437x family of SoCs
+   "ti,am5728-pruss-soc-bus" for AM57xx family of SoCs
+   "ti,k2g-pruss-soc-bus" for 66AK2G family of SoCs
+- reg: address and size of the PRUSS CFG sub-module registers
+   dictating the interconnect configuration


I haven't looked into Tony's suggestion of using ti-sysc yet, so this may be a
moot point, but how will this work with AM18xx that does not have a PRUSS CFG
register? It seems to me that reg here should be the address and size of the
entire PRUSS IP block and the CFG register should be a syscon node or something
like that.


The reg property description is incorrect in the patch. It should have been

reg : address of SYSCFG register.

The SYSCFG register is used to enable and reset the module.

But based on Tony's suggestion this wrapper driver will change to ti,sysc for
OMAP like SoCs.

For AM18xx it could be a simple wrapper driver that just populates the children?


I suppose that could work. I will look into it (perhaps after seeing what you
come up with in v2).






+- #address-cells : should be 1
+- #size-cells: should be 1
+- ranges : standard ranges definition
+


...


+
+PRUSS INTC Child Node
+==
+Each PRUSS has a single interrupt controller instance that is common to both
+the PRU cores. Each interrupt controller can detect 64 input events which are
+then mapped to 10 possible output interrupts through two levels of mapping. The
+input events can be triggered by either the PRUs and/or various other PRUSS
+internal and external peripherals. The first 2 output interrupts are fed
+exclusively to the internal PRU cores, with the remaining 8 connected to
+external interrupt controllers including the MPU.


FYI, on AM18xx, there is a PRUSSEVTSEL bit in CFGCHIP3[3] (already a syscon node
in the device tree) that allows selecting one of two groups of 32 input events
out of this group of 64. This is perhaps getting out of the scope of this patch
series, but I just want to make sure we end up with something that can be easily
extended for this case. For example, I was thinking that this binding could be
modified so that #interrupt-cells could be 1 or 2. If it is 2, then the first
cell specifies the PRUSSEVTSEL value and the second value is the event number.



this is da850.dtsi correct?


Yes.



As PRUSSEVTSEL is not SYSEVENT specific but applies to all the SYSEVENTs at a 
time.
I don't think interrupt-cells is the right place to specify this.

Can it be set in DT in the board file? But this can't change once booted so 
maybe restrictive.


I guess the way I see this is that it is like specifying the bank and index for
a GPIO. If you only specify the system event number, then it is not clear which
event you mean - it could be one of two events. You have to also specify the
PRUSSEVTSEL value (one could call this the bank or group, I suppose) to fully
describe the system event.



If runtime change is required it can only be done before a PRU boots.

How about providing this info in the resource table and/or application DT node?


This seems like

Re: [PATCH v7 2/2] sched/fair: update scale invariance of PELT

2018-11-28 Thread Vincent Guittot

On Wed, 28 Nov 2018 at 16:21, Patrick Bellasi  wrote:
>
> On 28-Nov 15:55, Vincent Guittot wrote:
> > On Wed, 28 Nov 2018 at 15:40, Patrick Bellasi  
> > wrote:
> > >
> > > On 28-Nov 14:33, Vincent Guittot wrote:
> > > > On Wed, 28 Nov 2018 at 12:53, Patrick Bellasi  
> > > > wrote:
> > > > >
> > > > > On 28-Nov 11:02, Peter Zijlstra wrote:
> > > > > > On Wed, Nov 28, 2018 at 10:54:13AM +0100, Vincent Guittot wrote:
> > > > > >
> > > > > > > Is there anything else that I should do for these patches ?
> > > > > >
> > > > > > IIRC, Morten mention they break util_est; Patrick was going to 
> > > > > > explain.
> > > > >
> > > > > I guess the problem is that, once we cross the current capacity,
> > > > > strictly speaking util_avg does not represent anymore a utilization.
> > > > >
> > > > > With the new signal this could happen and we end up storing estimated
> > > > > utilization samples which will overestimate the task requirements.
> > > > >
> > > > > We will have a spike in estimated utilization at next wakeup, since we
> > > > > use MAX(util_avg@dequeue_time, ewma). Potentially we also inflate the 
> > > > > EWMA in
> > > > > case we collect multiple samples above the current capacity.
> > > >
> > > > TBH I don't see how it's different from current implementation with a
> > > > task that was scheduled on big core and now wakes up on little core.
> > > > The util_est is overestimated as well.
> > >
> > > While running below the capacity of a CPU, either big or LITTLE, we
> > > can still measure the actual used bandwidth as long as we have idle
> > > time. If the task is then moved into a lower capacity core, I think
> > > it's still safe to assume that, likely, it would need more capacity.
> > >
> > > Why do you say it's the same ?
> >
> > In the example of a task that runs 39ms in period of 80ms that we used
> > during previous version,
> > the utilization on the big core will reach 709 so will util_est too
> > When the task migrates on little core (512), util_est is higher than
> > current cpu capacity
>
> Right, and what's the problem ?

you worry about an util_est being higher than capacity which is the case there

>
> 1) We know that PELT is calibrated to 32ms period task and in your
>example, since the runtime is higher then the half-life, it's
>correct to estimate a utilization higher then 50%.
>
>PELT utilization is defined _based on the half-life_: thus
>your task having a 50% duty cycle does not mean we are not correct
>if report a utilization != 50%.
>It would be as broken as reporting 10% utilization for a task
>running 100ms every 1s.
>
> 2) If it was a 70% task on a previous activation, once it's moved into
>a lower capacity CPU it's still correct to assume that it's likely
>going to require the same bandwidth and thus will be
>under-provisioned.
>
> I still don't see where we are wrong in this case :/
>
> To me it looks different then the problem I described.
>
> > > With your new signal instead, once we cross the current capacity,
> > > utilization is just not anymore utilization. Thus, IMHO it make sense
> > > avoid to accumulate a sample for what we call "estimated utilization".

This is not true. With the example above, the util_est will be exactly the same
 on big and little cores with the new signal

> > >
> > > I would also say that, with the current implementation which caps
> > > utilization to the current capacity, we get better estimation in
> > > general. At least we can say with absolute precision:
> > >
> > >"the task needs _at least_ that amount of capacity".
> > >
> > > Potentially we can also flag the task as being under-provisioned, in
> > > case there was not idle time, and _let a policy_ decide what to do
> > > with it and the granted information we have.
> > >
> > > While, with your new signal, once we are over the current capacity,
> > > the "utilization" is just a sort of "random" number at best useful to
> > > drive some conclusions about how long the task has been delayed.

see my comment above

> > >
> > > IOW, I fear that we are embedding a policy within a signal which is
> > > currently representing something very well defined: how much cpu
> > > bandwidth a task used. While, latency/under-provisioning policies
> > > perhaps should be better placed somewhere else.
> > >
> > > Perhaps I've missed it in some of the previous discussions:
> > > have we have considered/discussed this signal-vs-policy aspect ?
>
> What's your opinion on the above instead ?

It's not a policy but it gives better knowledge about the amount a work done
I have put below discussion on the  subject on previous version

> >
> > With contribution scaling the PELT utilization of a task is a _minimum_
> > utilization. Regardless of where the task is currently/was running (and
> > provided that it doesn't change behaviour) its PELT utilization will
> > approximate its _minimum_ utilization on an idle 1024 capacity CPU.
>
> The main drawback is that the

Re: oops when ext4 fs is full

2018-11-28 Thread Theodore Y. Ts'o

On Wed, Nov 28, 2018 at 08:50:39AM +0000, Willy Wolff wrote:
> I got a Oops when the hard drive was COMPLETELY full using a ext4 fs.
> After it, any command on the directory where the last write should have 
> occurred freezes, while any other directory behave just fine.

Was this true after you rebooted?

Can you get a log of running e2fsck -n on the file system?

   - Ted

Re: [RFC PATCH 0/5] x86: dynamic indirect call promotion

2018-11-28 Thread Josh Poimboeuf

On Wed, Oct 17, 2018 at 05:54:15PM -0700, Nadav Amit wrote:
> This RFC introduces indirect call promotion in runtime, which for the
> matter of simplification (and branding) will be called here "relpolines"
> (relative call + trampoline). Relpolines are mainly intended as a way
> of reducing retpoline overheads due to Spectre v2.
> 
> Unlike indirect call promotion through profile guided optimization, the
> proposed approach does not require a profiling stage, works well with
> modules whose address is unknown and can adapt to changing workloads.
> 
> The main idea is simple: for every indirect call, we inject a piece of
> code with fast- and slow-path calls. The fast path is used if the target
> matches the expected (hot) target. The slow-path uses a retpoline.
> During training, the slow-path is set to call a function that saves the
> call source and target in a hash-table and keep count for call
> frequency. The most common target is then patched into the hot path.
> 
> The patching is done on-the-fly by patching the conditional branch
> (opcode and offset) that is used to compare the target to the hot
> target. This allows to direct all cores to the fast-path, while patching
> the slow-path and vice-versa. Patching follows 2 more rules: (1) Only
> patch a single byte when the code might be executed by any core. (2)
> When patching more than one byte, ensure that all cores do not run the
> to-be-patched-code by preventing this code from being preempted, and
> using synchronize_sched() after patching the branch that jumps over this
> code.
> 
> Changing all the indirect calls to use relpolines is done using assembly
> macro magic. There are alternative solutions, but this one is
> relatively simple and transparent. There is also logic to retrain the
> software predictor, but the policy it uses may need to be refined.
> 
> Eventually the results are not bad (2 VCPU VM, throughput reported):
> 
>                 base    relpoline
>                 ----    ---------
> nginx           22898   25178 (+10%)
> redis-ycsb      24523   25486 (+4%)
> dbench          2144    2103 (+2%)
> 
> When retpolines are disabled, and if retraining is off, performance
> benefits are up to 2% (nginx), but are much less impressive.

Hi Nadav,

Peter pointed me to these patches during a discussion about retpoline
profiling.  Personally, I think this is brilliant.  This could help
networking and filesystem intensive workloads a lot.

Some high-level comments:

- "Relpoline" looks confusingly a lot like "retpoline".  How about
  "optpoline"?  To avoid confusing myself I will hereafter refer to it
  as such :-)

- Instead of patching one byte at a time, is there a reason why
  text_poke_bp() can't be used?  That would greatly simplify the
  patching process, as everything could be patched in a single step.

- In many cases, a single direct call may not be sufficient, as there
  could be for example multiple tasks using different network protocols
  which need different callbacks for the same call site.

- I'm not sure about the periodic retraining logic, it seems a bit
  nondeterministic and bursty.
  
So I'd propose the following changes:

- In the optpoline, reserve space for multiple (5 or so) comparisons and
  direct calls.  Maybe the number of reserved cmp/jne/call slots can be
  tweaked by the caller somehow.  Or maybe it could grow as needed.
  Starting out, they would just be NOPs.

- Instead of the temporary learning mode, add permanent tracking to
  detect a direct call "miss" -- i.e., when none of the existing direct
  calls are applicable and the retpoline will be used.

- In the case of a miss (or N misses), it could trigger a direct call
  patching operation to be run later (workqueue or syscall exit).  If
  all the direct call slots are full, it could patch the least recently
  modified one.  If this causes thrashing (>x changes over y time), it
  could increase the number of direct call slots using a trampoline.
  Even if there were several slots, CPU branch prediction would
  presumably help make it much faster than a basic retpoline.

Thoughts?

-- 
Josh

Re: [PATCH] coresight: fix spelling mistake "deffered" -> "deferred"

2018-11-28 Thread Mathieu Poirier

On Tue, 27 Nov 2018 at 07:03, Colin King  wrote:
>
> From: Colin Ian King 
>
> There is a spelling mistake in the dev_info error message, fix it.
>
> Signed-off-by: Colin Ian King 
> ---
>  drivers/hwtracing/coresight/coresight-stm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-stm.c 
> b/drivers/hwtracing/coresight/coresight-stm.c
> index 35d6f9709274..ef339ff22090 100644
> --- a/drivers/hwtracing/coresight/coresight-stm.c
> +++ b/drivers/hwtracing/coresight/coresight-stm.c
> @@ -856,7 +856,7 @@ static int stm_probe(struct amba_device *adev, const 
> struct amba_id *id)
>
> if (stm_register_device(dev, &drvdata->stm, THIS_MODULE)) {
> dev_info(dev,
> -"stm_register_device failed, probing deffered\n");
> +"stm_register_device failed, probing deferred\n");
> return -EPROBE_DEFER;
> }
>

Applied - thanks,
Mathieu

> --
> 2.19.1
>

[PATCH] x86: boot: add missing va_end to die

2018-11-28 Thread Mattias Jacobsson

Each call to va_start must have a corresponding call to va_end before
the end of the function. Add the missing va_end.

Found with Coccinelle.

Signed-off-by: Mattias Jacobsson <2...@mok.nu>
---
 arch/x86/boot/tools/build.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index d4e6cd4577e5..07223fd42d01 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -132,6 +132,7 @@ static void die(const char * str, ...)
va_list args;
va_start(args, str);
vfprintf(stderr, str, args);
+   va_end(args);
fputc('\n', stderr);
exit(1);
 }
-- 
2.19.2

Re: ext4 file system corruption with v4.19.3 / v4.19.4

2018-11-28 Thread Rainer Fiebig

Am 28.11.18 um 17:10 schrieb Theodore Y. Ts'o:
> On Wed, Nov 28, 2018 at 04:56:51PM +0100, Rainer Fiebig wrote:
>>
>> If you still see the errors, at least the Ubuntu-kernel could be ruled out.
> 
> My impression is that some of the people reporting problems have been
> using stock upstream kernels, so I wasn't really worried about the
> Ubuntu kernel (although it could be something about the default
> configs that Ubuntu sets up).  What I was more wondering was whether
> there was something about userspace or default configs of Ubuntu.
> This isn't necessarily a *problem* per se; for example, not that long
> ago some users were getting surprised when a problem showed up with an
> older version of the LVM2 userspace with newer upstream kernels.
> After a while, you learn to get super paranoid about making sure to
> rule out all possibilities when trying to debug problems that are only
> hitting a set of users.
> 
>   - Ted
> 

OK, thanks. Perhaps Andrey can tell us then what impact the default
.config had on the problem.

Rainer Fiebig



signature.asc
Description: OpenPGP digital signature

Re: [PATCH v2 0/3] Add support for using external dma in SDHCI

2018-11-28 Thread Rizvi, Mohammad Faiz Abbas


+ Mark Brown

Chunyan,

On 11/21/2018 5:17 PM, Faiz Abbas wrote:

Hi Chunyan,

On 12/11/18 12:56 PM, Chunyan Zhang wrote:

Currently the generic SDHCI code in the Linux kernel supports the SD
standard DMA integrated into the host controller but does not have any
support for external DMA controllers implemented using dmaengine meaning
that custom code is needed for any systems that use a generic DMA
controller with SDHCI which in practice means any SDHCI controller that
doesn't have an integrated DMA controller so we should have this as a
generic feature.

There are already a number of controller specific drivers that have dmaengine
code, and some could use sdhci.c actually, but needed to implement 
mmc_ops->request()
in their specific driver for sending command with external dma using dmaengine
framework, with this patchset, them will take advantage of the generic support.
TI's omap controller is the case as an example.

Any comments are very appreciated.



This is great. It helps us move am335x and am43xx platforms to
sdhci-omap. What platforms have you tested this on?



Gentle ping on this. I tried testing these with an am335x-evm board. In 
their current condition, the card fails to enumerate altogether. The 
changes suggested by Adrian should fix this. Let me know when you post a v3.


Thanks,
Faiz

Re: [PATCH 09/10] regulator: s5m8767: Let core handle GPIO descriptors

2018-11-28 Thread Charles Keepax

On Wed, Nov 28, 2018 at 11:43:49AM +0100, Linus Walleij wrote:
> Use the gpiod_get_from_of_node() rather than the devm_*
> version so that the regulator core can handle the lifecycle
> of these descriptors.
> 
> Introduce an errorpath so we free any retrieved descriptors
> properly.
> 
> This patch requires "gpio: Export gpiod_get_from_of_node()"
> to be applied first.
> 
> Fixes: 9ae5cc75ceaa ("regulator: s5m8767: Pass descriptor instead of GPIO 
> number")
> Signed-off-by: Linus Walleij 
> ---
>  drivers/regulator/s5m8767.c | 37 ++---
>  1 file changed, 26 insertions(+), 11 deletions(-)
> @@ -674,6 +681,14 @@ static int s5m8767_pmic_dt_parse_pdata(struct 
> platform_device *pdev,
>   }
>  
>   return 0;
> +
> +err_gpiod_put:
> + while (j) {
> + gpiod_put(rdata->ext_control_gpiod);
> + rdata--;
> + j--;
> + }
> + return ret;
>  }

This looks like it handles the error paths in
s5m8767_pmic_dt_parse_pdata, however there are still all the
error paths between the call to that function and the call to
regulator_register that need to be handled as well.

Thanks,
Charles

bnxt_en: NIC Link is Up, 100 Mbps full duplex - but no data

2018-11-28 Thread Ondrej Zary

Hello,
I have a new Dell R740 server with BCM57416:
Ethernet controller [0200]: Broadcom Limited BCM57416 NetXtreme-E 10GBase-T 
RDMA Ethernet Controller [14e4:16d8] (rev 01)
Subsystem: Broadcom Limited BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet 
Controller [14e4:4160]

When I connect a cable from 100Mbps switch, everything looks good - the link
LED lights up orange, data LED flashes green as when data is sent/received,
this appears in log:
[ 3791.655357] bnxt_en 0000:17:00.0 eno1np0: NIC Link is Up, 100 Mbps full 
duplex, Flow control: ON - receive & transmit
[ 3791.655361] bnxt_en 0000:17:00.0 eno1np0: EEE is not active
[ 3791.655364] bnxt_en 0000:17:00.0 eno1np0: FEC autoneg off encodings: None

But no data comes in or out. tcpdump shows only outgoing packets and they're
not transmitted in real (not seen by other machines).

It works fine after connecting through a gigabit switch.

Currently running 4.18.0-2-amd64 kernel (Debian testing).

Any ideas?

-- 
Ondrej Zary

RE: [PATCH] x86/fpu: XRSTOR is expected to raise #GP

2018-11-28 Thread David Laight

From: H. Peter Anvin
> Sent: 26 November 2018 19:50
> On 11/26/18 9:49 AM, Sebastian Andrzej Siewior wrote:
> > On 2018-11-26 18:27:06 [+0100], Jann Horn wrote:
> >> commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess
> >> fixups") incorrectly replaced the fixup entry for XSTATE_OP with a
> >> user-#PF-only fixup. However, XRSTOR can also raise #GP when the supplied
> >> address points to userspace memory. Change it back.
> >
> > The #GP is raised if the xstate content is invalid. But I guess the
> > details don't matter.
> >
> >> Reported-by: Sebastian Andrzej Siewior 
> >> Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess 
> >> fixups")
> >> Signed-off-by: Jann Horn 
> > Acked-by: Sebastian Andrzej Siewior 
> >
> 
> It does matter -- please correct the patch description, or we might have some
> serious confusion at some arbitrary point in the future with the result that
> the bug gets re-introduced; it would not be the first time.

Better still note it in the code.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)

< 1 2 3 4 5 6 7 8 9 10 >

301 - 400 of 1756 matches

Mail list logo