[PATCH 1/3] retpolines: Only enable retpoline when compiler support it

2018-10-29 Thread Zhenzhong Duan
Since retpoline capable compilers are widely available, make
CONFIG_RETPOLINE hard depend on it.

Change KBUILD to use CONFIG_RETPOLINE_SUPPORT to avoid conflict with
CONFIG_RETPOLINE which is used by kernel.

With all that stuff, the check of RETPOLINE is changed to
CONFIG_RETPOLINE.

This change is based on suggestion in https://lkml.org/lkml/2018/9/18/1016

Signed-off-by: Zhenzhong Duan 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: Daniel Borkmann 
Cc: David Woodhouse 
Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Konrad Rzeszutek Wilk 
Cc: Andy Lutomirski 
Cc: Masahiro Yamada 
Cc: Michal Marek 
---
 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 arch/x86/Kconfig|  8 
 arch/x86/Makefile   |  5 +++--
 arch/x86/entry/vdso/Makefile|  4 ++--
 arch/x86/include/asm/nospec-branch.h| 10 ++
 arch/x86/kernel/cpu/bugs.c  |  2 +-
 arch/x86/kernel/vmlinux.lds.S   |  2 +-
 arch/x86/lib/Makefile   |  2 +-
 arch/x86/lib/retpoline.S|  2 ++
 scripts/Makefile.build  |  2 +-
 10 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index e129cd8..c26264e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4187,7 +4187,7 @@
Selecting 'on' will, and 'auto' may, choose a
mitigation method at run time according to the
CPU, the available microcode, the setting of the
-   CONFIG_RETPOLINE configuration option, and the
+   CONFIG_RETPOLINE_SUPPORT configuration option, and the
compiler with which the kernel was built.
 
Specific mitigations can also be selected manually:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbd5f28..766563f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -433,7 +433,7 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
 
-config RETPOLINE
+config RETPOLINE_SUPPORT
bool "Avoid speculative indirect branches in kernel"
default y
select STACK_VALIDATION if HAVE_STACK_VALIDATION
@@ -443,9 +443,9 @@ config RETPOLINE
  branches. Requires a compiler with -mindirect-branch=thunk-extern
  support for full protection. The kernel may run slower.
 
- Without compiler support, at least indirect branches in assembler
- code are eliminated. Since this includes the syscall entry path,
- it is not entirely pointless.
+ Since retpoline capable compilers are widely available, kernel doesn't
+ use CONFIG_RETPOLINE_SUPPORT directly but use CONFIG_RETPOLINE which
+ is enabled when compiler support retpoline.
 
 config INTEL_RDT
bool "Intel Resource Director Technology support"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b562e4..7ed35b1 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -221,9 +221,10 @@ KBUILD_CFLAGS += -Wno-sign-compare
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_RETPOLINE_SUPPORT
 ifneq ($(RETPOLINE_CFLAGS),)
-  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DCONFIG_RETPOLINE
+  KBUILD_AFLAGS += -DCONFIG_RETPOLINE
 endif
 endif
 
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 141d415..87acde1 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -70,7 +70,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 
-fasynchronous-unwind-tables -m64 \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_RETPOLINE_SUPPORT
 ifneq ($(RETPOLINE_VDSO_CFLAGS),)
   CFL += $(RETPOLINE_VDSO_CFLAGS)
 endif
@@ -145,7 +145,7 @@ KBUILD_CFLAGS_32 += $(call cc-option, 
-foptimize-sibling-calls)
 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
 
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_RETPOLINE_SUPPORT
 ifneq ($(RETPOLINE_VDSO_CFLAGS),)
   KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
 endif
diff --git a/arch/x86/include/asm/nospec-branch.h 
b/arch/x86/include/asm/nospec-branch.h
index 80dc144..8b09cbb 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -162,11 +162,12 @@
_ASM_PTR " 999b\n\t"\
".popsection\n\t"
 
-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_64
 
 /*
- * Since the inline asm uses the %V modifier which is only in newer GCC,
- * t

[PATCH 3/3] kprobes/x86: Simplify indirect-jump check in retpoline

2018-10-29 Thread Zhenzhong Duan
Since CONFIG_RETPOLINE now hard depends on compiler support, replacing
the indirect-jump check with the range check is safe in that case.

Signed-off-by: Zhenzhong Duan 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: David Woodhouse 
Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Konrad Rzeszutek Wilk 
---
 arch/x86/kernel/kprobes/opt.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 40b16b2..1136b29 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -203,6 +203,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, 
u8 *real)
return len;
 }
 
+#ifndef CONFIG_RETPOLINE
 /* Check whether insn is indirect jump */
 static int __insn_is_indirect_jump(struct insn *insn)
 {
@@ -210,6 +211,7 @@ static int __insn_is_indirect_jump(struct insn *insn)
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
insn->opcode.bytes[0] == 0xea); /* Segment based jump */
 }
+#endif
 
 /* Check whether insn jumps into specified address range */
 static int insn_jump_into_range(struct insn *insn, unsigned long start, int 
len)
@@ -240,20 +242,16 @@ static int insn_jump_into_range(struct insn *insn, 
unsigned long start, int len)
 
 static int insn_is_indirect_jump(struct insn *insn)
 {
-   int ret = __insn_is_indirect_jump(insn);
+   int ret;
 
 #ifdef CONFIG_RETPOLINE
-   /*
-* Jump to x86_indirect_thunk_* is treated as an indirect jump.
-* Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
-* older gcc may use indirect jump. So we add this check instead of
-* replace indirect-jump check.
-*/
-   if (!ret)
+   /* Jump to x86_indirect_thunk_* is treated as an indirect jump. */
ret = insn_jump_into_range(insn,
(unsigned long)__indirect_thunk_start,
(unsigned long)__indirect_thunk_end -
(unsigned long)__indirect_thunk_start);
+#else
+   ret = __insn_is_indirect_jump(insn);
 #endif
return ret;
 }
-- 
1.8.3.1


Re: [PATCH v2 0/2] mm: soft-offline: fix race against page allocation

2018-10-29 Thread Naoya Horiguchi
On Fri, Oct 26, 2018 at 10:46:36AM +0200, Michal Hocko wrote:
> On Wed 22-08-18 10:00:25, Michal Hocko wrote:
> > On Wed 22-08-18 01:37:48, Naoya Horiguchi wrote:
> > > On Wed, Aug 15, 2018 at 03:43:34PM -0700, Andrew Morton wrote:
> > > > On Tue, 17 Jul 2018 14:32:30 +0900 Naoya Horiguchi 
> > > >  wrote:
> > > > 
> > > > > I've updated the patchset based on feedbacks:
> > > > > 
> > > > > - updated comments (from Andrew),
> > > > > - moved calling set_hwpoison_free_buddy_page() from mm/migrate.c to 
> > > > > mm/memory-failure.c,
> > > > >   which is necessary to check the return code of 
> > > > > set_hwpoison_free_buddy_page(),
> > > > > - lkp bot reported a build error when only 1/2 is applied.
> > > > > 
> > > > >   >mm/memory-failure.c: In function 'soft_offline_huge_page':
> > > > >   > >> mm/memory-failure.c:1610:8: error: implicit declaration of 
> > > > > function
> > > > >   > 'set_hwpoison_free_buddy_page'; did you mean 'is_free_buddy_page'?
> > > > >   > [-Werror=implicit-function-declaration]
> > > > >   >if (set_hwpoison_free_buddy_page(page))
> > > > >   >^~~~
> > > > >   >is_free_buddy_page
> > > > >   >cc1: some warnings being treated as errors
> > > > > 
> > > > >   set_hwpoison_free_buddy_page() is defined in 2/2, so we can't use it
> > > > >   in 1/2. Simply doing 
> > > > > s/set_hwpoison_free_buddy_page/!TestSetPageHWPoison/
> > > > >   will fix this.
> > > > > 
> > > > > v1: https://lkml.org/lkml/2018/7/12/968
> > > > > 
> > > > 
> > > > Quite a bit of discussion on these two, but no actual acks or
> > > > review-by's?
> > > 
> > > Really sorry for late response.
> > > Xishi provided feedback on previous version, but no final ack/reviewed-by.
> > > This fix should work on the reported issue, but rewriting soft-offlining
> > > without PageHWPoison flag would be the better fix (no actual patch yet.)
> > 
> > If we can go with the later the I would obviously prefer that. I cannot
> > promise to work on the patch though. I can help with reviewing of
> > course.
> > 
> > If this is important enough that people are hitting the issue in normal
> > workloads then sure, let's go with the simple fix and continue on top of
> > that.
> 
> Naoya, did you have any chance to look at this or have any plans to look?
> I am willing to review and help with the overal design but I cannot
> really promise to work on the code.

I have a draft version of a patch to isolate a page in buddy-friendly manner
without PageHWPoison flag (that was written weeks ago, but I couldn't finish
because my other project interrupted me ...).
I'll post it after testing, especially confirming that the hotplug code properly
resets the isolated page.

Thanks,
Naoya Horiguchi


Re: [PATCH v5 4/4] mm: Defer ZONE_DEVICE page initialization to the point where we init pgmap

2018-10-29 Thread Dan Williams
On Mon, Oct 29, 2018 at 11:29 PM Michal Hocko  wrote:
>
> On Mon 29-10-18 12:59:11, Alexander Duyck wrote:
> > On Mon, 2018-10-29 at 19:18 +0100, Michal Hocko wrote:
[..]
> > The patches Andrew pushed addressed the immediate issue so that now
> > systems with nvdimm/DAX memory can at least initialize quick enough
> > that systemd doesn't refuse to mount the root file system due to a
> > timeout.
>
> This is about the first time you actually mention that. I have re-read
> the cover letter and all changelogs of patches in this serious. Unless I
> have missed something there is nothing about real users hitting issues
> out there. nvdimm is still considered a toy because there is no real HW
> users can play with.

Yes, you have missed something, because that's incorrect. There have been
public articles about these parts sampling since May.


https://www.anandtech.com/show/12828/intel-launches-optane-dimms-up-to-512gb-apache-pass-is-here

That testing identified this initialization performance problem and
thankfully got it addressed in time for the current merge window.


[PATCH 2/3] retpolines: Remove the minimal stuff of retpoline support

2018-10-29 Thread Zhenzhong Duan
Now that CONFIG_RETPOLINE hard depends on compiler support, there is no
reason for minimal stuff to still exist.

This change is based on suggestion in https://lkml.org/lkml/2018/9/18/1016

Signed-off-by: Zhenzhong Duan 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: David Woodhouse 
Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Konrad Rzeszutek Wilk 
---
 arch/x86/include/asm/nospec-branch.h |  2 --
 arch/x86/kernel/cpu/bugs.c   | 13 ++---
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h 
b/arch/x86/include/asm/nospec-branch.h
index 8b09cbb..c202a64 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -221,8 +221,6 @@
 /* The Spectre V2 mitigation variants */
 enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
-   SPECTRE_V2_RETPOLINE_MINIMAL,
-   SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
SPECTRE_V2_IBRS_ENHANCED,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0108fb..7f6d815 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -135,8 +135,6 @@ enum spectre_v2_mitigation_cmd {
 
 static const char *spectre_v2_strings[] = {
[SPECTRE_V2_NONE]   = "Vulnerable",
-   [SPECTRE_V2_RETPOLINE_MINIMAL]  = "Vulnerable: Minimal generic 
ASM retpoline",
-   [SPECTRE_V2_RETPOLINE_MINIMAL_AMD]  = "Vulnerable: Minimal AMD ASM 
retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC]  = "Mitigation: Full generic 
retpoline",
[SPECTRE_V2_RETPOLINE_AMD]  = "Mitigation: Full AMD 
retpoline",
[SPECTRE_V2_IBRS_ENHANCED]  = "Mitigation: Enhanced IBRS",
@@ -250,11 +248,6 @@ static void __init spec2_print_if_secure(const char 
*reason)
pr_info("%s selected on command line.\n", reason);
 }
 
-static inline bool retp_compiler(void)
-{
-   return __is_defined(CONFIG_RETPOLINE);
-}
-
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
int len = strlen(opt);
@@ -417,14 +410,12 @@ static void __init spectre_v2_select_mitigation(void)
pr_err("Spectre mitigation: LFENCE not serializing, 
switching to generic retpoline\n");
goto retpoline_generic;
}
-   mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
-SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+   mode = SPECTRE_V2_RETPOLINE_AMD;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
} else {
retpoline_generic:
-   mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
-SPECTRE_V2_RETPOLINE_MINIMAL;
+   mode = SPECTRE_V2_RETPOLINE_GENERIC;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
 
-- 
1.8.3.1


Re: [PATCH v3] mm/page_owner: use kvmalloc instead of kmalloc

2018-10-29 Thread Miles Chen
On Tue, 2018-10-30 at 07:06 +0100, Michal Hocko wrote:
> On Tue 30-10-18 09:29:10, Miles Chen wrote:
> > On Mon, 2018-10-29 at 09:17 +0100, Michal Hocko wrote:
> > > On Mon 29-10-18 09:07:08, Michal Hocko wrote:
> > > [...]
> > > > Besides that, the following doesn't make much sense to me. It simply
> > > > makes no sense to use vmalloc for sub page allocation regardless of
> > > > HIGHMEM.
> > > 
> > > OK, it is still early morning here. Now I get the point of the patch.
> > > You just want to (ab)use highmeme for smaller requests. I do not like
> > > this, to be honest. It causes an internal fragmentation and more
> > > importantly the VMALLOC space on 32b where HIGHMEM is enabled (do we
> > > have any 64b with HIGHMEM btw?) is quite small to be wasted like that.
> > > 
> > thanks for your comment. It looks like that using vmalloc fallback for
> > sub page allocation is not good here.
> > 
> > Your comment gave another idea:
> > 
> > 1. force kbuf to PAGE_SIZE
> > 2. allocate a page by alloc_page(GFP_KERNEL | __GFP_HIGHMEM); so we can
> > get a highmem page if possible
> > 3. use kmap/kunmap pair to create mapping for this page. No vmalloc
> > space is used.
> > 4. do not change kvmalloc logic.
> 
> If you mean for this particular situation then is this really worth
> it? I mean this is a short term allocation for root only so you do not
> have to worry about low mem depletion.

The 1...3 are applied to print_page_owner(), not in kmalloc() or
kvmalloc() logic. 


It's a real problem when using page_owner.
I found this issue recently: I was not able to read page_owner information
during an overnight test (error: read failed: Out of memory). I replaced
kmalloc() with vmalloc() and it worked well.

> 
> If you are thiking in more generic terms to allow kmalloc to use highmem
> then I am not really sure this will work out.

I'm thinking about modifying print_page_owner().




[PATCH 0/3] Enable retpoline only when compiler support

2018-10-29 Thread Zhenzhong Duan
According to Peter Zijlstra's suggestion in 
https://lkml.org/lkml/2018/9/18/1016,
hard bind retpoline with compiler support and remove minimal stuff.

Tested with both CONFIG_RETPOLINE_SUPPORT enabled and disabled.

Zhenzhong Duan (3):
  retpolines: Only enable retpoline when compiler support it
  retpolines: Remove the minimal stuff of retpoline support
  kprobes/x86: Simplify indirect-jump check in retpoline case

 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 arch/x86/Kconfig|  8 
 arch/x86/Makefile   |  5 +++--
 arch/x86/entry/vdso/Makefile|  4 ++--
 arch/x86/include/asm/nospec-branch.h| 12 ++--
 arch/x86/kernel/cpu/bugs.c  | 13 ++---
 arch/x86/kernel/kprobes/opt.c   | 14 ++
 arch/x86/kernel/vmlinux.lds.S   |  2 +-
 arch/x86/lib/Makefile   |  2 +-
 arch/x86/lib/retpoline.S|  2 ++
 scripts/Makefile.build  |  2 +-
 11 files changed, 29 insertions(+), 37 deletions(-)

-- 
1.8.3.1


[PATCH] sched, trace: Fix prev_state output in sched_switch tracepoint

2018-10-29 Thread Pavankumar Kondeti
commit 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
tried to fix the problem introduced by a previous commit efb40f588b43
("sched/tracing: Fix trace_sched_switch task-state printing"). However
the prev_state output in sched_switch is still broken.

task_state_index() uses fls() which considers the LSB as 1. Left
shifting 1 by this value gives an incorrect mapping to the task state.
Fix this by decrementing the value returned by __get_task_state()
before shifting.

Fixes: 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
Signed-off-by: Pavankumar Kondeti 
---
 include/trace/events/sched.h | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index f07b270..9a4bdfa 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -107,6 +107,8 @@
 #ifdef CREATE_TRACE_POINTS
 static inline long __trace_sched_switch_state(bool preempt, struct task_struct 
*p)
 {
+   unsigned int state;
+
 #ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
 #endif /* CONFIG_SCHED_DEBUG */
@@ -118,7 +120,15 @@ static inline long __trace_sched_switch_state(bool 
preempt, struct task_struct *
if (preempt)
return TASK_REPORT_MAX;
 
-   return 1 << task_state_index(p);
+   /*
+* task_state_index() uses fls() and returns a value from 0-8 range.
+* Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
+* it for left shift operation to get the correct task->state
+* mapping.
+*/
+   state = task_state_index(p);
+
+   return state ? (1 << (state - 1)) : state;
 }
 #endif /* CREATE_TRACE_POINTS */
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [PATCH 3/3] drm/mediatek: Use drm_gem_cma_object instead of mtk_drm_gem_obj

2018-10-29 Thread CK Hu
Hi, Daniel:

On Mon, 2018-10-29 at 10:16 +0100, Daniel Vetter wrote:
> On Mon, Oct 29, 2018 at 11:11:16AM +0800, CK Hu wrote:
> > Hi,Daniel:
> > 
> > On Fri, 2018-10-26 at 12:21 +0200, Daniel Vetter wrote:
> > > On Fri, Oct 26, 2018 at 03:22:03PM +0800, CK Hu wrote:
> > > > After adding dma_dev in struct drm_device and
> > > > drm_gem_cma_dumb_create_no_kmap(), drm_gem_cma_object could replace
> > > > mtk_drm_gem_obj, so use drm_gem_cma_object instead of mtk_drm_gem_obj to
> > > > reduce redundant code.
> > > > 
> > > > Signed-off-by: CK Hu 
> > > 
> > > A few questions/thoughts:
> > > 
> > > - Why do you need both drm_device->dev and drm_device->dma_dev? Can't you
> > >   just register the drm_device with the right struct device?
> > > 
> > 
> > In [1], mmsys is the drm driver and ovl0 and ovl1 is the sub device
> > which has dma function.
> > In this drm, there are two crtc and each one is comprised of many
> > component.
> > This is an example of mt8173:
> > 
> > crtc0: ovl0, color0, aal, od, rdma0, ufoe, dsi0
> > crtc1: ovl1, color1, gamma, rdma1, dpi0
> > 
> > In the device node of ovl0 and ovl1, there is a 'iommus' parameter in
> > it, so use dma_alloc_xxx() and dma_map_xxx() with that device would get
> > iova rather than pa. I don't think it's a good idea to register ovl0 or
> > ovl1 as drm device because each one is just a component in a pipeline.
> > mmsys controls the clock and routing of multi-media system which include
> > this drm system, so it's better to register mmsys as drm device. Maybe
> > we could move 'iommus' parameter from ovl device to mmsys device, so the
> > dma device changes from ovl device to mmsys device. I'm not sure this
> > would be a good choice, how do you think?
> > 
> > [1]
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/boot/dts/mediatek/mt8173.dtsi?h=v4.19
> 
> Ah ok. But if you have 2 blocks that make up the overall drm device, why
> don't you need to switch at runtime between them? I.e. buffer allocated
> for crtc0 needs to be dma-mapped to crtc0, buffer allocated to crtc1 needs
> to be dma-mapped on crtc1?
> 
> And if they're both the exact same iommu, then imo it would make indeed
> sense to move the iommu attribute up. Since your current code cant'
> actually handle truly separate dma-mappings. And neither can your patch
> series here handled separate iommu for crtc0 and crtc1.

Yes, they're the exact same iommu. So I would move iommu attribute up.

> 
> > > - You don't use drm_gem_prime_import_dev, so prime import isn't using the
> > >   right device either.
> > 
> > Yes, you are right. I'm not familiar with whore drm core, so I start to
> > modify what Mediatek drm use. But this function still works for the drm
> > device that itself is dma device. If one day there is a drm device which
> > itself is not a dma device and need this function, send a patch to
> > modify this function and test it with that drm device. If you want me to
> > modify all in advance, I'm ok but need others to test it because
> > Mediatek drm driver does not use them.
> 
> I meant to say that mediatek should use drm_gem_prime_import_dev, but
> currently isn't using that. And your patch series here doesn't fix that
> either. So there's more bugs left in this area.

Great, you found a bug. My test only includes export but not import. It
will take time to set up an import-test environment.

> 
> > > - exynos seems to have the same or at least similar issue, stronger case
> > >   for your patches if you can solve both.
> > 
> > I'm still Mediatek's employee. If I modify other company's driver and it
> > is not a MUST-BE for Mediatek, Mediatek may think I give contribution to
> > other company. So I've better not to modify exynos driver.
> 
> This isn't how upstream works :-)

OK, because now I would not modify drm core, I would focus on Mediatek
drm driver first. If the modification of exynos driver is easy, I could
try. But if the modification of exynos is huge, I suggest that someone
who is familiar with exynos driver and have exynos platform to do it.

Regards,
CK
> 
> > > - I'd start out with using struct drm_gem_cma_object in mtk (similar to
> > >   what vc4 does), and then reusing as much as possible of the existing
> > >   helpers. And then looking later on what's still left (like the support
> > >   for leaving out the virtual mapping).
> > 
> > I'm not clear what vc4 does. It looks like that you want me to redefine
> > mtk_drm_gem_obj based on drm_gem_cma_object. So it would be like
> > 
> > struct mtk_drm_gem_obj {
> > struct drm_gem_cma_object base;
> > void *cookie;
> > unsigned long dma_attrs;
> > };
> > 
> > I could try to modify as this and see what have left.
> 
> Yup, that's my suggestion. Then we can look at what mtk can use unchanged
> from the core helpers. And what would need to change and so better
> evaluate whether it makes sense to do that.
> 
> I still think just moving the iommu is probably best.
> -Daniel




Re: Business Proposal

2018-10-29 Thread Edward Yuan


Dear Friend, 

  My name is Mr. Edward Yuan, a consultant/broker. I know you might be a bit 
apprehensive because you do not know me. Nevertheless, I have a proposal on 
behalf of a client, a lucrative business that might be of mutual benefit to you.

If interested in this proposition please kindly and urgently contact me for 
more details. 

Best Regards.
Mr. Edward Yuan.

---
This email has been checked for viruses by AVG.
https://www.avg.com



Re: [PATCH] V6 init/main.c Enable watchdog_thresh control from kernel line To: lober...@redhat.com

2018-10-29 Thread Thomas Gleixner
Laurence,

On Wed, 24 Oct 2018, Laurence Oberman wrote:

your subject line reads a bit strange:

Subject: [PATCH] V6 init/main.c Enable watchdog_thresh control from kernel line 
To: lober...@redhat.com

Aside of that extra 'To:...', please move the V6 inside the square brackets
together with PATCH so tools can strip off the whole thing.

Also please refrain from using file path as a prefix. It's sufficient to
use 'init' and please add a colon after the prefix to separate it from the
short log string. 'kernel line' reads strange, that should be 'kernel
command line'. 

> Both graphics and serial consoles are exposed to hard lockups
> when handling a large amount of messaging. The kernel watchdog_thresh
> parameter up to now has not been available to be set on the kernel line for
> early boot.
> This patch allows the setting of watchdog_thresh to be increased
> when needed to avoid the hard lockups in the console code.

git grep 'This patch' Documentation/process/

> Signed-off-by: Laurence Oberman 
> Acked-by: Randy Dunlap 
> ---
>  Documentation/admin-guide/kernel-parameters.txt |  7 +++
>  init/main.c | 11 +++
>  2 files changed, 18 insertions(+)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index 4cdcd1a..102382f 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -4932,6 +4932,13 @@
>   or other driver-specific files in the
>   Documentation/watchdog/ directory.
>  
> + watchdog_thresh=
> + This parameter allows early boot to change the
> + value of the watchdog timeout threshold from the default
> + of 10 seconds to avoid hard lockups.  Example:
> + watchdog_thresh=30
> + Default: 10

  Describing a parameter with 'This parameter' is pointless. The 'early
  boot' extra is not really helpful either as the kernel command line
  parameters are evaluated during early boot.

  Changing this parameter does not avoid hard lockups, really. It changes
  the time which has to elapse for a lockup to be detected.

  Aside of that it does not only affect the hard lockup detector it also
  affects the soft lockup detector.

  You also fail to mention that setting this to 0 disables both lockup
  detectors completely. Something like this perhaps:

watchdog_thresh=
[KNL]
Set the hard lockup detector stall duration
threshold in seconds. The soft lockup detector
threshold is set to twice the value. A value of 0
disables both lockup detectors. Default is 10
seconds.

> --- a/init/main.c
> +++ b/init/main.c
> @@ -1038,6 +1038,17 @@ static int __init set_debug_rodata(char *str)
>  __setup("rodata=", set_debug_rodata);
>  #endif
>  
> +#ifdef CONFIG_LOCKUP_DETECTOR
> +extern int watchdog_thresh;
> +
> +static int __init watchdog_thresh_setup(char *str)
> +{
> + get_option(&str, &watchdog_thresh);
> + return 1;
> +}
> +__setup("watchdog_thresh=", watchdog_thresh_setup);
> +#endif

Why are you adding this to init/main.c?

This really belongs into kernel/watchdog.c which also avoids the ifdeffery
and the ugly extern. Then the subject line becomes something like this:

 [PATCH V$N] watchdog/core: Add watchdog_thresh command line parameter

Thanks,

tglx


Re: [REGRESSION 4.19-rc2] sometimes hangs with black screen when resuming from suspend or hibernation (was: Re: Linux 4.19-rc2)

2018-10-29 Thread Rafael J. Wysocki
On Fri, Oct 26, 2018 at 5:49 PM Martin Steigerwald  wrote:
>
> This regression is gone with 4.19-rc8.

Thanks for the update!


> Martin Steigerwald - 11.09.18, 09:53:
> […]
> > Linus Torvalds - 02.09.18, 23:45:
> > > As usual, the rc2 release is pretty small. People are taking a
> >
> > With 4.19-rc2 this ThinkPad T520 with i5 Sandybrdige sometimes hangs
> > with black screen when resuming from suspend or hibernation.  With
> > 4.18.1 it did not. Of course there have been userspace related updates
> > that could be related.
> >
> > I currently have no time to dig into this and on this production
> > laptop I generally do not do bisects between major kernel releases.
> > So currently I only answer questions that do not require much time to
> > answer.
> >
> > For now I switched back to 4.18. If that is stable – and thus likely
> > no userspace component is related –, I go with 4.19-rc3 or whatever
> > is most recent version to see if the issue has been fixed already.
> >
> > % inxi -z -b -G
> > System:Host: […] Kernel: 4.18.1-tp520-btrfstrim x86_64 bits: 64
> > Desktop: KDE Plasma 5.13.5
> >Distro: Debian GNU/Linux buster/sid
> > Machine:   Type: Laptop System: LENOVO product: 42433WG v: ThinkPad
> > T520 serial: 
> >Mobo: LENOVO model: 42433WG serial:  UEFI [Legacy]:
> > LENOVO v: 8AET69WW (1.49 )
> >date: 06/14/2018
> > […]
> > CPU:   Dual Core: Intel Core i5-2520M type: MT MCP speed: 2990 MHz
> > min/max: 800/3200 MHz
> > Graphics:  Device-1: Intel 2nd Generation Core Processor Family
> > Integrated Graphics driver: i915 v: kernel
> >Display: x11 server: X.Org 1.20.1 driver: modesetting
> > resolution: 1920x1080~60Hz
> >OpenGL: renderer: Mesa DRI Intel Sandybridge Mobile v: 3.3
> > Mesa 18.1.7
> > […]
> > Info:  Processes: 322 Uptime: 16m Memory: 15.45 GiB used: 3.12 GiB
> > (20.2%) Shell: zsh inxi: 3.0.22
> >
> > Thanks,
> > Martin
> >
> > > breather after the merge window, and it takes a bit of time for bug
> > > reports to start coming in and get identified.  Plus people were
> > > probably still on vacation (particularly Europe), and some people
> > > were at Open Source Summit NA last week too. Having a calm week was
> > > good.
> > >
> > > Regardless of the reason, it's pretty quiet/ The bulk of it is
> > > drivers (network and gpu stand out), with the rest being a random
> > > collection all over (arch/x86 and generic networking stands out,
> > > but there's misc stuff all over).
> > >
> > > Go out and test.
> > >
> > >  Linus
> > >
> > > ---
> […]
> --
> Martin
>
>


Re: [v6 0/4] mpt3sas: Hot-Plug Surprise removal support on IOC.

2018-10-29 Thread Suganath Prabu Subramani
Any update on this ?
The V6 series I have posted has only defect fixes (it does not include HBA
Hot-Plug Surprise removal support).

We are reworking and incorporating the suggestions from Bjorn.
And after covering tests, we'll post Hot-Plug Surprise removal patches.

Thanks,
Suganath Prabu
On Tue, Oct 23, 2018 at 3:51 PM Suganath Prabu
 wrote:
>
> v6 Change set:
> Incorporated changes as suggested by Andy.
> In Patch 1 converted while loop to do while in
> function mpt3sas_wait_for_ioc_to_operational().
> And in patch 3 removed parentheses.
>
> V5 Change set:
> V5 post has only defect fixes.
> We are reworking and incorporating the suggestions from Bjorn.
> And after covering tests, we ll be post Hot-Plug Surprise
>  removal patches.
>
> V4 Change set:
> Reframe split strings in print statement, to avoid
>
> V3 Change Set:
> Simplified function "mpt3sas_base_pci_device_is_available" and
> made inline
>
> V2 changes:
> Replaced mpt3sas_base_pci_device_is_unplugged with
> pci_device_is_present.
>
> V1 changes:
> In Patch 0001 - unlock mutex, if active reset is in progress.
> Suganath Prabu (4):
>   mpt3sas: Separate out mpt3sas_wait_for_ioc_to_operational
>   mpt3sas: Fix Sync cache command failure during driver unload
>   mpt3sas:Fix driver modifying persistent data.
>   mpt3sas: Bump driver version to 27.100.00.00.
>
>  drivers/scsi/mpt3sas/mpt3sas_base.c  | 75 
> ++--
>  drivers/scsi/mpt3sas/mpt3sas_base.h  |  8 +++-
>  drivers/scsi/mpt3sas/mpt3sas_config.c| 28 +++-
>  drivers/scsi/mpt3sas/mpt3sas_ctl.c   | 21 ++---
>  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 38 +++-
>  drivers/scsi/mpt3sas/mpt3sas_transport.c | 70 ++---
>  6 files changed, 106 insertions(+), 134 deletions(-)
>
> --
> 1.8.3.1
>


Re: [PATCH i2c-next v8 5/5] i2c: aspeed: Add bus idle waiting logic for multi-master use cases

2018-10-29 Thread kbuild test robot
Hi Jae,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on wsa/i2c/for-next]
[also build test ERROR on v4.19 next-20181029]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Jae-Hyun-Yoo/i2c-aspeed-Add-bus-idle-waiting-logic-for-multi-master-use-cases/20181030-051719
base:   https://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git 
i2c/for-next
config: arm-multi_v5_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   drivers/i2c//busses/i2c-aspeed.c: In function 'aspeed_i2c_check_bus_busy':
>> drivers/i2c//busses/i2c-aspeed.c:617:12: error: 'struct aspeed_i2c_bus' has 
>> no member named 'slave_state'; did you mean 'master_state'?
  bus->slave_state == ASPEED_I2C_SLAVE_STOP)
   ^~~
   master_state

vim +617 drivers/i2c//busses/i2c-aspeed.c

   604  
   605  static int aspeed_i2c_check_bus_busy(struct aspeed_i2c_bus *bus)
   606  {
   607  unsigned long check_started;
   608  
   609  if (bus->multi_master) {
   610  might_sleep();
   611  check_started = jiffies;
   612  }
   613  
   614  for (;;) {
   615  if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
   616ASPEED_I2CD_BUS_BUSY_STS) &&
 > 617  bus->slave_state == ASPEED_I2C_SLAVE_STOP)
   618  return 0;
   619  if (!bus->multi_master)
   620  break;
   621  if (time_after(jiffies, check_started + 
bus->adap.timeout))
   622  break;
   623  usleep_range((ASPEED_I2C_BUS_BUSY_CHECK_INTERVAL_US >> 
2) + 1,
   624   ASPEED_I2C_BUS_BUSY_CHECK_INTERVAL_US);
   625  }
   626  
   627  return aspeed_i2c_recover_bus(bus);
   628  }
   629  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH v2] kernel/SRCU: Fix ctags

2018-10-29 Thread Greg KH
On Mon, Oct 29, 2018 at 10:11:36PM +0200, Sam Protsenko wrote:
> Hi Greg,
> 
> On Mon, Oct 29, 2018 at 10:09 PM, Sam Protsenko
>  wrote:
> > ctags indexing ("make tags" command) throws this warning:
> >
> > ctags: Warning: include/linux/notifier.h:125:
> > null expansion of name pattern "\1"
> >
> > This is the result of DEFINE_PER_CPU() macro expansion. Fix that by
> > getting rid of line break.
> >
> > Similar fix was already done in commit 25528213fe9f ("tags: Fix
> > DEFINE_PER_CPU expansions"), but this one probably wasn't noticed.
> >
> > Signed-off-by: Sam Protsenko 
> > ---
> 
> This one was sent some time ago, but there wasn't any response. Can
> you please review it and merge if applicable?

As before, why ask me?

thanks,

greg k-h


Re: [RFC PATCH v2 3/3] mm, oom: hand over MMF_OOM_SKIP to exit path if it is guranteed to finish

2018-10-29 Thread Michal Hocko
On Tue 30-10-18 13:45:22, Tetsuo Handa wrote:
> Michal Hocko wrote:
> > @@ -3156,6 +3166,13 @@ void exit_mmap(struct mm_struct *mm)
> > vma = remove_vma(vma);
> > }
> > vm_unacct_memory(nr_accounted);
> > +
> > +   /*
> > +* Now that the full address space is torn down, make sure the
> > +* OOM killer skips over this task
> > +*/
> > +   if (oom)
> > +   set_bit(MMF_OOM_SKIP, &mm->flags);
> >  }
> > 
> >  /* Insert vm structure into process list sorted by address
> 
> I don't like setting MMF_OOM_SKIP after remove_vma() loop. 50 users might
> call vma->vm_ops->close() from remove_vma(). Some of them are doing fs
> writeback, some of them might be doing GFP_KERNEL allocation from
> vma->vm_ops->open() with a lock also held by vma->vm_ops->close().
> 
> I don't think that waiting for completion of remove_vma() loop is safe.

What do you mean by 'safe' here?
-- 
Michal Hocko
SUSE Labs


Re: Logitech high-resolution scrolling..

2018-10-29 Thread Peter Hutterer
On Mon, Oct 29, 2018 at 04:03:54PM -0700, Harry Cutts wrote:
> On Mon, 29 Oct 2018 at 15:01, Linus Torvalds
>  wrote:
> > That would work, yes.
> 
> OK, I'll write a patch for this. (It may be next week, though, as I
> have a deadline on a separate project this week.)
> 
> > Except I think you *do* want the "reset on direction change" logic,
> > because otherwise we still end up having the:
> >
> > > - we update remainder to -1
> >
> > where it now gets easier to next time go the wrong way, for no good
> > reason.  So now you only need another 6/8ths the other way to get to
> > within 7/8ths of -8 and scroll back.
> >
> > In other words, the whole "round partial scrolling" also causes that
> > whole "now the other direction is closer" issue.
> >
> > At 7/8's it is less obviously a problem than it was at 1/2, but I
> > still think it's a sign of an unstable algorithm, where changes get
> > triggered too easily in the non-highres world.
> >
> > Also, honestly, I'm not sure I see the point. *IF* you actually scroll
> > more in one direction, it doesn't matter one whit whether you pick
> > 1/2, 7/8, or whole multipliers: the *next* step is still always going
> > to be one whole multiplier away.
> >
> > So I think the whole rounding is actually misguided. I think it may
> > come from the very fact that you did *not* reset the remainder on
> > direction changes, so you could scroll in one direction to -3, and
> > then you change direction and go a "whole" tick the other way, but now
> > it's just at +5, so you think you need to round up.
> >
> > With the whole "reset when changing direction", I don't think the
> > rounding is necessary, and I don't think it makes sense.
> 
> Resetting on direction change would certainly make complete sense in
> smooth mode. The reason that I'm reluctant to do it is for clicky
> mode, where we think it's important that the low-res event happen at a
> consistent point in the movement between notches (the resting
> positions of the wheel). For example, imagine the following scenario
> with a wheel multiplier of 8 and the threshold initially at 7/8ths of
> a notch:
> 
> - I scroll one notch down. The low-res event occurs just before the
> wheel settles in to its notch, leaving a -1/8th remainder, and then
> (on most wheels) the ratchet mechanism settles the wheel 1/8th further
> into its resting position, eliminating the remainder.
> - I move the wheel 3/8ths further down, then change my mind and start
> scrolling upwards.
> 
> If we reset on direction change at this point, then the "zero point"
> will have moved, so that we trigger the low-res movement at -4/8ths
> (at the peak of resistance between the two notches) instead of at
> 7/8ths. If we don't reset but allow the 3/8ths remainder to be
> cleared, the trigger point stays at 7/8ths. It's a minor thing, to be
> sure, but we think that keeping the on-screen response consistent with
> the tactile feel of the wheel is important for the user experience.

IMO this is a lost battle because you cannot know whether the ratchet is
enabled or not (at least not on all mice). Users switch between ratchet and
freewheeling all the time, and once you're out of one mode, you have no
reference to the other mode's reset point anymore.

you could guess it with heuristics. if you get multiple scroll sequences
with $multiplier events, then you're probably back in ratchet mode. Of
course, it's just guesswork...

fwiw, here's a writeup of the issues that I found in the current code,
before Linus' patch. This is as much my note-taking as it is an email.

Let's assume free-wheeling for now, and I'm using high-res values of
2 to reduce typing. multiplier is 8 like the default in the code.

- the first event comes earlier than the second on a consistent scroll
  motion, you get one event after a half movement, the second, third, ...
  events after n + half. Not a huge issue since it only ever happens once
  after plug.  And this is by design as you said, so let's live with
  that :)
- The scroll wheel emulation is unpredictable across scroll events. Let's
  assume multiple sequences of events, with a pause long enough to make the
  user think they are independent scroll motions:
[2, 2, 2, 2] [2, 2, 2, 2] ← input events
    x            x
[2, 2] [2, 2, 2, 2, 2, 2] ← input events
    x            x
[2, 2, 2, 2, 2] [2, 2, 2] ← input events
    x            x
  x marks the spot where the low-res event is sent.
  in the first case, everything is fine, second case has the first sequence
  react quickly, the second one slower. third case is the opposite. The only
  reason this isn't very obvious is because the scroll distance is very
  small either way. we'd need a timeout to avoid this issue, a basic "reset
  remainder after N ms".
- the directional change is what Linus triggered
  [2, 2, -2, 2, -2 ...] ← input events
  remainders:  0  4  r - 8
 -4  -6  r + 8
  2  4   r - 8
 

Re: [PATCH] mm: handle no memcg case in memcg_kmem_charge() properly

2018-10-29 Thread Michal Hocko
On Mon 29-10-18 21:51:55, Roman Gushchin wrote:
> Mike Galbraith reported a regression caused by the commit 9b6f7e163cd0
> ("mm: rework memcg kernel stack accounting") on a system with
> "cgroup_disable=memory" boot option: the system panics with the
> following stack trace:
> 
>   [0.928542] BUG: unable to handle kernel NULL pointer dereference at 
> 00f8
>   [0.929317] PGD 0 P4D 0
>   [0.929573] Oops: 0002 [#1] PREEMPT SMP PTI
>   [0.929984] CPU: 0 PID: 1 Comm: systemd Not tainted 4.19.0-preempt+ #410
>   [0.930637] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> ?-20180531_142017-buildhw-08.phx2.fed4
>   [0.931862] RIP: 0010:page_counter_try_charge+0x22/0xc0
>   [0.932376] Code: 41 5d c3 c3 0f 1f 40 00 0f 1f 44 00 00 48 85 ff 0f 84 a7 
> 00 00 00 41 56 48 89 f8 49 89 fe 49
>   [0.934283] RSP: 0018:acf68031fcb8 EFLAGS: 00010202
>   [0.934826] RAX: 00f8 RBX:  RCX: 
>   [0.935558] RDX: acf68031fd08 RSI: 0020 RDI: 00f8
>   [0.936288] RBP: 0001 R08: 8063 R09: 99ff7cd37a40
>   [0.937021] R10: acf68031fed0 R11: 0020 R12: 0020
>   [0.937749] R13: acf68031fd08 R14: 00f8 R15: 99ff7da1ec60
>   [0.938486] FS:  7fc2140bb280() GS:99ff7da0() 
> knlGS:
>   [0.939311] CS:  0010 DS:  ES:  CR0: 80050033
>   [0.939905] CR2: 00f8 CR3: 12dc8002 CR4: 00760ef0
>   [0.940638] DR0:  DR1:  DR2: 
>   [0.941366] DR3:  DR6: fffe0ff0 DR7: 0400
>   [0.942110] PKRU: 5554
>   [0.942412] Call Trace:
>   [0.942673]  try_charge+0xcb/0x780
>   [0.943031]  memcg_kmem_charge_memcg+0x28/0x80
>   [0.943486]  ? __vmalloc_node_range+0x1e4/0x280
>   [0.943971]  memcg_kmem_charge+0x8b/0x1d0
>   [0.944396]  copy_process.part.41+0x1ca/0x2070
>   [0.944853]  ? get_acl+0x1a/0x120
>   [0.945200]  ? shmem_tmpfile+0x90/0x90
>   [0.945596]  _do_fork+0xd7/0x3d0
>   [0.945934]  ? trace_hardirqs_off_thunk+0x1a/0x1c
>   [0.946421]  do_syscall_64+0x5a/0x180
>   [0.946798]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> 
> The problem occurs because get_mem_cgroup_from_current() returns
> the NULL pointer if memory controller is disabled. Let's check
> if this is a case at the beginning of memcg_kmem_charge() and
> just return 0 if mem_cgroup_disabled() returns true. This is how
> we handle this case in many other places in the memory controller
> code.
> 
> Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
> Reported-by: Mike Galbraith 
> Signed-off-by: Roman Gushchin 
> Cc: Michal Hocko 
> Cc: Johannes Weiner 
> Cc: Vladimir Davydov 
> Cc: Andrew Morton 

I tend to agree with Shakeel that consistency with the other caller
would be less confusing. I would split the function to __memcg_kmem_charge
without any checks and call it from __alloc_pages_nodemask and add the
check to memcg_kmem_charge. This would be less confusing I guess.

Something for a follow up clean up though.

Acked-by: Michal Hocko 

> ---
>  mm/memcontrol.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 54920cbc46bf..6e1469b80cb7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int 
> order)
>   struct mem_cgroup *memcg;
>   int ret = 0;
>  
> - if (memcg_kmem_bypass())
> + if (mem_cgroup_disabled() || memcg_kmem_bypass())
>   return 0;
>  
>   memcg = get_mem_cgroup_from_current();
> -- 
> 2.17.2

-- 
Michal Hocko
SUSE Labs


Re: [PATCH 3/4] base/drivers/topology: Move instructions in the error path

2018-10-29 Thread Viresh Kumar
On Mon, Oct 29, 2018 at 9:56 PM Daniel Lezcano
 wrote:
>
> When the function topology_parse_cpu_capacity() fails, we set the boolean
> cap_parsing_failed to true and we free the raw_capacity. This is correct as
> the function begins with a check against cap_parsing_failed thus protecting
> the function from being re-entered.
>
> However, even if it is impossible for that to happen with the current code, let's

Why impossible ?

> move in the instructions:
>
>  - cap_parsing_failed = true;
>  - free_raw_capacity();
>
>  ... in the 'else' block when the error is detected, that is more semantically
>  correct.
>
> Signed-off-by: Daniel Lezcano 
> ---
>  drivers/base/arch_topology.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index b19d6d4..7311641 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -155,9 +155,9 @@ bool __init topology_parse_cpu_capacity(struct 
> device_node *cpu_node, int cpu)
> pr_err("cpu_capacity: missing %pOF raw capacity\n",
> cpu_node);
> pr_err("cpu_capacity: partial information: fallback 
> to 1024 for all CPUs\n");
> +   cap_parsing_failed = true;
> +   free_raw_capacity();
> }
> -   cap_parsing_failed = true;
> -   free_raw_capacity();

While it is fine to move free_raw_capacity(), it is not to move the
other line. With your
patch what will happen if the first CPU in DT doesn't have the
"capacity-dmips-mhz"
property set ? We will never set cap_parsing_failed and keep on
re-entering this routine
which wasn't required.

Note that the current implementation isn't written to always print an
error where this
property is only partially filled and the same wouldn't happen with
your patch as well.

--
viresh


Re: [PATCH v3] mm/page_owner: use kvmalloc instead of kmalloc

2018-10-29 Thread Michal Hocko
On Tue 30-10-18 09:29:10, Miles Chen wrote:
> On Mon, 2018-10-29 at 09:17 +0100, Michal Hocko wrote:
> > On Mon 29-10-18 09:07:08, Michal Hocko wrote:
> > [...]
> > > Besides that, the following doesn't make much sense to me. It simply
> > > makes no sense to use vmalloc for sub page allocation regardless of
> > > HIGHMEM.
> > 
> > OK, it is still early morning here. Now I get the point of the patch.
> > You just want to (ab)use highmem for smaller requests. I do not like
> > this, to be honest. It causes an internal fragmentation and more
> > importantly the VMALLOC space on 32b where HIGHMEM is enabled (do we
> > have any 64b with HIGHMEM btw?) is quite small to be wasted like that.
> > 
> thanks for your comment. It looks like that using vmalloc fallback for
> sub page allocation is not good here.
> 
> Your comment gave another idea:
> 
> 1. force kbuf to PAGE_SIZE
> 2. allocate a page by alloc_page(GFP_KERNEL | __GFP_HIGHMEM); so we can
> get a highmem page if possible
> 3. use kmap/kunmap pair to create mapping for this page. No vmalloc
> space is used.
> 4. do not change kvmalloc logic.

If you mean for this particular situation then is this really worth
it? I mean this is a short term allocation for root only so you do not
have to worry about low mem depletion.

If you are thinking in more generic terms to allow kmalloc to use highmem
then I am not really sure this will work out.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH v1 2/2] clk: qcom : dispcc: Add support for display port clocks

2018-10-29 Thread Taniya Das




On 10/30/2018 12:13 AM, Stephen Boyd wrote:

Quoting Taniya Das (2018-10-28 03:34:55)

Hello Stephen,

On 2018-10-19 16:04, Taniya Das wrote:

Hello Stephen,

On 10/10/2018 2:04 AM, Stephen Boyd wrote:

Quoting Taniya Das (2018-10-09 06:57:47)

diff --git a/drivers/clk/qcom/dispcc-sdm845.c
b/drivers/clk/qcom/dispcc-sdm845.c
index 0cc4909..6d3136a 100644
--- a/drivers/clk/qcom/dispcc-sdm845.c
+++ b/drivers/clk/qcom/dispcc-sdm845.c
@@ -128,6 +144,100 @@ enum {
  },
   };

+static const struct freq_tbl ftbl_disp_cc_mdss_dp_aux_clk_src[] = {
+   F(1920, P_BI_TCXO, 1, 0, 0),
+   { }
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_aux_clk_src = {
+   .cmd_rcgr = 0x219c,
+   .mnd_width = 0,
+   .hid_width = 5,
+   .parent_map = disp_cc_parent_map_2,
+   .freq_tbl = ftbl_disp_cc_mdss_dp_aux_clk_src,
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "disp_cc_mdss_dp_aux_clk_src",
+   .parent_names = disp_cc_parent_names_2,
+   .num_parents = 2,
+   .flags = CLK_SET_RATE_PARENT,
+   .ops = &clk_rcg2_ops,
+   },
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_dp_crypto_clk_src[] =
{
+   F(108000, P_DP_PHY_PLL_LINK_CLK,   3,   0,   0),
+   F(18, P_DP_PHY_PLL_LINK_CLK,   3,   0,   0),
+   F(36, P_DP_PHY_PLL_LINK_CLK,   3,   0,   0),
+   F(54, P_DP_PHY_PLL_LINK_CLK,   3,   0,   0),
+   { }
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_crypto_clk_src = {
+   .cmd_rcgr = 0x2154,
+   .mnd_width = 0,
+   .hid_width = 5,
+   .parent_map = disp_cc_parent_map_1,
+   .freq_tbl = ftbl_disp_cc_mdss_dp_crypto_clk_src,
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "disp_cc_mdss_dp_crypto_clk_src",
+   .parent_names = disp_cc_parent_names_1,
+   .num_parents = 4,
+   .flags = CLK_GET_RATE_NOCACHE,


Why?


+   .ops = &clk_rcg2_ops,
+   },
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_dp_link_clk_src[] = {
+   F(162000, P_DP_PHY_PLL_LINK_CLK,   1,   0,   0),
+   F(27, P_DP_PHY_PLL_LINK_CLK,   1,   0,   0),
+   F(54, P_DP_PHY_PLL_LINK_CLK,   1,   0,   0),
+   F(81, P_DP_PHY_PLL_LINK_CLK,   1,   0,   0),


Are these in kHz? They really look like it and that's bad. Why do we
need them at all? Just to make sure the display driver picks these
exact
frequencies? It seems like we could just pass whatever number comes in
up to the parent and see what it can do.



Let me check back the reason we had to make this change.


We will need this flag since we reset/power-down the PLL every time we
disconnect/connect the DP cable or during suspend/resume. Only with this
flag, the calls to the PLL driver are properly called.


What does this mean? I wanted to know about the weird frequencies listed
above, and why it can't be done without a frequency table and direct
rates passed up to the parent.



Oops, my bad :(.

We added these changes to handle higher clock rates. Rates greater than
4.3GHz cannot be represented in 32-bit variables. For DP, we already have
5.4GHz and 8.1GHz frequencies for the VCO clock. We will need this kHz
frequency list in the clock driver.
 Let me check if they can do something like the byte/pixel clocks of 
display.





+   { }
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_link_clk_src = {
+   .cmd_rcgr = 0x2138,
+   .mnd_width = 0,
+   .hid_width = 5,
+   .parent_map = disp_cc_parent_map_1,
+   .freq_tbl = ftbl_disp_cc_mdss_dp_link_clk_src,
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "disp_cc_mdss_dp_link_clk_src",
+   .parent_names = disp_cc_parent_names_1,
+   .num_parents = 4,
+   .flags = CLK_SET_RATE_PARENT,
+   .ops = &clk_rcg2_ops,
+   },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_pixel1_clk_src = {
+   .cmd_rcgr = 0x2184,
+   .mnd_width = 16,
+   .hid_width = 5,
+   .parent_map = disp_cc_parent_map_1,
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "disp_cc_mdss_dp_pixel1_clk_src",
+   .parent_names = disp_cc_parent_names_1,
+   .num_parents = 4,
+   .flags = CLK_SET_RATE_PARENT,
+   .ops = &clk_dp_ops,
+   },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_pixel_clk_src = {
+   .cmd_rcgr = 0x216c,
+   .mnd_width = 16,
+   .hid_width = 5,
+   .parent_map = disp_cc_parent_map_1,
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "disp_cc_mdss_dp_pixel_clk_src",
+   .parent_names = disp_cc_parent_names_1,
+   .num_parents = 4,
+   .flags = CLK_SET_RATE_PARENT,
+   .ops = &clk_dp_ops,
+   },
+};
+
   static const struct freq_tbl ftbl_disp_cc_mdss_esc0_clk_src[] = {
  F(1920, P_BI_TCXO, 1, 0, 0),
  { }
@@ -391,6 +501,115 @@ enum {
  

Re: [PATCH 2/4] base/drivers/arch_topology: Replace mutex with READ_ONCE / WRITE_ONCE

2018-10-29 Thread Viresh Kumar
On Mon, Oct 29, 2018 at 9:54 PM Daniel Lezcano
 wrote:
>
> The mutex protects a per_cpu variable access. The potential race can
> happen only when the cpufreq governor module is loaded and at the same
> time the cpu capacity is changed in the sysfs.
>
> There is no real benefit in using a mutex to protect a variable
> assignment when there is no situation where a task can take the lock
> and block.
>
> Replace the mutex by READ_ONCE / WRITE_ONCE.
>
> Signed-off-by: Daniel Lezcano 
> ---
>  drivers/base/arch_topology.c  | 7 +--
>  include/linux/arch_topology.h | 2 +-
>  2 files changed, 2 insertions(+), 7 deletions(-)

Reviewed-by: Viresh Kumar 


Re: [PATCH v2] bit_spinlock: introduce smp_cond_load_relaxed

2018-10-29 Thread Gao Xiang
Hi,

On 2018/10/30 14:04, Gao Xiang wrote:
> It is better to use wrapped smp_cond_load_relaxed
> instead of open-coded busy waiting for bit_spinlock.
> 
> Signed-off-by: Gao Xiang 
> ---
> 
> change log v2:
>  - fix the incorrect expression !(VAL >> (bitnum & (BITS_PER_LONG-1)))
>  - the test result is described in the following reply.
> 
> Thanks,
> Gao Xiang


Simple test script:
#include 
#include 
#include 

unsigned long global_lock;

int test_thread(void *data)
{
unsigned long thread_id = (unsigned long)data;
int i;
u64 start = ktime_get_ns();

for (i = 0; i < 50; ++i) {
bit_spin_lock(0, &global_lock);
__asm__("yield");
bit_spin_unlock(0, &global_lock);
}
pr_err("Thread id: %lu time: %llu\n", thread_id, ktime_get_ns() - 
start);

do_exit(0);
}


static int __init bitspinlock_test_module_init(void)
{
int i;

for (i = 0; i < 8; ++i) {
if (IS_ERR(kthread_run(test_thread, (void *)(unsigned long)i, 
"thread-%d", i)))
pr_err("fail to create thread %d\n", i);
}

return 0;
}

static void __exit bitspinlock_test_module_exit(void)
{
}

module_init(bitspinlock_test_module_init);
module_exit(bitspinlock_test_module_exit);
MODULE_LICENSE("GPL");


...and tested in the following ARM server environment:

Processor: HI1616 (https://en.wikichip.org/wiki/hisilicon/hi16xx/hi1616)
Board: HiSilicon D05 Development Board (http://open-estuary.org/d05/)
Memory: 512GB
Host OS: Ubuntu 18.04.1 LTS (Ubuntu 4.15.0-29.31-generic 4.15.18)
QEMU KVM OS: Linux 4.19 + buildroot
QEMU KVM cmdline: qemu-system-aarch64 -enable-kvm -cpu host -smp 4 -m 256M 
-kernel Image -M virt,kernel_irqchip=on -nographic -hda rootfs.ext2 -append 
'root=/dev/vda console=ttyAMA0 earlycon=pl011,0x900' -serial mon:stdio -net 
none

Without this patch:
  Thread 0   Thread 1   Thread 2   Thread 3   Thread 4   Thread 5   Thread 6   
Thread 7
1 1283709480 1271869280  454742480 1173673820 1145643640 1118846920  774616920 
1144146140
2  643580180  625143860  576841700  322982340  649987880  585749000  529178880  
373374780
3  672307220  847315000  880801860 1039502040  667086380 1033939940 1035381120 
1046898300
4  568635580  440547020  737000380  910040880  804543740  712314280  868896880  
867049000
5  749107320  726397720  776134480  611970100  756721040  753449440  711691300  
609343300

With this patch:
  Thread 0   Thread 1   Thread 2   Thread 3   Thread 4   Thread 5   Thread 6   
Thread 7
1  170327620  196322160  169434180   74723860  178145600  178873460  143843260  
 70998780
2  166415220  129649200  166161240  175241520  155474460  112811860  157003140  
150087420
3  511420780  117655640  598641860  596213720  462888760  430838600  554346300  
428035120 
4  174520240  156311800  120274280   87465380  172781400  136118620  163728340  
 63026360
5  153677940  202786860  183626500  140721300  150311360  161266840  168154340  
107247460

Thanks,
Gao Xiang


[PATCH v2] bit_spinlock: introduce smp_cond_load_relaxed

2018-10-29 Thread Gao Xiang
It is better to use wrapped smp_cond_load_relaxed
instead of open-coded busy waiting for bit_spinlock.

Signed-off-by: Gao Xiang 
---

change log v2:
 - fix the incorrect expression !(VAL >> (bitnum & (BITS_PER_LONG-1)))
 - the test result is described in the following reply.

Thanks,
Gao Xiang

 include/linux/bit_spinlock.h | 23 ++-
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index bbc4730a6505..d5f922b5ffd9 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -15,22 +15,19 @@
  */
 static inline void bit_spin_lock(int bitnum, unsigned long *addr)
 {
-   /*
-* Assuming the lock is uncontended, this never enters
-* the body of the outer loop. If it is contended, then
-* within the inner loop a non-atomic test is used to
-* busywait with less bus contention for a good time to
-* attempt to acquire the lock bit.
-*/
-   preempt_disable();
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-   while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
-   preempt_enable();
-   do {
-   cpu_relax();
-   } while (test_bit(bitnum, addr));
+   const unsigned int bitshift = bitnum & (BITS_PER_LONG - 1);
+
+   while (1) {
+   smp_cond_load_relaxed(&addr[BIT_WORD(bitnum)],
+ !((VAL >> bitshift) & 1));
preempt_disable();
+   if (!test_and_set_bit_lock(bitnum, addr))
+   break;
+   preempt_enable();
}
+#else
+   preempt_disable();
 #endif
__acquire(bitlock);
 }
-- 
2.17.1



Re: [PATCH 1/4] base/drivers/arch_topology: Remove useless check

2018-10-29 Thread Viresh Kumar
On Mon, Oct 29, 2018 at 9:56 PM Daniel Lezcano
 wrote:

Would have been better if I was cc'd on all the patches since I was
looking at this
stuff actively this week :)

> The function 'register_cpufreq_notifier' registers the
> init_cpu_capacity_notifier() only if raw_capacity is not NULL.
>
> Hence init_cpu_capacity_notifier() can not be called with raw_capacity
> set to NULL, it is pointless to check it.

It isn't entirely pointless though.

It is possible for init_cpu_capacity_notifier() to get called after
free_raw_capacity()
is called from it as the notifier unregistration happens from a workqueue.


[PATCH] arm64: dts: qcom: msm8998: Reserve gpio ranges on MTP

2018-10-29 Thread Bjorn Andersson
GPIOs 0 through 3 and 81 through 84 are configured to not be accessible
from the application CPUs. Mark them as reserved to allow the MSM8998
MTP to boot after the introduction of 3edfb7bd76bd ("gpiolib: Show
correct direction from the beginning").

Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi 
b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
index b4276da1fb0d..11fd1fe8bdb5 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
@@ -241,3 +241,7 @@
};
};
 };
+
+&tlmm {
+   gpio-reserved-ranges = <0 4>, <81 4>;
+};
-- 
2.18.0



[PATCH] doc: correct parameter in stallwarn

2018-10-29 Thread Joel Fernandes (Google)
The stallwarn document incorrectly mentions 'fps=' instead of 'fqs='.
Correct that.

Signed-off-by: Joel Fernandes (Google) 
---
 Documentation/RCU/stallwarn.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index b01bcafc64aa..073dbc12d1ea 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -205,7 +205,7 @@ handlers are no longer able to execute on this CPU.  This 
can happen if
 the stalled CPU is spinning with interrupts are disabled, or, in -rt
 kernels, if a high-priority process is starving RCU's softirq handler.
 
-The "fps=" shows the number of force-quiescent-state idle/offline
+The "fqs=" shows the number of force-quiescent-state idle/offline
 detection passes that the grace-period kthread has made across this
 CPU since the last time that this CPU noted the beginning of a grace
 period.
-- 
2.19.1.568.g152ad8e336-goog



linux-next: Tree for Oct 30

2018-10-29 Thread Stephen Rothwell
Hi all,

Please do not add any v4.21/v5.1 code to your linux-next included trees
until after the merge window closes.

Changes since 20181029:

My fixes tree contains this:

  "drivers: net: include linux/ip.h for iphdr"

The compiler-attributes tree gained a conflict against the kbuild tree.

The vfs tree gained a conflict against Linus' tree.

Non-merge commits (relative to Linus' tree): 1133
 1678 files changed, 65393 insertions(+), 20326 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a
multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), I do an x86_64 modules_install followed by
builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit),
ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc
and sparc64 defconfig. And finally, a simple boot test of the powerpc
pseries_le_defconfig kernel in qemu (with and without kvm enabled).

Below is a summary of the state of the merge.

I am currently merging 291 trees (counting Linus' and 66 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (4b42745211af Merge tag 'armsoc-soc' of 
git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc)
Merging fixes/master (2941927a2da1 drivers: net: include linux/ip.h for iphdr)
Merging kbuild-current/fixes (9f51ae62c84a Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging arc-current/for-curr (56d02dd9e794 ARC: IOC: panic if kernel was 
started with previously enabled IOC)
Merging arm-current/fixes (3a58ac65e2d7 ARM: 8799/1: mm: fix pci_ioremap_io() 
offset check)
Merging arm64-fixes/for-next/fixes (ca2b497253ad arm64: perf: Reject 
stand-alone CHAIN events for PMUv3)
Merging m68k-current/for-linus (58c116fb7dc6 m68k/sun3: Remove is_medusa and 
m68k_pgtable_cachemode)
Merging powerpc-fixes/fixes (ac1788cc7da4 powerpc/numa: Skip onlining a offline 
node in kdump path)
Merging sparc/master (345671ea0f92 Merge branch 'akpm' (patches from Andrew))
Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2)
Merging net/master (9f51ae62c84a Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging bpf/master (d8fd9e106fbc bpf: fix wrong helper enablement in cgroup 
local storage)
Merging ipsec/master (6788fac82001 Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf)
Merging netfilter/master (c1cf13068b26 Merge branch 'master' of 
git://blackhole.kfki.hu/nf)
Merging ipvs/master (feb9f55c33e5 netfilter: nft_dynset: allow dynamic updates 
of non-anonymous set)
Merging wireless-drivers/master (3baafeffa48a iwlwifi: 1000: set the TFD queue 
size)
Merging mac80211/master (8d0be26c781a mac80211_hwsim: fix module init error 
paths for netlink)
Merging rdma-fixes/for-rc (a3671a4f973e RDMA/ucma: Fix Spectre v1 vulnerability)
Merging sound-current/for-linus (aedef16a63d5 ALSA: dice: fix to wait for 
releases of all ALSA character devices)
Merging sound-asoc-fixes/for-linus (eafb621d62c7 Merge branch 'asoc-4.19' into 
asoc-linus)
Merging regmap-fixes/for-linus (35a7f35ad1b1 Linux 4.19-rc8)
Merging regulator-fixes/for-linus (84df9525b0c2 Linux 4.19)
Merging spi-fixes/for-linus (599eb81f4118 Merge branch 'spi-4.19' into 
spi-linus)
Merging pci-current/for-linus (2edab4df98d9 PCI: Expand the "PF" acronym in 
Kconfig help text)
Merging driver-core.current/driver-core-linus (9f51ae62c84a Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging tty.current/tty-linus (202dc3cc10b4 serial: sh-sci: Fix receive on 
SCIFA/SCIFB variants with DMA)
Merging usb.current/usb-linus (69d5b97c5973 HID: we do not randomly make new 
drivers 'default y')
Merging usb-gadget-fixes/fixes (d9707490077b 

Re: [RFC PATCH] Implement /proc/pid/kill

2018-10-29 Thread Aleksa Sarai
On 2018-10-29, Daniel Colascione  wrote:
> Add a simple proc-based kill interface. To use /proc/pid/kill, just
> write the signal number in base-10 ASCII to the kill file of the
> process to be killed: for example, 'echo 9 > /proc/$$/kill'.
> 
> Semantically, /proc/pid/kill works like kill(2), except that the
> process ID comes from the proc filesystem context instead of from an
> explicit system call parameter. This way, it's possible to avoid races
> between inspecting some aspect of a process and that process's PID
> being reused for some other process.

(Aside from any UX concerns other folks might have.)

I think it would be a good idea to (at least temporarily) restrict this
so that only processes that are in the same PID namespace as the /proc
being resolved through may use this interface. Otherwise you might have
cases where partial container breakouts can start sending signals to
PIDs they wouldn't normally be able to address.

> With /proc/pid/kill, it's possible to write a proper race-free and
> safe pkill(1). An approximation follows. A real program might use
> openat(2), having opened a process's /proc/pid directory explicitly,
> with the directory file descriptor serving as a sort of "process
> handle".

I do like the idea of holding a dirfd to /proc/$pid to address
processes, and it is something I considered doing in runc. (Unfortunately
there are lots of things that make it a bit difficult to use /proc/$pid
exclusively for introspection of a process -- especially in the context
of containers.)

-- 
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH



signature.asc
Description: PGP signature


Re: [RFC PATCH v2 3/3] mm, oom: hand over MMF_OOM_SKIP to exit path if it is guranteed to finish

2018-10-29 Thread Tetsuo Handa
Michal Hocko wrote:
> @@ -3156,6 +3166,13 @@ void exit_mmap(struct mm_struct *mm)
> vma = remove_vma(vma);
> }
> vm_unacct_memory(nr_accounted);
> +
> +   /*
> +* Now that the full address space is torn down, make sure the
> +* OOM killer skips over this task
> +*/
> +   if (oom)
> +   set_bit(MMF_OOM_SKIP, &mm->flags);
>  }
> 
>  /* Insert vm structure into process list sorted by address

I don't like setting MMF_OOM_SKIP after remove_vma() loop. 50 users might
call vma->vm_ops->close() from remove_vma(). Some of them are doing fs
writeback, some of them might be doing GFP_KERNEL allocation from
vma->vm_ops->open() with a lock also held by vma->vm_ops->close().

I don't think that waiting for completion of remove_vma() loop is safe.
And my patch is safe.

 drivers/android/binder.c  |2 +-
 drivers/gpu/drm/drm_gem_cma_helper.c  |2 +-
 drivers/gpu/drm/drm_vm.c  |8 
 drivers/gpu/drm/gma500/framebuffer.c  |2 +-
 drivers/gpu/drm/gma500/psb_drv.c  |2 +-
 drivers/gpu/drm/i915/i915_drv.c   |2 +-
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |2 +-
 drivers/gpu/drm/udl/udl_drv.c |2 +-
 drivers/gpu/drm/v3d/v3d_drv.c |2 +-
 drivers/gpu/drm/vc4/vc4_drv.c |2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |2 +-
 drivers/gpu/drm/vkms/vkms_drv.c   |2 +-
 drivers/gpu/drm/xen/xen_drm_front.c   |2 +-
 drivers/hwtracing/intel_th/msu.c  |2 +-
 drivers/hwtracing/stm/core.c  |2 +-
 drivers/infiniband/core/uverbs_main.c |2 +-
 drivers/infiniband/sw/rdmavt/mmap.c   |2 +-
 drivers/infiniband/sw/rxe/rxe_mmap.c  |2 +-
 drivers/media/common/videobuf2/videobuf2-memops.c |2 +-
 drivers/media/pci/meye/meye.c |2 +-
 drivers/media/platform/omap/omap_vout.c   |2 +-
 drivers/media/usb/stkwebcam/stk-webcam.c  |2 +-
 drivers/media/v4l2-core/videobuf-dma-contig.c |2 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c |2 +-
 drivers/media/v4l2-core/videobuf-vmalloc.c|2 +-
 drivers/misc/genwqe/card_dev.c|2 +-
 drivers/misc/mic/scif/scif_mmap.c |2 +-
 drivers/misc/sgi-gru/grufile.c|2 +-
 drivers/rapidio/devices/rio_mport_cdev.c  |2 +-
 drivers/staging/comedi/comedi_fops.c  |2 +-
 drivers/staging/media/zoran/zoran_driver.c|2 +-
 drivers/staging/vme/devices/vme_user.c|2 +-
 drivers/usb/core/devio.c  |2 +-
 drivers/usb/mon/mon_bin.c |2 +-
 drivers/video/fbdev/omap2/omapfb/omapfb-main.c|2 +-
 drivers/xen/gntalloc.c|2 +-
 drivers/xen/gntdev.c  |2 +-
 drivers/xen/privcmd-buf.c |2 +-
 drivers/xen/privcmd.c |2 +-
 fs/9p/vfs_file.c  |2 +-
 fs/fuse/file.c|2 +-
 fs/kernfs/file.c  |2 +-
 include/linux/mm.h|2 +-
 ipc/shm.c |2 +-
 kernel/events/core.c  |2 +-
 kernel/relay.c|2 +-
 mm/hugetlb.c  |2 +-
 mm/mmap.c |   14 +++---
 net/packet/af_packet.c|2 +-
 sound/core/pcm_native.c   |4 ++--
 sound/usb/usx2y/us122l.c  |2 +-
 sound/usb/usx2y/usx2yhwdeppcm.c   |2 +-
 52 files changed, 62 insertions(+), 62 deletions(-)


[PATCH] kbuild: consolidate single targets

2018-10-29 Thread Masahiro Yamada
Instead of specifying target/source pairs, let's list patterns that we
want to handle as single targets. This slightly changes the behavior;
the top Makefile previously checked the presence of a source file,
now Kbuild will descend into a subdirectory anyway to find out what to
do there.

Signed-off-by: Masahiro Yamada 
---

 Makefile | 16 +---
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/Makefile b/Makefile
index be76e6e..7d13add 100644
--- a/Makefile
+++ b/Makefile
@@ -1713,21 +1713,7 @@ else
 target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@))
 endif
 
-%.s: %.c prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.i: %.c prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.o: %.c prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.lst: %.c prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.s: %.S prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.o: %.S prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.symtypes: %.c prepare scripts FORCE
-   $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
-%.ll: %.c prepare scripts FORCE
+%.i %.ll %.lst %.o %.s %.symtypes: prepare scripts FORCE
$(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
 
 # Modules
-- 
2.7.4



[PATCH 2/2] kbuild: remove cc-name variable

2018-10-29 Thread Masahiro Yamada
There is one more user of $(cc-name) in the top Makefile. It is supposed
to detect Clang before invoking Kconfig, so it should still be there
in the $(shell ...) form. All the other users of $(cc-name) have been
replaced with $(CONFIG_CC_IS_CLANG). Hence, scripts/Kbuild.include does
not need to define cc-name any more.

Signed-off-by: Masahiro Yamada 
---

 Makefile   | 2 +-
 scripts/Kbuild.include | 4 
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index bd93bc3..430f7de 100644
--- a/Makefile
+++ b/Makefile
@@ -485,7 +485,7 @@ ifneq ($(KBUILD_SRC),)
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree)
 endif
 
-ifeq ($(cc-name),clang)
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
 ifneq ($(CROSS_COMPILE),)
 CLANG_TARGET   := --target=$(notdir $(CROSS_COMPILE:%-=%))
 GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD)))
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index ca21a35..51703ae 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -140,10 +140,6 @@ cc-option-yn = $(call try-run,\
 cc-disable-warning = $(call try-run,\
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c 
-x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
-# cc-name
-# Expands to either gcc or clang
-cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || 
echo gcc)
-
 # cc-version
 cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
 
-- 
2.7.4



Re: [PATCH v2] pstore: Avoid duplicate call of persistent_ram_zap()

2018-10-29 Thread Joel Fernandes
On Mon, Oct 29, 2018 at 06:37:53AM +, Peng15 Wang 王鹏 wrote:
> 
> 
> >From: Kees Cook 
> >Sent: Monday, October 29, 2018 0:03
> >To: Peng15 Wang 王鹏
> >Cc: an...@enomsg.org; ccr...@android.com; tony.l...@intel.com; 
> >linux-kernel@vger.kernel.org; Joel Fernandes
> >Subject: Re: [PATCH v2] pstore: Avoid duplicate call of persistent_ram_zap()
> >
> >On Sat, Oct 27, 2018 at 2:08 PM, Peng15 Wang 王鹏  
> >wrote:
> >> When initialing prz with invalid data in buffer(no PERSISTENT_RAM_SIG),
> >> function call path is like this:
> >>
> >> ramoops_init_prz ->
> >> |
> >> |-> persistent_ram_new -> persistent_ram_post_init -> persistent_ram_zap
> >> |
> >> |-> persistent_ram_zap
> >>
> >> As we can see, persistent_ram_zap() is called twice.
> >> We can avoid this by adding an option to persistent_ram_new(), and
> >> only call persistent_ram_zap() when it is needed.
> >>
> >> Signed-off-by: Peng Wang 
> >> ---
> >>  fs/pstore/ram.c|  5 +++--
> >>  fs/pstore/ram_core.c   | 11 +++
> >>  include/linux/pstore_ram.h |  3 ++-
> >>  3 files changed, 12 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
> >> index ffcff6516e89..3044274de2f0 100644
> >> --- a/fs/pstore/ram.c
> >> +++ b/fs/pstore/ram.c
> >> @@ -596,7 +596,8 @@ static int ramoops_init_przs(const char *name,
> >>   name, i, *cnt - 1);
> >> prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
> >>&cxt->ecc_info,
> >> -  cxt->memtype, flags, label);
> >> +  cxt->memtype, flags,
> >> +  label, true);
> >> if (IS_ERR(prz_ar[i])) {
> >> err = PTR_ERR(prz_ar[i]);
> >> dev_err(dev, "failed to request %s mem region 
> >> (0x%zx@0x%llx): %d\n",
> >> @@ -640,7 +641,7 @@ static int ramoops_init_prz(const char *name,
> >>
> >> label = kasprintf(GFP_KERNEL, "ramoops:%s", name);
> >> *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
> >> - cxt->memtype, 0, label);
> >> + cxt->memtype, 0, label, false);
> >> if (IS_ERR(*prz)) {
> >> int err = PTR_ERR(*prz);
> >>
> >> diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
> >> index 12e21f789194..d8a520c8741c 100644
> >> --- a/fs/pstore/ram_core.c
> >> +++ b/fs/pstore/ram_core.c
> >> @@ -486,7 +486,8 @@ static int persistent_ram_buffer_map(phys_addr_t 
> >> start, phys_addr_t size,
> >>  }
> >>
> >>  static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 
> >> sig,
> >> -   struct persistent_ram_ecc_info 
> >> *ecc_info)
> >> +   struct persistent_ram_ecc_info 
> >> *ecc_info,
> >> +   bool zap_option)
> >>  {
> >> int ret;
> >>
> >> @@ -514,7 +515,8 @@ static int persistent_ram_post_init(struct 
> >> persistent_ram_zone *prz, u32 sig,
> >>
> >> /* Rewind missing or invalid memory area. */
> >> prz->buffer->sig = sig;
> >> -   persistent_ram_zap(prz);
> >> +   if (zap_option)
> >> +   persistent_ram_zap(prz);
> >
> >This part of persistent_ram_post_init() handles the "invalid buffer"
> >case, which should always zap. The question is whether or not to zap
> >in the case of a valid buffer (the "return 0" earlier in the
> >function). I think your v2 patch needs similar changes found in your
> >v1: the v2 patch also needs to remove the "return 0" and replace it
> >with "zap_option = true;" and to remove the zap call from
> >ramoops_init_prz(). Then I think all the paths will be consolidated.
> 
> Thank you so much for the tips!
> 
> Furthermore,  we can make "zap_option" stand for whether its caller want to 
> zap in case of
> a valid buffer. So ramoops_init_przs() would say "false", and 
> ramoops_init_prz() would 
> say "true".
> 
> In persistent_ram_post_init(), if zap_option says "false", we return 
> immediately after 
> persistent_ram_save_old(), otherwise persistent_ram_zap would be called at 
> the end.

Can you not just add it to the flags, something like PRZ_ZAP_NEW, and set
that flag before calling ramoops_init_prz*, then check the flag in
persistent_ram_new? We are already passing flags to persistent_ram_new.

That way no new function arguments are needed and it's simple.

 - Joel



Re: [PATCH tip/core/rcu 02/19] rcu: Defer reporting RCU-preempt quiescent states when disabled

2018-10-29 Thread Joel Fernandes
On Mon, Oct 29, 2018 at 07:27:35AM -0700, Paul E. McKenney wrote:
> On Mon, Oct 29, 2018 at 11:24:42AM +, Ran Rozenstein wrote:
> > Hi Paul and all,
> > 
> > > -Original Message-
> > > From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> > > ow...@vger.kernel.org] On Behalf Of Paul E. McKenney
> > > Sent: Thursday, August 30, 2018 01:21
> > > To: linux-kernel@vger.kernel.org
> > > Cc: mi...@kernel.org; jiangshan...@gmail.com; dipan...@in.ibm.com;
> > > a...@linux-foundation.org; mathieu.desnoy...@efficios.com;
> > > j...@joshtriplett.org; t...@linutronix.de; pet...@infradead.org;
> > > rost...@goodmis.org; dhowe...@redhat.com; eduma...@google.com;
> > > fweis...@gmail.com; o...@redhat.com; j...@joelfernandes.org; Paul E.
> > > McKenney 
> > > Subject: [PATCH tip/core/rcu 02/19] rcu: Defer reporting RCU-preempt
> > > quiescent states when disabled
> > > 
> > > This commit defers reporting of RCU-preempt quiescent states at
> > > rcu_read_unlock_special() time when any of interrupts, softirq, or
> > > preemption are disabled.  These deferred quiescent states are reported at 
> > > a
> > > later RCU_SOFTIRQ, context switch, idle entry, or CPU-hotplug offline
> > > operation.  Of course, if another RCU read-side critical section has 
> > > started in
> > > the meantime, the reporting of the quiescent state will be further 
> > > deferred.
> > > 
> > > This also means that disabling preemption, interrupts, and/or softirqs 
> > > will act
> > > as an RCU-preempt read-side critical section.
> > > This is enforced by checking preempt_count() as needed.
> > > 
> > > Some special cases must be handled on an ad-hoc basis, for example,
> > > context switch is a quiescent state even though both the scheduler and
> > > do_exit() disable preemption.  In these cases, additional calls to
> > > rcu_preempt_deferred_qs() override the preemption disabling.  Similar 
> > > logic
> > > overrides disabled interrupts in rcu_preempt_check_callbacks() because in
> > > this case the quiescent state happened just before the corresponding
> > > scheduling-clock interrupt.
> > > 
> > > In theory, this change lifts a long-standing restriction that required 
> > > that if
> > > interrupts were disabled across a call to rcu_read_unlock() that the 
> > > matching
> > > rcu_read_lock() also be contained within that interrupts-disabled region 
> > > of
> > > code.  Because the reporting of the corresponding RCU-preempt quiescent
> > > state is now deferred until after interrupts have been enabled, it is no 
> > > longer
> > > possible for this situation to result in deadlocks involving the 
> > > scheduler's
> > > runqueue and priority-inheritance locks.  This may allow some code
> > > simplification that might reduce interrupt latency a bit.  Unfortunately, 
> > > in
> > > practice this would also defer deboosting a low-priority task that had 
> > > been
> > > subjected to RCU priority boosting, so real-time-response considerations
> > > might well force this restriction to remain in place.
> > > 
> > > Because RCU-preempt grace periods are now blocked not only by RCU read-
> > > side critical sections, but also by disabling of interrupts, preemption, 
> > > and
> > > softirqs, it will be possible to eliminate RCU-bh and RCU-sched in favor 
> > > of
> > > RCU-preempt in CONFIG_PREEMPT=y kernels.  This may require some
> > > additional plumbing to provide the network denial-of-service guarantees
> > > that have been traditionally provided by RCU-bh.  Once these are in place,
> > > CONFIG_PREEMPT=n kernels will be able to fold RCU-bh into RCU-sched.
> > > This would mean that all kernels would have but one flavor of RCU, which
> > > would open the door to significant code cleanup.
> > > 
> > > Moving to a single flavor of RCU would also have the beneficial effect of
> > > reducing the NOCB kthreads by at least a factor of two.
> > > 
> > > Signed-off-by: Paul E. McKenney  [ paulmck:
> > > Apply rcu_read_unlock_special() preempt_count() feedback
> > >   from Joel Fernandes. ]
> > > [ paulmck: Adjust rcu_eqs_enter() call to rcu_preempt_deferred_qs() in
> > >   response to bug reports from kbuild test robot. ] [ paulmck: Fix bug 
> > > located
> > > by kbuild test robot involving recursion
> > >   via rcu_preempt_deferred_qs(). ]
> > > ---
> > >  .../RCU/Design/Requirements/Requirements.html |  50 +++---
> > >  include/linux/rcutiny.h   |   5 +
> > >  kernel/rcu/tree.c |   9 ++
> > >  kernel/rcu/tree.h |   3 +
> > >  kernel/rcu/tree_exp.h |  71 +++--
> > >  kernel/rcu/tree_plugin.h  | 144 +-
> > >  6 files changed, 205 insertions(+), 77 deletions(-)
> > > 
> > 
> > We started seeing the trace below in our regression system, after I 
> > bisected I found this is the offending commit.
> > This appears immediately on boot. 
> > Please let me know if you need any additional details.
> 
> 

Re: [RFC PATCH] Implement /proc/pid/kill

2018-10-29 Thread Joel Fernandes
On Mon, Oct 29, 2018 at 3:11 PM Daniel Colascione  wrote:
>
> Add a simple proc-based kill interface. To use /proc/pid/kill, just
> write the signal number in base-10 ASCII to the kill file of the
> process to be killed: for example, 'echo 9 > /proc/$$/kill'.
>
> Semantically, /proc/pid/kill works like kill(2), except that the
> process ID comes from the proc filesystem context instead of from an
> explicit system call parameter. This way, it's possible to avoid races
> between inspecting some aspect of a process and that process's PID
> being reused for some other process.
>
> With /proc/pid/kill, it's possible to write a proper race-free and
> safe pkill(1). An approximation follows. A real program might use
> openat(2), having opened a process's /proc/pid directory explicitly,
> with the directory file descriptor serving as a sort of "process
> handle".

How long does the 'inspection' procedure take? If it's a short
duration, then is PID reuse really an issue, I mean the PIDs are not
reused until wrap around and the only reason this can be a problem is
if you have the wrap around while the 'inspecting some aspect'
procedure takes really long.

Also the proc fs is typically not the right place for this. Some
entries in proc are writeable, but those are for changing values of
kernel data structures. The title of man proc(5) is "proc - process
information pseudo-filesystem". So it's "information", right?

IMO without a really good reason for this, it could really be a hard
sell but the RFC was worth it anyway to discuss it ;-)

thanks,

- Joel


Re: [PATCH] kretprobe: produce sane stack traces

2018-10-29 Thread Aleksa Sarai
On 2018-10-30, Masami Hiramatsu  wrote:
> > Historically, kretprobe has always produced unusable stack traces
> > (kretprobe_trampoline is the only entry in most cases, because of the
> > funky stack pointer overwriting). This has caused quite a few annoyances
> > when using tracing to debug problems[1] -- since return values are only
> > available with kretprobes but stack traces were only usable for kprobes,
> > users had to probe both and then manually associate them.
> 
> Yes, this unfortunately still happens. I once tried to fix it by
> replacing current "kretprobe instance" with graph-tracer's per-thread
> return stack. (https://lkml.org/lkml/2017/8/21/553)

I played with graph-tracer a while ago and it didn't appear to have
associated return values? Is this hidden somewhere or did I just miss
it?

> I still believe that direction is the best solution to solve this kind
> of issues, otherwise, we have to have 2 different stack fixups for
> kretprobe and ftrace graph tracer. (I will have a talk with Steve at
> plumbers next month)

I'm definitely :+1: on removing the duplication of the stack fixups, my
first instinct was to try to refactor all of the stack_trace code so
that we didn't have multiple arch-specific "get the stack trace" paths
(and so we could generically add current_kretprobe_instance() to one
codepath). But after looking into it, I was convinced this would be more
than a little ugly to do.

> > With the advent of bpf_trace, users would have been able to do this
> > association in bpf, but this was less than ideal (because
> > bpf_get_stackid would still produce rubbish and programs that didn't
> > know better would get silly results). The main usecase for stack traces
> > (at least with bpf_trace) is for DTrace-style aggregation on stack
> > traces (both entry and exit). Therefore we cannot simply correct the
> > stack trace on exit -- we must stash away the stack trace and return the
> > entry stack trace when it is requested.
> > 
> > In theory, patches like commit 76094a2cf46e ("ftrace: distinguish
> > kretprobe'd functions in trace logs") are no longer necessary *for
> > tracing* because now all kretprobe traces should produce sane stack
> > traces. However it's not clear whether removing them completely is
> > reasonable.
> 
> Then, let's try to revert it :)

Sure. :P

> BTW, could you also add a test case for ftrace too?
> also, I have some comments below.

Yup, will do.

> > +#define KRETPROBE_TRACE_SIZE 1024
> > +struct kretprobe_trace {
> > +   int nr_entries;
> > +   unsigned long entries[KRETPROBE_TRACE_SIZE];
> > +};
> 
> Hmm, do we really need all entries? It takes 8KB for each instance.
> Note that the number of instances can be big if the system core number
> is larger.

Yeah, you're right this is too large for a default.

But the problem is that we need it to be large enough for any of the
tracers to be happy -- otherwise we'd have to dynamically allocate it
and I had a feeling this would be seen as a Bad Idea™ in the kprobe
paths.

  * ftrace uses PAGE_SIZE/sizeof(u64) == 512 (on x86_64).
  * perf_events (and thus BPF) uses 127 as the default but can be
configured via sysctl -- and thus can be unbounded.
  * show_stack(...) doesn't appear to have a limit, but I might just be
misreading the x86-specific code.

As mentioned above, the lack of consensus on a single structure for
storing stack traces also means that there is a lack of consensus on
what the largest reasonable stack is.

But maybe just doing 127 would be "reasonable"?

(Although, dynamically allocating would allow us to just use 'struct
stack_trace' directly without needing to embed a different structure.)

> > +   hlist_for_each_entry_safe(iter, next, head, hlist) {
> 
> Why would you use "_safe" variant here? if you don't modify the hlist,
> you don't need to use it.

Yup, my mistake.

> > +void kretprobe_save_stack_trace(struct kretprobe_instance *ri,
> > +   struct stack_trace *trace)
> > +{
> > +   int i;
> > +   struct kretprobe_trace *krt = &ri->entry;
> > +
> > +   for (i = trace->skip; i < krt->nr_entries; i++) {
> > +   if (trace->nr_entries >= trace->max_entries)
> > +   break;
> > +   trace->entries[trace->nr_entries++] = krt->entries[i];
> > +   }
> > +}
> > +EXPORT_SYMBOL_GPL(kretprobe_save_stack_trace);
> > +
> > +void kretprobe_perf_callchain_kernel(struct kretprobe_instance *ri,
> > +struct perf_callchain_entry_ctx *ctx)
> > +{
> > +   int i;
> > +   struct kretprobe_trace *krt = &ri->entry;
> > +
> > +   for (i = 0; i < krt->nr_entries; i++) {
> > +   if (krt->entries[i] == ULONG_MAX)
> > +   break;
> > +   perf_callchain_store(ctx, (u64) krt->entries[i]);
> > +   }
> > +}
> > +EXPORT_SYMBOL_GPL(kretprobe_perf_callchain_kernel);
> 
> 
> Why do we need to export these functions?

That's a good question -- I must've just banged out the EXPORT
statements without thinki

Re: [PATCH 2/2] gsmi: Log event for critical thermal thresholds

2018-10-29 Thread kbuild test robot
Hi Duncan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on soc-thermal/next]
[also build test ERROR on v4.19]
[cannot apply to next-20181029]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Ross-Zwisler/thermal-Add-notifier-call-chain-for-hot-critical-events/20181023-043806
base:   
https://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git 
next
config: i386-randconfig-k0-10291547 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/firmware/google/gsmi.o: In function `gsmi_exit':
>> drivers/firmware/google/gsmi.c:936: undefined reference to 
>> `unregister_thermal_notifier'
   drivers/firmware/google/gsmi.o: In function `gsmi_init':
>> drivers/firmware/google/gsmi.c:909: undefined reference to 
>> `register_thermal_notifier'

vim +936 drivers/firmware/google/gsmi.c

   787  
   788  static __init int gsmi_init(void)
   789  {
   790  unsigned long flags;
   791  int ret;
   792  
   793  ret = gsmi_system_valid();
   794  if (ret)
   795  return ret;
   796  
   797  gsmi_dev.smi_cmd = acpi_gbl_FADT.smi_command;
   798  
   799  /* register device */
   800  gsmi_dev.pdev = platform_device_register_full(&gsmi_dev_info);
   801  if (IS_ERR(gsmi_dev.pdev)) {
   802  printk(KERN_ERR "gsmi: unable to register platform 
device\n");
   803  return PTR_ERR(gsmi_dev.pdev);
   804  }
   805  
   806  /* SMI access needs to be serialized */
   807  spin_lock_init(&gsmi_dev.lock);
   808  
   809  ret = -ENOMEM;
   810  gsmi_dev.dma_pool = dma_pool_create("gsmi", &gsmi_dev.pdev->dev,
   811   GSMI_BUF_SIZE, 
GSMI_BUF_ALIGN, 0);
   812  if (!gsmi_dev.dma_pool)
   813  goto out_err;
   814  
   815  /*
   816   * pre-allocate buffers because sometimes we are called when
   817   * this is not feasible: oops, panic, die, mce, etc
   818   */
   819  gsmi_dev.name_buf = gsmi_buf_alloc();
   820  if (!gsmi_dev.name_buf) {
   821  printk(KERN_ERR "gsmi: failed to allocate name 
buffer\n");
   822  goto out_err;
   823  }
   824  
   825  gsmi_dev.data_buf = gsmi_buf_alloc();
   826  if (!gsmi_dev.data_buf) {
   827  printk(KERN_ERR "gsmi: failed to allocate data 
buffer\n");
   828  goto out_err;
   829  }
   830  
   831  gsmi_dev.param_buf = gsmi_buf_alloc();
   832  if (!gsmi_dev.param_buf) {
   833  printk(KERN_ERR "gsmi: failed to allocate param 
buffer\n");
   834  goto out_err;
   835  }
   836  
   837  /*
   838   * Determine type of handshake used to serialize the SMI
   839   * entry. See also gsmi_exec().
   840   *
   841   * There's a "behavior" present on some chipsets where writing 
the
   842   * SMI trigger register in the southbridge doesn't result in an
   843   * immediate SMI. Rather, the processor can execute "a few" more
   844   * instructions before the SMI takes effect. To ensure 
synchronous
   845   * behavior, implement a handshake between the kernel driver 
and the
   846   * firmware handler to spin until released. This ioctl 
determines
   847   * the type of handshake.
   848   *
   849   * NONE: The firmware handler does not implement any
   850   * handshake. Either it doesn't need to, or it's legacy firmware
   851   * that doesn't know it needs to and never will.
   852   *
   853   * CF: The firmware handler will clear the CF in the saved
   854   * state before returning. The driver may set the CF and test 
for
   855   * it to clear before proceeding.
   856   *
   857   * SPIN: The firmware handler does not implement any handshake
   858   * but the driver should spin for a hundred or so microseconds
   859   * to ensure the SMI has triggered.
   860   *
   861   * Finally, the handler will return -ENOSYS if
   862   * GSMI_CMD_HANDSHAKE_TYPE is unimplemented, which implies
   863   * HANDSHAKE_NONE.
   864   */
   865  spin_lock_irqsave(&gsmi_dev.lock, flags);
   866  gsmi_dev.handshake_type = GSMI_HANDSHAKE_SPIN;
   867  gsmi_dev.handshake_type =
   868

Re: memcg oops: memcg_kmem_charge_memcg()->try_charge()->page_counter_try_charge()->BOOM

2018-10-29 Thread Mike Galbraith
On Mon, 2018-10-29 at 21:49 +, Roman Gushchin wrote:
> On Mon, Oct 29, 2018 at 09:46:54PM +0100, Mike Galbraith wrote:
> 
> > Ah, I have cgroup_disable=memory on the command line, which turns out
> > to be why your box doesn't explode, while mine does.
> 
> Yeah, here it is. I'll send the fix in few minutes. Please,
> test it on your setup. Your tested-by will be appreciated.

Yup, all-better-by:/me


Re: [RFC PATCH] Minimal non-child process exit notification support

2018-10-29 Thread Joel Fernandes
On Mon, Oct 29, 2018 at 1:01 PM Daniel Colascione  wrote:
>
> Thanks for taking a look.
>
> On Mon, Oct 29, 2018 at 7:45 PM, Joel Fernandes  wrote:
> >
> > On Mon, Oct 29, 2018 at 10:53 AM Daniel Colascione  
> > wrote:
> > >
> > > This patch adds a new file under /proc/pid, /proc/pid/exithand.
> > > Attempting to read from an exithand file will block until the
> > > corresponding process exits, at which point the read will successfully
> > > complete with EOF.  The file descriptor supports both blocking
> > > operations and poll(2). It's intended to be a minimal interface for
> > > allowing a program to wait for the exit of a process that is not one
> > > of its children.
> > >
> > > Why might we want this interface? Android's lmkd kills processes in
> > > order to free memory in response to various memory pressure
> > > signals. It's desirable to wait until a killed process actually exits
> > > before moving on (if needed) to killing the next process. Since the
> > > processes that lmkd kills are not lmkd's children, lmkd currently
> > > lacks a way to wait for a proces to actually die after being sent
> > > SIGKILL; today, lmkd resorts to polling the proc filesystem pid
> >
> > Any idea why it needs to wait and then send SIGKILL? Why not do
> > SIGKILL and look for errno == ESRCH in a loop with a delay.
>
> I want to get polling loops out of the system. Polling loops are bad
> for wakeup attribution, bad for power, bad for priority inheritance,
> and bad for latency. There's no right answer to the question "How long
> should I wait before checking $CONDITION again?". If we can have an
> explicit waitqueue interface to something, we should. Besides, PID
> polling is vulnerable to PID reuse, whereas this mechanism (just like
> anything based on struct pid) is immune to it.

The argument sounds OK to me. I would also like more details in the commit
message about the alternate methods to do this (such as kill polling
or ptrace) and why they don't work well, etc., so no one asks any
questions. Like maybe under an "other ways to do this" section. A bit
of googling also showed a netlink way of doing it without polling
(though I didn't look into that much and wouldn't be surprised if it's
more complicated).

Also I guess when you send a patch, it'd be good to pass
"--cc-cmd='./scripts/get_maintainer.pl'" to git-send-email so it
automatically CCs the maintainers who maintain this.

thanks,

- Joel


Re: [PATCH] arm64/numa: Add more vetting in numa_set_distance()

2018-10-29 Thread Anshuman Khandual



On 10/29/2018 08:18 PM, Will Deacon wrote:
> On Mon, Oct 29, 2018 at 06:15:42PM +0530, Anshuman Khandual wrote:
>> On 10/29/2018 06:02 PM, John Garry wrote:
>>> On 29/10/2018 12:16, Will Deacon wrote:
 On Mon, Oct 29, 2018 at 12:14:09PM +, John Garry wrote:
> On 29/10/2018 11:25, Will Deacon wrote:
>> On Fri, Oct 26, 2018 at 09:57:47PM +0800, John Garry wrote:
>>> Currently it is acceptable to set the distance between 2 separate nodes 
>>> to
>>> LOCAL_DISTANCE.
>>>
>>> Reject this as it is invalid.
>>>
>>> This change avoids a crash reported in [1].
>>>
>>> [1] https://www.spinics.net/lists/arm-kernel/msg683304.html
>>>
>>> Signed-off-by: John Garry 
>>>
>>> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
>>> index 146c04c..6092e3d 100644
>>> --- a/arch/arm64/mm/numa.c
>>> +++ b/arch/arm64/mm/numa.c
>>> @@ -335,7 +335,8 @@ void __init numa_set_distance(int from, int to, int 
>>> distance)
>>> }
>>>
>>> if ((u8)distance != distance ||
>>> -    (from == to && distance != LOCAL_DISTANCE)) {
>>> +    (from == to && distance != LOCAL_DISTANCE) ||
>>> +    (from != to && distance == LOCAL_DISTANCE)) {
>>
>> The current code here is more-or-less lifted from the x86 implementation
>> of numa_set_distance().
>
> Right, I did notice this. I didn't think that x86 folks would be so
> concerned since they generally only use ACPI, and the ACPI code already
> validates these distances in drivers/acpi/numa.c: slit_valid() [unlike OF
> code].
>
>  I think we should either factor out the sanity check
>> into a core helper or make the core code robust to these funny 
>> configurations.
>
> OK, so to me it would make sense to factor out a sanity check into a core
> helper.

 That, or have the OF code perform the same validation that slit_valid() is
 doing for ACPI. I'm just trying to avoid other architectures running into
 this problem down the line.

>>>
>>> Right, OF code should do this validation job if ACPI is doing it
>>> (especially since the DT bindings actually specify the distance rules),
>>> and not rely on the arch NUMA code to accept/reject numa_set_distance()
>>> combinations.
>>
>> I would say this particular condition checking still falls under arch NUMA 
>> init
>> code sanity check like other basic tests what numa_set_distance() currently 
>> does
>> already but it should not be a necessity for the OF driver to check these. 
>> It can
>> choose to check but arch NUMA should check basic things like two different 
>> NUMA
>> nodes should not have LOCAL_DISTANCE as distance like in this case.
>>
>>  (from == to && distance != LOCAL_DISTANCE) ||
>>  (from != to && distance == LOCAL_DISTANCE))
>>
>>
>>>
>>> And, in addition to this, I'd say OF should disable NUMA if given an
>>> invalid table (like ACPI does).
>>
>> Taking a decision to disable NUMA should be with kernel (arch NUMA) once 
>> kernel
>> starts booting. Platform should have sent right values, OF driver trying to
>> adjust stuff what platform has sent with FDT once the kernel starts booting 
>> is
>> not right. For example "Kernel NUMA wont like the distance factors lets clean
>> then up before passing on to MM". Disabling NUMA is one such major decision 
>> which
>> should be with arch NUMA code not with OF driver.
> 
> I don't fully understand what you're getting at here, but why would the
> check posted by John be arch-specific? It's already done in the core code
> for ACPI, so there's a discrepancy between ACPI and FDT that should be
> resolved. I'd also argue that the subtleties of this check are actually
> based on what the core code is willing to accept in terms of the NUMA
> description, so it's also the best place to enforce it.

Agreed. I had overlooked the existing semantics with respect to ACPI parsing.
Yes, there is a discrepancy with respect to FDT which should be fixed. But
IMHO it's also worth enhancing the numa_set_distance() checks with this
proposed new check as well.


Re: Can VFIO pin only a specific region of guest mem when use pass through devices?

2018-10-29 Thread Peter Xu
On Mon, Oct 29, 2018 at 12:29:22PM -0600, Alex Williamson wrote:
> On Mon, 29 Oct 2018 17:14:46 +0800
> Jason Wang  wrote:
> 
> > On 2018/10/29 上午10:42, Simon Guo wrote:
> > > Hi,
> > >
> > > I am using network device pass through mode with qemu x86(-device 
> > > vfio-pci,host=:xx:yy.z)
> > > and “intel_iommu=on” in host kernel command line, and it shows the whole 
> > > guest memory
> > > were pinned(vfio_pin_pages()), viewed by the “top” RES memory output. I 
> > > understand it is due
> > > to device can DMA to any guest memory address and it cannot be swapped.
> > >
> > > However can we just pin a rang of address space allowed by iommu group of 
> > > that device,
> > > instead of pin whole address space? I do notice some code like 
> > > vtd_host_dma_iommu().
> > > Maybe there is already some way to enable that?
> > >
> > > Sorry if I missed some basics. I googled some but no luck to find the 
> > > answer yet. Please
> > > let me know if any discussion already raised on that.
> > >
> > > Any other suggestion will also be appreciated. For example, can we modify 
> > > the guest network
> > > card driver to allocate only from a specific memory region(zone), and 
> > > qemu advises guest
> > > kernel to only pin that memory region(zone) accordingly?
> > >
> > > Thanks,
> > > - Simon  
> > 
> > 
> > One possible method is to enable IOMMU of VM.
> 
> Right, making use of a virtual IOMMU in the VM is really the only way
> to bound the DMA to some subset of guest memory, but vIOMMU usage by
> the guest is optional on x86 and even if the guest does use it, it might
> enable passthrough mode, which puts you back at the problem that all
> guest memory is pinned with the additional problem that it might also
> be accounted for once per assigned device and may hit locked memory
> limits.  Also, the DMA mapping and unmapping path with a vIOMMU is very
> slow, so performance of the device in the guest will be abysmal unless
> the use case is limited to very static mappings, such as userspace use
> within the guest for nested assignment or perhaps DPDK use cases.
> 
> Modifying the guest to only use a portion of memory for DMA sounds like
> a quite intrusive option.  There are certainly IOMMU models where the
> IOMMU provides a fixed IOVA range, but creating dynamic mappings within
> that range doesn't really solve anything given that it simply returns
> us to a vIOMMU with slow mapping.  A window with a fixed identity
> mapping used as a DMA zone seems plausible, but again, also pretty
> intrusive to the guest, possibly also to the drivers.  Host IOMMU page
> faulting can also help the pinned memory footprint, but of course
> requires hardware support and lots of new code paths, many of which are
> already being discussed for things like Scalable IOV and SVA.  Thanks,

Agree with Jason's and Alex's comments.  One trivial addition: the
whole guest RAM will possibly still be pinned for a very short period
during guest system boot (e.g., when running guest BIOS) and before
the guest kernel enables the vIOMMU for the assigned device since the
bootup code like BIOS would still need to be able to access the whole
guest memory.

Thanks,

-- 
Peter Xu


How to implement "#interrupt-cells = <2>" for a gpiochip?

2018-10-29 Thread Daniel Santos
Hello,

I'm trying to use a GPIO as an interrupt on an mt7620 (using OpenWRT
drivers) and I can't seem to figure out how to glue my two-celled
interrupt description (including the trigger) to the device tree code. 
This is the gpio driver I'm using: 
https://github.com/openwrt/openwrt/blob/master/target/linux/ramips/patches-4.14/0027-GPIO-MIPS-ralink-add-gpio-driver-for-ralink-SoC.patch

And this is the gpio chip in the device tree:

gpio0: gpio@600 {
compatible = "ralink,mt7620a-gpio", 
"ralink,rt2880-gpio";
reg = <0x600 0x34>;

resets = <&rstctrl 13>;
reset-names = "pio";

interrupt-parent = <&intc>;
interrupts = <6>;

interrupt-controller;
#interrupt-cells = <2>;

gpio-controller;
#gpio-cells = <2>;

ralink,gpio-base = <0>;
ralink,num-gpios = <24>;
ralink,register-map = [ 00 04 08 0c
20 24 28 2c
30 34 ];
};


I've added the "interrupt-controller;" and "#interrupt-cells" myself. 
This is my i2c device:

&i2c {
status = "okay";

imu: lsm6ds3@6b {
compatible = "st,lsm6ds3";
reg = <0x6b>;
interrupt-parent = <&gpio0>;
interrupts = <14 IRQ_TYPE_EDGE_FALLING>;
};
};


The problem is that when the driver probes and asks what the trigger for
the irq is, it returns zero instead of 2 (IRQ_TYPE_EDGE_FALLING).  I
presume this is because the two-celled interrupts aren't implemented by
the gpio driver? 
Documentation/devicetree/bindings/interrupt-controller/interrupts.txt says:

A device is marked as an interrupt controller with the
"interrupt-controller"
property. This is a empty, boolean property. An additional
"#interrupt-cells"
property defines the number of cells needed to specify a single interrupt.

It is the responsibility of the interrupt controller's binding to define the
length and format of the interrupt specifier. The following two variants are
commonly used:
...

However, I'm having great trouble finding documentation on how to write
these bindings. Can anybody give me a pointer please?

Thanks,
Daniel


Re: [PATCH] arm64/numa: Add more vetting in numa_set_distance()

2018-10-29 Thread Anshuman Khandual



On 10/29/2018 08:14 PM, John Garry wrote:
>
>  I think we should either factor out the sanity check
>> into a core helper or make the core code robust to these funny 
>> configurations.
>
> OK, so to me it would make sense to factor out a sanity check into a core
> helper.

 That, or have the OF code perform the same validation that slit_valid() is
 doing for ACPI. I'm just trying to avoid other architectures running into
 this problem down the line.

>>>
>>> Right, OF code should do this validation job if ACPI is doing it 
>>> (especially since the DT bindings actually specify the distance rules), and 
>>> not rely on the arch NUMA code to accept/reject numa_set_distance() 
>>> combinations.
>>
>> I would say this particular condition checking still falls under arch NUMA 
>> init
>> code sanity check like other basic tests what numa_set_distance() currently 
>> does
>> already but it should not be a necessity for the OF driver to check these.
> 
> The checks in the arch NUMA code mean that invalid inter-node distance 
> combinations are ignored.

Right and should not this new test (from != to && distance == LOCAL_DISTANCE) be
one of them as well ? numa_set_distance() updates the table or just throws some
warnings while skipping entries it deems invalid. It would be okay to have this
new check there in addition to others like this patch suggests.

> 
> However, if any entries in the table are invalid, then the whole table can be 
> discarded as none of it can be believed, i.e. it's better to validate the 
> table.
>

Agreed. slit_valid() on the ACPI parsing is currently enforcing that before
acpi_numa_slit_init() which would call into numa_set_distance(). Hence arch
NUMA code numa_set_distance() never had the opportunity to do the sanity
checks as ACPI slit_valid() has completely invalidated the table.

Unlike the ACPI path, of_numa_parse_distance_map_v1() does not do any sanity
checks on the distance values parsed from the "distance-matrix" property,
so all the checking falls directly on numa_set_distance(). This needs to
be fixed in line with ACPI:

* If (to == from) ---> distance = LOCAL_DISTANCE
* If (to != from) ---> distance > LOCAL_DISTANCE

At the same time it's okay to just enhance numa_set_distance() test coverage
to include this new test. If we trusted firmware parsing all the way, the
existing basic checks on node range and distance would not need to be in
numa_set_distance() at all. Hence IMHO even if we fix the OF driver
part, we should include this new check there as well.

> It can
>> choose to check but arch NUMA should check basic things like two different 
>> NUMA
>> nodes should not have LOCAL_DISTANCE as distance like in this case.
>>
>> (from == to && distance != LOCAL_DISTANCE) ||
>>     (from != to && distance == LOCAL_DISTANCE))
>>
>>
>>>
>>> And, in addition to this, I'd say OF should disable NUMA if given an 
>>> invalid table (like ACPI does).
>>
>> Taking a decision to disable NUMA should be with kernel (arch NUMA) once 
>> kernel
>> starts booting. Platform should have sent right values, OF driver trying to
>> adjust stuff what platform has sent with FDT once the kernel starts booting 
>> is
>> not right. For example "Kernel NUMA wont like the distance factors lets clean
>> then up before passing on to MM".
> 
> Sorry, but I don't know who was advocating this.

I was just giving an example. Invalidating NUMA distance table during firmware
table (ACPI or FDT) parsing forces arm64_numa_init() to fall back on dummy NUMA
node which is like disabling NUMA. But that is the current semantics with ACPI
parsing which I overlooked. Fixing OF driver to do the same wont extend this
any further, hence my previous concern does not stand valid.

> 
> Disabling NUMA is one such major decision which
>> should be with arch NUMA code not with OF driver.
> 
> I meant parsing the table would fail, so arch NUMA would fall back on dummy 
> NUMA.

Right and ACPI parsing does that and can force a fallback on a dummy NUMA node.


RE: [PATCH] binder: ipc namespace support for android binder

2018-10-29 Thread 周威
> > > It's not obvious from this patch where this dependency comes 
> > > from...why is SYSVIPC required? I'd like to not have to require 
> > > IPC_NS either for devices.
> >
> > Yes, the patch is not highly dependent on SYSVIPC, but it will be 
> > convenient if require it. I will update it to drop dependency of it in 
> > V2 patch. This patch doesn't need IPC_NS set at present.
> 
> Actually it is dependent on IPC_NS since it makes changes to ipc/namespace.c 
> which is compiled only if CONFIG_IPC_NS.
> 

Actually it does not require IPC_NS, the code in ipc/namespace.c are namespace 
specific, 
and is *not needed* if ipc namespace is not supported.  <-- fixed here

> There are a couple more implementations similar to this one.
> https://lwn.net/Articles/577957/ and some submissions to AOSP derived from 
> that one that introduce a generic registration function for namespace support 
> [1], and changes to binder to implement namespaces [2].
> 
> If this is really needed, then we should have a solution that works for 
> devices without requiring IPC_NS or SYSVIPC. Also, we should not add 
> binder-specific code to ipc/namespace.c or include/linux/ipc_namespace.h.
> 
> -Todd
> 
> [1] https://android-review.googlesource.com/c/kernel/common/+/471961
> [2] https://android-review.googlesource.com/c/kernel/common/+/471825
>

If binder is to be isolated by namespace, it must put the binder proc and binder
context in ipc_namespace (or use something like a void * as [1] did).
I have sent the V2 patch; that patch does not require SYSVIPC or IPC_NS. If
IPC_NS is not set, binder_init will put proc and context into init_ipc_ns.
If SYSVIPC and CONFIG_POSIX_MQUEUE are both unset, I will make a fake
init_ipc_ns to hold them. It is intentionally marked non-static so that the
compiler generates an error if it is already defined somewhere else. The code in
ipc/namespace.c just notifies binder to do some initialization when namespaces
are being created. If IPC_NS is not set, the initialization in binder_init will
be enough. So please review and test the V2 patch.


Re: [PATCH] mm: handle no memcg case in memcg_kmem_charge() properly

2018-10-29 Thread Shakeel Butt
On Mon, Oct 29, 2018 at 6:01 PM Rik van Riel  wrote:
>
> On Mon, 2018-10-29 at 17:50 -0700, Shakeel Butt wrote:
> > On Mon, Oct 29, 2018 at 2:52 PM Roman Gushchin  wrote:
> > >
> > > Mike Galbraith reported a regression caused by the commit
> > > 9b6f7e163cd0
> > > ("mm: rework memcg kernel stack accounting") on a system with
> > > "cgroup_disable=memory" boot option: the system panics with the
> > > following stack trace:
> > >
> > >   [0.928542] BUG: unable to handle kernel NULL pointer dereference
> > > at 00f8
> > >   [0.929317] PGD 0 P4D 0
> > >   [0.929573] Oops: 0002 [#1] PREEMPT SMP PTI
> > >   [0.929984] CPU: 0 PID: 1 Comm: systemd Not tainted 4.19.0-
> > > preempt+ #410
> > >   [0.930637] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > > BIOS ?-20180531_142017-buildhw-08.phx2.fed4
> > >   [0.931862] RIP: 0010:page_counter_try_charge+0x22/0xc0
> > >   [0.932376] Code: 41 5d c3 c3 0f 1f 40 00 0f 1f 44 00 00 48 85 ff
> > > 0f 84 a7 00 00 00 41 56 48 89 f8 49 89 fe 49
> > >   [0.934283] RSP: 0018:acf68031fcb8 EFLAGS: 00010202
> > >   [0.934826] RAX: 00f8 RBX:  RCX:
> > > 
> > >   [0.935558] RDX: acf68031fd08 RSI: 0020 RDI:
> > > 00f8
> > >   [0.936288] RBP: 0001 R08: 8063 R09:
> > > 99ff7cd37a40
> > >   [0.937021] R10: acf68031fed0 R11: 0020 R12:
> > > 0020
> > >   [0.937749] R13: acf68031fd08 R14: 00f8 R15:
> > > 99ff7da1ec60
> > >   [0.938486] FS:  7fc2140bb280() GS:99ff7da0()
> > > knlGS:
> > >   [0.939311] CS:  0010 DS:  ES:  CR0: 80050033
> > >   [0.939905] CR2: 00f8 CR3: 12dc8002 CR4:
> > > 00760ef0
> > >   [0.940638] DR0:  DR1:  DR2:
> > > 
> > >   [0.941366] DR3:  DR6: fffe0ff0 DR7:
> > > 0400
> > >   [0.942110] PKRU: 5554
> > >   [0.942412] Call Trace:
> > >   [0.942673]  try_charge+0xcb/0x780
> > >   [0.943031]  memcg_kmem_charge_memcg+0x28/0x80
> > >   [0.943486]  ? __vmalloc_node_range+0x1e4/0x280
> > >   [0.943971]  memcg_kmem_charge+0x8b/0x1d0
> > >   [0.944396]  copy_process.part.41+0x1ca/0x2070
> > >   [0.944853]  ? get_acl+0x1a/0x120
> > >   [0.945200]  ? shmem_tmpfile+0x90/0x90
> > >   [0.945596]  _do_fork+0xd7/0x3d0
> > >   [0.945934]  ? trace_hardirqs_off_thunk+0x1a/0x1c
> > >   [0.946421]  do_syscall_64+0x5a/0x180
> > >   [0.946798]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > >
> > > The problem occurs because get_mem_cgroup_from_current() returns
> > > the NULL pointer if memory controller is disabled. Let's check
> > > if this is a case at the beginning of memcg_kmem_charge() and
> > > just return 0 if mem_cgroup_disabled() returns true. This is how
> > > we handle this case in many other places in the memory controller
> > > code.
> > >
> > > Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
> > > Reported-by: Mike Galbraith 
> > > Signed-off-by: Roman Gushchin 
> > > Cc: Michal Hocko 
> > > Cc: Johannes Weiner 
> > > Cc: Vladimir Davydov 
> > > Cc: Andrew Morton 
> > > ---
> > >  mm/memcontrol.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 54920cbc46bf..6e1469b80cb7 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page,
> > > gfp_t gfp, int order)
> > > struct mem_cgroup *memcg;
> > > int ret = 0;
> > >
> > > -   if (memcg_kmem_bypass())
> > > +   if (mem_cgroup_disabled() || memcg_kmem_bypass())
> > > return 0;
> > >
> >
> > Why not check memcg_kmem_enabled() before calling memcg_kmem_charge()
> > in memcg_charge_kernel_stack()?
>
> Check Roman's backtrace again. The function
> memcg_charge_kernel_stack() is not in it.
>

It got inlined.

> That is why it is generally better to check
> in the called function, rather than add a
> check to every call site (and maybe miss one
> or two).
>

I think the reason the check was at the call site was to avoid introducing a
jmp/call in the allocation hot path for processes in the root memcg. I
don't have any strong preference but we should be consistent, i.e.
check at the call site for all, or check in the called function for all.

Shakeel


RE: [PATCH] binder: ipc namespace support for android binder

2018-10-29 Thread 周威
> > > It's not obvious from this patch where this dependency comes 
> > > from...why is SYSVIPC required? I'd like to not have to require 
> > > IPC_NS either for devices.
> >
> > Yes, the patch is not highly dependent on SYSVIPC, but it will be 
> > convenient if require it. I will update it to drop dependency of it in 
> > V2 patch. This patch doesn't need IPC_NS set at present.
> 
> Actually it is dependent on IPC_NS since it makes changes to ipc/namespace.c 
> which is compiled only if CONFIG_IPC_NS.
> 

Actually it does not require IPC_NS; the code in ipc/namespace.c is namespace
specific, and is *not needed* if ipc namespace is not supported.

> There are a couple more implementations similar to this one.
> https://lwn.net/Articles/577957/ and some submissions to AOSP derived from 
> that one that introduce a generic registration function for namespace support 
> [1], and changes to binder to implement namespaces [2].
> 
> If this is really needed, then we should have a solution that works for 
> devices without requiring IPC_NS or SYSVIPC. Also, we should not add 
> binder-specific code to ipc/namespace.c or include/linux/ipc_namespace.h.
> 
> -Todd
> 
> [1] https://android-review.googlesource.com/c/kernel/common/+/471961
> [2] https://android-review.googlesource.com/c/kernel/common/+/471825
>

If binder is to be isolated by namespace, it must put the binder proc and binder
context in ipc_namespace (or use something like a void * as [1] did).
I have sent the V2 patch; that patch does not require SYSVIPC or IPC_NS. If
IPC_NS is not set, binder_init will put proc and context into init_ipc_ns.
If SYSVIPC and CONFIG_POSIX_MQUEUE are both unset, I will make a fake
init_ipc_ns to hold them. It is intentionally marked non-static so that the
compiler generates an error if it is already defined somewhere else. The code in
ipc/namespace.c just notifies binder to do some initialization when namespaces
are being created. If IPC_NS is not set, the initialization in binder_init will
be enough. So please review and test the V2 patch.


[PATCH -next] nds32: Remove duplicated include from pm.c

2018-10-29 Thread YueHaibing
Remove duplicated include.

Signed-off-by: YueHaibing 
---
 arch/nds32/kernel/pm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
index 6989560..ffa8040 100644
--- a/arch/nds32/kernel/pm.c
+++ b/arch/nds32/kernel/pm.c
@@ -5,7 +5,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 







Re: [PATCH v8 2/2] samples: add an example of seccomp user trap

2018-10-29 Thread Tycho Andersen
On Mon, Oct 29, 2018 at 11:31:00PM +, Serge E. Hallyn wrote:
> On Mon, Oct 29, 2018 at 04:40:31PM -0600, Tycho Andersen wrote:
> > +   if (req->data.nr != __NR_mount) {
> > +   fprintf(stderr, "huh? trapped something besides mknod? %d\n", 
> > req->data.nr);
> 
> 'besides mount' ?

Yes, thanks :)

Tycho


Re: [PATCH] V4 init/main.c Enable watchdog_thresh control from kernel line

2018-10-29 Thread kbuild test robot
Hi Laurence,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux-sof-driver/master]
[also build test ERROR on v4.19 next-20181029]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Laurence-Oberman/V4-init-main-c-Enable-watchdog_thresh-control-from-kernel-line/20181025-040136
base:   https://github.com/thesofproject/linux master
config: i386-randconfig-k3-10291547 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   init/main.c: In function 'is_watchdog_thresh_setup':
>> init/main.c:1036:20: error: 'watchdog_thresh' undeclared (first use in this 
>> function); did you mean 'proc_watchdog_thresh'?
 get_option(&str, &watchdog_thresh);
   ^~~
   proc_watchdog_thresh
   init/main.c:1036:20: note: each undeclared identifier is reported only once 
for each function it appears in

vim +1036 init/main.c

  1033  
  1034  static int __init is_watchdog_thresh_setup(char *str)
  1035  {
> 1036  get_option(&str, &watchdog_thresh);
  1037  return 1;
  1038  }
  1039  __setup("watchdog_thresh=", is_watchdog_thresh_setup);
  1040  
  1041  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCHES/RFC] Re: A concern about overflow ring buffer mode

2018-10-29 Thread Liang, Kan




On 10/29/2018 6:42 PM, David Miller wrote:

From: "Liang, Kan" 
Date: Mon, 29 Oct 2018 18:32:40 -0400


- struct annotation_options *annotation_options 
__maybe_unused)
+ struct annotation_options *annotation_options __maybe_unused,
+ atomic_t *nr_rb_read __maybe_unused)
  {


What is going on with the indentations of this patch?



Sorry, my editor auto wraps the line.
The patch has been sent in a separate email.

Thanks,
Kan


[RFC PATCH] perf top: Move the timeout warning from event processing thread to display thread

2018-10-29 Thread kan . liang
From: Kan Liang 

The main event processing thread may hang if the ring buffer event
processing times out.

Analysis from David Miller:
"It hangs the event thread, because the ui call waits for a keypress
but the display thread will eat them up and the event thread thus
hangs in select()."

The timeout warning is moved to display thread.

The nr_rb_read is introduced to track the times of
perf_top__mmap_read(), which is the main function of event processing.
If the nr_rb_read doesn't increase during the refresh time, the display
thread may output stale data. The timeout warning will be triggered.

The timeout warning can only be triggered one time to avoid the annoying
and duplicated warning message.

The first perf_top__mmap_read() is moved to after the display thread is
created, because perf_top__mmap_read() can take a long time. For example,
the function may take tens of minutes on a Knights Landing platform with a
parallel kernel build, during which nothing would be displayed on the
screen. So the display thread has to be created before
perf_top__mmap_read(). But at that time the data is not ready, so the
display thread has to sleep for one refresh interval first.

Fixes: 8cc42de736b6 ("perf top: Check the latency of perf_top__mmap_read()")
Reported-by: David Miller 
Signed-off-by: Kan Liang 
---
 tools/perf/builtin-c2c.c   |  4 +--
 tools/perf/builtin-report.c|  3 ++-
 tools/perf/builtin-top.c   | 39 +++-
 tools/perf/ui/browsers/hists.c | 58 ++
 tools/perf/ui/browsers/hists.h |  2 +-
 tools/perf/util/hist.h |  6 +++--
 tools/perf/util/top.h  |  1 +
 7 files changed, 85 insertions(+), 28 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index f3aa9d0..1e77515 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2371,7 +2371,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry 
*he)
c2c_browser__update_nr_entries(browser);
 
while (1) {
-   key = hist_browser__run(browser, "? - help", true);
+   key = hist_browser__run(browser, "? - help", true, NULL);
 
switch (key) {
case 's':
@@ -2440,7 +2440,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
c2c_browser__update_nr_entries(browser);
 
while (1) {
-   key = hist_browser__run(browser, "? - help", true);
+   key = hist_browser__run(browser, "? - help", true, NULL);
 
switch (key) {
case 'q':
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 257c9c1..2fc1273 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -561,7 +561,8 @@ static int report__browse_hists(struct report *rep)
ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
rep->min_percent,
&session->header.env,
-   true, 
&rep->annotation_opts);
+   true, &rep->annotation_opts,
+   NULL);
/*
 * Usually "ret" is the last pressed key, and we only
 * care if the key notifies us to switch data file.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d21d875..95409de 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -584,6 +584,8 @@ static void *display_thread_tui(void *arg)
.refresh= top->delay_secs,
};
 
+   sleep(top->delay_secs);
+
/* In order to read symbols from other namespaces perf to  needs to call
 * setns(2).  This isn't permitted if the struct_fs has multiple users.
 * unshare(2) the fs so that we may continue to setns into namespaces
@@ -607,7 +609,8 @@ static void *display_thread_tui(void *arg)
  top->min_percent,
  &top->session->header.env,
  !top->record_opts.overwrite,
- &top->annotation_opts);
+ &top->annotation_opts,
+ &top->nr_rb_read);
 
done = 1;
return NULL;
@@ -633,6 +636,11 @@ static void *display_thread(void *arg)
struct termios save;
struct perf_top *top = arg;
int delay_msecs, c;
+   bool rb_read_timeout_warned = false;
+   bool rb_read_timeout = false;
+   int last_nr_rb_read = 0;
+
+   sleep(top->delay_secs);
 
/* In order to read symbols from other namespaces perf to  needs to call
 * setns(2).  This isn't permitted if the struct_fs has multiple users.
@@ -651,12 +659,26 @@ static void *display_thread(void *arg)
 
while (!done) {
perf_top__print_sym_tab

arch/x86/include/asm/rmwcc.h:23:17: error: jump into statement expression

2018-10-29 Thread kbuild test robot
Hi Peter,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   4b42745211af552f170f38a1b97f4a112b5da6b2
commit: 7aa54be2976550f17c11a1c3e3630002dea39303 locking/qspinlock, x86: 
Provide liveness guarantee
date:   13 days ago
config: x86_64-randconfig-j0-10290909 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4
reproduce:
git checkout 7aa54be2976550f17c11a1c3e3630002dea39303
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

   In file included from arch/x86/include/asm/atomic.h:5:0,
from include/linux/atomic.h:7,
from include/linux/crypto.h:20,
from arch/x86/kernel/asm-offsets.c:9:
   arch/x86/include/asm/qspinlock.h: In function 
'queued_fetch_set_pending_acquire':
>> arch/x86/include/asm/rmwcc.h:23:17: error: jump into statement expression
   : clobbers : cc_label);\
^
   include/linux/compiler.h:58:42: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
   arch/x86/include/asm/qspinlock.h:18:2: note: in expansion of macro 'if'
 if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
 ^
   arch/x86/include/asm/rmwcc.h:21:2: note: in expansion of macro 
'asm_volatile_goto'
 asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"  \
 ^
   arch/x86/include/asm/rmwcc.h:54:2: note: in expansion of macro '__GEN_RMWcc'
 __GEN_RMWcc(op " %[val], " arg0, var, cc,   \
 ^
   arch/x86/include/asm/rmwcc.h:58:2: note: in expansion of macro 
'GEN_BINARY_RMWcc_6'
 GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
 ^
   arch/x86/include/asm/rmwcc.h:9:30: note: in expansion of macro 
'GEN_BINARY_RMWcc_5'
#define __RMWcc_CONCAT(a, b) a ## b
 ^
   arch/x86/include/asm/rmwcc.h:10:28: note: in expansion of macro 
'__RMWcc_CONCAT'
#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
   ^
   arch/x86/include/asm/rmwcc.h:60:32: note: in expansion of macro 
'RMWcc_CONCAT'
#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, 
RMWcc_ARGS(X))(X)
   ^
   arch/x86/include/asm/qspinlock.h:18:6: note: in expansion of macro 
'GEN_BINARY_RMWcc'
 if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
 ^
   arch/x86/include/asm/rmwcc.h:25:1: note: label 'cc_label' defined here
cc_label: c = true;  \
^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
   arch/x86/include/asm/qspinlock.h:18:2: note: in expansion of macro 'if'
 if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
 ^
   arch/x86/include/asm/rmwcc.h:54:2: note: in expansion of macro '__GEN_RMWcc'
 __GEN_RMWcc(op " %[val], " arg0, var, cc,   \
 ^
   arch/x86/include/asm/rmwcc.h:58:2: note: in expansion of macro 
'GEN_BINARY_RMWcc_6'
 GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
 ^
   arch/x86/include/asm/rmwcc.h:9:30: note: in expansion of macro 
'GEN_BINARY_RMWcc_5'
#define __RMWcc_CONCAT(a, b) a ## b
 ^
   arch/x86/include/asm/rmwcc.h:10:28: note: in expansion of macro 
'__RMWcc_CONCAT'
#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
   ^
   arch/x86/include/asm/rmwcc.h:60:32: note: in expansion of macro 
'RMWcc_CONCAT'
#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, 
RMWcc_ARGS(X))(X)
   ^
   arch/x86/include/asm/qspinlock.h:18:6: note: in expansion of macro 
'GEN_BINARY_RMWcc'
 if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
 ^
>> arch/x86/include/asm/rmwcc.h:25:1: error: duplicate label 'cc_label'
cc_label: c = true;  \
^
   include/linux/compiler.h:58:42: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
   arch/x86/include/asm/qspinlock.h:18:2: note: in expansion of macro 'if'
 if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
 ^
   arch/x86/include/asm/rmwcc.h:54:2: note: in expansion of macro '__GEN_RMWcc'
 __GEN_RMWcc(op " %[val], " arg0, var, cc,   \
 ^
   arch/x86/include/asm/rmwcc.h:58:2: note: in expansion of macro 
'GEN_BINARY_RMWcc_6'
 GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
 ^
   arch/x86/include/asm/rmwcc.h:9:30: note: in expansion of macro 
'GEN_BINARY_RMWcc_5'
#define __RMWcc_CONCAT(a, b) a ## b
 ^
   arch/x86/include/asm/rmwcc.h:10:28: note: in expansion of macro 
'__RMWcc_CONCAT'
#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
  

Re: [PATCH v2 1/1] iommu/arm-smmu-v3: eliminate a potential memory corruption on Hi16xx soc

2018-10-29 Thread Leizhen (ThunderTown)



On 2018/10/30 1:59, Will Deacon wrote:
> On Sat, Oct 20, 2018 at 03:36:54PM +0800, Zhen Lei wrote:
>> The standard GITS_TRANSLATER register in ITS is only 4 bytes, but
>> Hisilicon expands the next 4 bytes to carry some IMPDEF information. That
>> means, total 8 bytes data will be written to MSIAddress each time.
>>
>> MSIAddr: |4bytes|4bytes|
>>   |MSIData   |IMPDEF|
>>
>> There is no problem for ITS, because the next 4 bytes space is reserved
>> in ITS. But it will overwrite the 4 bytes memory following "sync_count".
>> It's very fortunate that the previous and the next neighbour of the
>> "sync_count" are both aligned by 8 bytes, so no problem is met now.
>>
>> It's good to explicitly add a workaround:
>> 1. Add gcc __attribute__((aligned(8))) to make sure that "sync_count" is
>>always aligned by 8 bytes.
>> 2. Add a "int" struct member to make sure the 4 bytes padding is always
>>exist.
>>
>> There is no functional change.
>>
>> Signed-off-by: Zhen Lei 
>> ---
>>  drivers/iommu/arm-smmu-v3.c | 15 ++-
>>  1 file changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index 5059d09..624fdd0 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -586,7 +586,20 @@ struct arm_smmu_device {
>>  
>>  struct arm_smmu_strtab_cfg  strtab_cfg;
>>  
>> -u32 sync_count;
>> +/*
>> + * The alignment and padding is required by Hi16xx of Hisilicon.
>> + * Because the ITS hardware on Hi16xx will truncate the MSIAddress(Here
>> + * it's the address of "sync_count") to 8 bytes boundary first, then
>> + * write 32 bits MSIdata at offset 0, and 32 bits IMPDEF data at offset
>> + * 4. Without this workaround, the adjacent member maybe overwritten.
>> + *
>> + *|---4bytes---|---4bytes---|
>> + * MSIAddress & (~0x7):   MSIdata  | IMPDEF data|
>> + */
>> +struct {
>> +u32 sync_count;
>> +int padding;
>> +} __attribute__((aligned(8)));
> 
> I thought the conclusion after reviewing your original patch was to maintain
> the union and drop the alignment directive? e.g.
> 
>   union {
>   u32 sync_count;
>   u64 padding; /* Hi16xx writes an extra 32 bits of goodness 
> */
>   };
OK, I will send v3.

> 
> Will
> 
> .
> 

-- 
Thanks!
BestRegards



Re: [PATCH] ASoC: AMD: Fix race condition between register access

2018-10-29 Thread Daniel Kurtz
Hi Akshu,


On Mon, Oct 29, 2018 at 1:39 AM Agrawal, Akshu  wrote:
>
> During simultaneous running of playback and capture, we
> got hit by incorrect value write on common register. This was due
> to race condition between 2 streams.
> Fixing this by locking the common register access.

Nice catch!  It looks like one of the operations you are trying
to make atomic is that two step Addr + Data register update.
If so, then I recommend refactoring a bit, and just doing that locked
2-step-access in its own helper function, like this:

  static void acp_reg_write_srbm_targ(void __iomem *acp_mmio, u32
addr, u32 data)
  {
unsigned long flags;

spin_lock_irqsave(&lock, flags);
acp_reg_write(addr, acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
acp_reg_write(data, acp_mmio, mmACP_SRBM_Targ_Idx_Data);
spin_unlock_irqrestore(&lock, flags);
  }

And similarly, you can add 2 more locking helpers, one for modifying
the imr/ch and another for mmACP_I2S_16BIT_RESOLUTION_EN.

>
> Signed-off-by: Akshu Agrawal 
> ---
>  sound/soc/amd/acp-pcm-dma.c | 29 +
>  1 file changed, 29 insertions(+)
>
> diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
> index 0ac4b5b..993a7db 100644
> --- a/sound/soc/amd/acp-pcm-dma.c
> +++ b/sound/soc/amd/acp-pcm-dma.c
> @@ -121,6 +121,9 @@
> .periods_max = CAPTURE_MAX_NUM_PERIODS,
>  };
>
> +/* Lock to protect access to registers */
> +static DEFINE_SPINLOCK(lock);
> +
>  static u32 acp_reg_read(void __iomem *acp_mmio, u32 reg)
>  {
> return readl(acp_mmio + (reg * 4));
> @@ -168,9 +171,12 @@ static void config_dma_descriptor_in_sram(void __iomem 
> *acp_mmio,
>   acp_dma_dscr_transfer_t *descr_info)
>  {
> u32 sram_offset;
> +   unsigned long flags;
>
> sram_offset = (descr_idx * sizeof(acp_dma_dscr_transfer_t));
>
> +   spin_lock_irqsave(&lock, flags);
> +
> /* program the source base address. */
> acp_reg_write(sram_offset, acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
> acp_reg_write(descr_info->src,  acp_mmio, mmACP_SRBM_Targ_Idx_Data);
> @@ -181,6 +187,8 @@ static void config_dma_descriptor_in_sram(void __iomem 
> *acp_mmio,
> /* program the number of bytes to be transferred for this descriptor. 
> */
> acp_reg_write(sram_offset + 8,  acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
> acp_reg_write(descr_info->xfer_val, acp_mmio, 
> mmACP_SRBM_Targ_Idx_Data);
> +
> +   spin_unlock_irqrestore(&lock, flags);
>  }
>
>  static void pre_config_reset(void __iomem *acp_mmio, u16 ch_num)
> @@ -309,8 +317,12 @@ static void acp_pte_config(void __iomem *acp_mmio, 
> struct page *pg,
> u32 low;
> u32 high;
> u32 offset;
> +   unsigned long flags;
>
> offset  = ACP_DAGB_GRP_SRBM_SRAM_BASE_OFFSET + (pte_offset * 8);
> +
> +   spin_lock_irqsave(&lock, flags);
> +
> for (page_idx = 0; page_idx < (num_of_pages); page_idx++) {
> /* Load the low address of page int ACP SRAM through SRBM */
> acp_reg_write((offset + (page_idx * 8)),
> @@ -333,6 +345,8 @@ static void acp_pte_config(void __iomem *acp_mmio, struct 
> page *pg,
> /* Move to next physically contiguos page */
> pg++;
> }
> +
> +   spin_unlock_irqrestore(&lock, flags);
>  }
>
>  static void config_acp_dma(void __iomem *acp_mmio,
> @@ -367,6 +381,7 @@ static void acp_dma_cap_channel_enable(void __iomem 
> *acp_mmio,
>u16 cap_channel)
>  {
> u32 val, ch_reg, imr_reg, res_reg;
> +   unsigned long flags;
>
> switch (cap_channel) {
> case CAP_CHANNEL1:
> @@ -381,6 +396,8 @@ static void acp_dma_cap_channel_enable(void __iomem 
> *acp_mmio,
> imr_reg = mmACP_I2SMICSP_IMR0;
> break;
> }
> +   spin_lock_irqsave(&lock, flags);
> +
> val = acp_reg_read(acp_mmio,
>mmACP_I2S_16BIT_RESOLUTION_EN);
> if (val & ACP_I2S_MIC_16BIT_RESOLUTION_EN) {
> @@ -393,12 +410,15 @@ static void acp_dma_cap_channel_enable(void __iomem 
> *acp_mmio,
> val &= ~ACP_I2SMICSP_IMR1__I2SMICSP_RXFOM_MASK;
> acp_reg_write(val, acp_mmio, imr_reg);
> acp_reg_write(0x1, acp_mmio, ch_reg);
> +
> +   spin_unlock_irqrestore(&lock, flags);
>  }
>
>  static void acp_dma_cap_channel_disable(void __iomem *acp_mmio,
> u16 cap_channel)
>  {
> u32 val, ch_reg, imr_reg;
> +   unsigned long flags;
>
> switch (cap_channel) {
> case CAP_CHANNEL1:
> @@ -411,11 +431,15 @@ static void acp_dma_cap_channel_disable(void __iomem 
> *acp_mmio,
> ch_reg = mmACP_I2SMICSP_RER0;
> break;
> }
> +   spin_lock_irqsave(&lock, flags);
> +
> val = acp_reg_read(acp_mmio, imr_reg);
> val |= ACP_I2SMICSP_IMR1__I2SMICSP_RXDAM_MASK

Re: [PATCH v3] mm/page_owner: use kvmalloc instead of kmalloc

2018-10-29 Thread Miles Chen
On Mon, 2018-10-29 at 09:17 +0100, Michal Hocko wrote:
> On Mon 29-10-18 09:07:08, Michal Hocko wrote:
> [...]
> > Besides that, the following doesn't make much sense to me. It simply
> > makes no sense to use vmalloc for sub page allocation regardless of
> > HIGHMEM.
> 
> OK, it is still early morning here. Now I get the point of the patch.
> You just want to (ab)use highmem for smaller requests. I do not like
> this, to be honest. It causes an internal fragmentation and more
> importantly the VMALLOC space on 32b where HIGHMEM is enabled (do we
> have any 64b with HIGHMEM btw?) is quite small to be wasted like that.
> 
Thanks for your comment. It looks like using the vmalloc fallback for
sub-page allocations is not a good idea here.

Your comment gave me another idea:

1. force kbuf to PAGE_SIZE
2. allocate a page by alloc_page(GFP_KERNEL | __GFP_HIGHMEM); so we can
get a highmem page if possible
3. use kmap/kunmap pair to create mapping for this page. No vmalloc
space is used.
4. do not change kvmalloc logic.


> In any case such a changes should come with some numbers and as a
> separate patch for sure.
> 
> > > diff --git a/mm/util.c b/mm/util.c
> > > index 8bf08b5b5760..7b1c59b9bfbf 100644
> > > --- a/mm/util.c
> > > +++ b/mm/util.c
> > > @@ -416,10 +416,10 @@ void *kvmalloc_node(size_t size, gfp_t flags, int 
> > > node)
> > >   ret = kmalloc_node(size, kmalloc_flags, node);
> > >  
> > >   /*
> > > -  * It doesn't really make sense to fallback to vmalloc for sub page
> > > -  * requests
> > > +  * It only makes sense to fallback to vmalloc for sub page
> > > +  * requests if we might be able to allocate highmem pages.
> > >*/
> > > - if (ret || size <= PAGE_SIZE)
> > > + if (ret || (!IS_ENABLED(CONFIG_HIGHMEM) && size <= PAGE_SIZE))
> > >   return ret;
> > >  
> > >   return __vmalloc_node_flags_caller(size, node, flags,
> > > -- 
> > > 2.18.0
> > > 
> > 
> > -- 
> > Michal Hocko
> > SUSE Labs
> 




linux-next: manual merge of the vfs tree with Linus' tree

2018-10-29 Thread Stephen Rothwell
Hi Al,

Today's linux-next merge of the vfs tree got a conflict in:

  fs/compat_ioctl.c

between commit:

  77654350306a ("take compat TIOC[SG]SERIAL treatment into tty_compat_ioctl()")

from Linus' tree and commit:

  69374d063be0 ("compat_ioctl: remove pointless HCI... ioctls")

from the vfs tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc fs/compat_ioctl.c
index 6e30949d9f77,326ceab5246a..
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@@ -429,13 -499,68 +429,6 @@@ static int mt_ioctl_trans(struct file *
  
  #endif /* CONFIG_BLOCK */
  
- /* Bluetooth ioctls */
- #define HCIUARTSETPROTO   _IOW('U', 200, int)
- #define HCIUARTGETPROTO   _IOR('U', 201, int)
- #define HCIUARTGETDEVICE  _IOR('U', 202, int)
- #define HCIUARTSETFLAGS   _IOW('U', 203, int)
- #define HCIUARTGETFLAGS   _IOR('U', 204, int)
 -struct serial_struct32 {
 -compat_int_ttype;
 -compat_int_tline;
 -compat_uint_t   port;
 -compat_int_tirq;
 -compat_int_tflags;
 -compat_int_txmit_fifo_size;
 -compat_int_tcustom_divisor;
 -compat_int_tbaud_base;
 -unsigned short  close_delay;
 -chario_type;
 -charreserved_char[1];
 -compat_int_thub6;
 -unsigned short  closing_wait; /* time to wait before closing */
 -unsigned short  closing_wait2; /* no longer used... */
 -compat_uint_t   iomem_base;
 -unsigned short  iomem_reg_shift;
 -unsigned intport_high;
 - /* compat_ulong_t  iomap_base FIXME */
 -compat_int_treserved[1];
 -};
 -
 -static int serial_struct_ioctl(struct file *file,
 -  unsigned cmd, struct serial_struct32 __user *ss32)
 -{
 -typedef struct serial_struct32 SS32;
 -int err;
 -  struct serial_struct __user *ss = compat_alloc_user_space(sizeof(*ss));
 -__u32 udata;
 -  unsigned int base;
 -  unsigned char *iomem_base;
 -
 -  if (ss == NULL)
 -  return -EFAULT;
 -if (cmd == TIOCSSERIAL) {
 -  if (copy_in_user(ss, ss32, offsetof(SS32, iomem_base)) ||
 -  get_user(udata, &ss32->iomem_base))
 -  return -EFAULT;
 -  iomem_base = compat_ptr(udata);
 -  if (put_user(iomem_base, &ss->iomem_base) ||
 -  convert_in_user(&ss32->iomem_reg_shift,
 -&ss->iomem_reg_shift) ||
 -  convert_in_user(&ss32->port_high, &ss->port_high) ||
 -  put_user(0UL, &ss->iomap_base))
 -  return -EFAULT;
 -}
 -  err = do_ioctl(file, cmd, (unsigned long)ss);
 -if (cmd == TIOCGSERIAL && err >= 0) {
 -  if (copy_in_user(ss32, ss, offsetof(SS32, iomem_base)) ||
 -  get_user(iomem_base, &ss->iomem_base))
 -  return -EFAULT;
 -  base = (unsigned long)iomem_base  >> 32 ?
 -  0x : (unsigned)(unsigned long)iomem_base;
 -  if (put_user(base, &ss32->iomem_base) ||
 -  convert_in_user(&ss->iomem_reg_shift,
 -&ss32->iomem_reg_shift) ||
 -  convert_in_user(&ss->port_high, &ss32->port_high))
 -  return -EFAULT;
 -}
 -return err;
 -}
--
  #define RTC_IRQP_READ32   _IOR('p', 0x0b, compat_ulong_t)
  #define RTC_IRQP_SET32_IOW('p', 0x0c, compat_ulong_t)
  #define RTC_EPOCH_READ32  _IOR('p', 0x0d, compat_ulong_t)


pgpemguebeHG1.pgp
Description: OpenPGP digital signature


make[2]: *** No rule to make target 'arch/xtensa/boot/dts/csp.dtb', needed by '__build'.

2018-10-29 Thread kbuild test robot
Hi Rob,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   4b42745211af552f170f38a1b97f4a112b5da6b2
commit: 37c8a5fafa3bb7dcdd51774be353be6cb2912b86 kbuild: consolidate Devicetree 
dtb build rules
date:   4 weeks ago
config: xtensa-common_defconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 8.1.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 37c8a5fafa3bb7dcdd51774be353be6cb2912b86
# save the attached .config to linux build tree
GCC_VERSION=8.1.0 make.cross ARCH=xtensa 

All errors (new ones prefixed by >>):

>> make[2]: *** No rule to make target 'arch/xtensa/boot/dts/csp.dtb', needed 
>> by '__build'.
>> make[2]: *** No rule to make target 'arch/xtensa/boot/dts/lx200mx.dtb', 
>> needed by '__build'.
>> make[2]: *** No rule to make target 'arch/xtensa/boot/dts/ml605.dtb', needed 
>> by '__build'.
>> make[2]: *** No rule to make target 'arch/xtensa/boot/dts/kc705_nommu.dtb', 
>> needed by '__build'.
>> make[2]: *** No rule to make target 'arch/xtensa/boot/dts/kc705.dtb', needed 
>> by '__build'.
>> make[2]: *** No rule to make target 'arch/xtensa/boot/dts/lx60.dtb', needed 
>> by '__build'.
   make[2]: Target '__build' not remade because of errors.

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


RTL8812au driver source code A little help please?

2018-10-29 Thread Nathaniel Russell
make[1]: Entering directory '/usr/src/linux-4.19'
  CC [M]  /tmp/rtl8812AU_8821AU_linux/os_dep/linux/os_intfs.o
/tmp/rtl8812AU_8821AU_linux/os_dep/linux/os_intfs.c:816:22: error:
initialization of ‘u16 (*)(struct net_device *, struct sk_buff *,
struct net_device *, u16 (*)(struct net_device *, struct sk_buff *,
struct net_device *))’ {aka ‘short unsigned int (*)(struct net_device
*, struct sk_buff *, struct net_device *, short unsigned int
(*)(struct net_device *, struct sk_buff *, struct net_device *))’}
from incompatible pointer type ‘u16 (*)(struct net_device *, struct
sk_buff *, void *, u16 (*)(struct net_device *, struct sk_buff *,
struct net_device *))’ {aka ‘short unsigned int (*)(struct net_device
*, struct sk_buff *, void *, short unsigned int (*)(struct net_device
*, struct sk_buff *, struct net_device *))’}
[-Werror=incompatible-pointer-types]
  .ndo_select_queue = rtw_select_queue,
  ^~~~
/tmp/rtl8812AU_8821AU_linux/os_dep/linux/os_intfs.c:816:22: note:
(near initialization for ‘rtw_netdev_ops.ndo_select_queue’)
cc1: some warnings being treated as errors
make[2]: *** [scripts/Makefile.build:306:
/tmp/rtl8812AU_8821AU_linux/os_dep/linux/os_intfs.o] Error 1
make[1]: *** [Makefile:1517: _module_/tmp/rtl8812AU_8821AU_linux] Error 2
make[1]: Leaving directory '/usr/src/linux-4.19'
make: *** [Makefile:1584: modules] Error 2

If anyone could help me with this issue it would be greatly appreciated.


Re: [PATCH] kretprobe: produce sane stack traces

2018-10-29 Thread Masami Hiramatsu
Hi Aleksa,

On Sat, 27 Oct 2018 00:22:10 +1100
Aleksa Sarai  wrote:

> Historically, kretprobe has always produced unusable stack traces
> (kretprobe_trampoline is the only entry in most cases, because of the
> funky stack pointer overwriting). This has caused quite a few annoyances
> when using tracing to debug problems[1] -- since return values are only
> available with kretprobes but stack traces were only usable for kprobes,
> users had to probe both and then manually associate them.

Yes, this unfortunately still happens. I once tried to fix it by
replacing current "kretprobe instance" with graph-tracer's per-thread
return stack. (https://lkml.org/lkml/2017/8/21/553)

I still believe that direction is the best solution to solve this kind
of issue; otherwise, we have to have 2 different stack fixups for
kretprobe and ftrace graph tracer. (I will have a talk with Steve at
plumbers next month)

Anyway, until that merge happens, this patch looks good as a generic
solution to avoid this issue (e.g. for an arch which doesn't
support retstack).


> 
> With the advent of bpf_trace, users would have been able to do this
> association in bpf, but this was less than ideal (because
> bpf_get_stackid would still produce rubbish and programs that didn't
> know better would get silly results). The main usecase for stack traces
> (at least with bpf_trace) is for DTrace-style aggregation on stack
> traces (both entry and exit). Therefore we cannot simply correct the
> stack trace on exit -- we must stash away the stack trace and return the
> entry stack trace when it is requested.
> 
> In theory, patches like commit 76094a2cf46e ("ftrace: distinguish
> kretprobe'd functions in trace logs") are no longer necessary *for
> tracing* because now all kretprobe traces should produce sane stack
> traces. However it's not clear whether removing them completely is
> reasonable.

Then, let's try to revert it :)

BTW, could you also add a test case for ftrace too?
also, I have some comments below.

> 
> [1]: https://github.com/iovisor/bpftrace/issues/101
> 
> Cc: Brendan Gregg 
> Cc: Christian Brauner 
> Signed-off-by: Aleksa Sarai 
> ---
>  include/linux/kprobes.h   |  15 ++
>  kernel/events/callchain.c |   8 ++-
>  kernel/kprobes.c  | 108 +-
>  kernel/trace/trace.c  |  11 +++-
>  4 files changed, 138 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index e909413e4e38..8a4f78a0c990 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -40,6 +40,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  
>  #ifdef CONFIG_KPROBES
> @@ -168,11 +170,18 @@ struct kretprobe {
>   raw_spinlock_t lock;
>  };
>  
> +#define KRETPROBE_TRACE_SIZE 1024
> +struct kretprobe_trace {
> + int nr_entries;
> + unsigned long entries[KRETPROBE_TRACE_SIZE];
> +};

Hmm, do we really need all entries? It takes 8KB for each instance.
Note that the number of instances can be big if the system core number
is larger.

> +
>  struct kretprobe_instance {
>   struct hlist_node hlist;
>   struct kretprobe *rp;
>   kprobe_opcode_t *ret_addr;
>   struct task_struct *task;
> + struct kretprobe_trace entry;
>   char data[0];
>  };
>  
> @@ -371,6 +380,12 @@ void unregister_kretprobe(struct kretprobe *rp);
>  int register_kretprobes(struct kretprobe **rps, int num);
>  void unregister_kretprobes(struct kretprobe **rps, int num);
>  
> +struct kretprobe_instance *current_kretprobe_instance(void);
> +void kretprobe_save_stack_trace(struct kretprobe_instance *ri,
> + struct stack_trace *trace);
> +void kretprobe_perf_callchain_kernel(struct kretprobe_instance *ri,
> +  struct perf_callchain_entry_ctx *ctx);
> +
>  void kprobe_flush_task(struct task_struct *tk);
>  void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
>  
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 24a77c34e9ad..98edcd8a6987 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -12,6 +12,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "internal.h"
>  
> @@ -197,9 +198,14 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, 
> bool kernel, bool user,
>   ctx.contexts_maxed = false;
>  
>   if (kernel && !user_mode(regs)) {
> + struct kretprobe_instance *ri = current_kretprobe_instance();
> +
>   if (add_mark)
>   perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
> - perf_callchain_kernel(&ctx, regs);
> + if (ri)
> + kretprobe_perf_callchain_kernel(ri, &ctx);
> + else
> + perf_callchain_kernel(&ctx, regs);
>   }
>  
>   if (user) {
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 90e98e233647..a077

[PATCH v3] thermal: qoriq: add multiple sensors support

2018-10-29 Thread andy . tang
From: Yuantian Tang 

The QorIQ Layerscape SoC has several thermal sensors but the current
driver only supports one.

Massage the code to be sensor oriented and allow the support for
multiple sensors.

Signed-off-by: Yuantian Tang 
Reviewed-by: Daniel Lezcano 
---
v3:
  - add Reviewed-by
v2:
  - update the commit message
  - refine the qoriq_tmu_register_tmu_zone()

 drivers/thermal/qoriq_thermal.c |  100 ++-
 1 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
index 450ed66..8beb344 100644
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -59,14 +59,21 @@ struct qoriq_tmu_regs {
u32 ttr3cr; /* Temperature Range 3 Control Register */
 };
 
+struct qoriq_tmu_data;
+
 /*
  * Thermal zone data
  */
+struct qoriq_sensor {
+   struct thermal_zone_device  *tzd;
+   struct qoriq_tmu_data   *qdata;
+   int id;
+};
+
 struct qoriq_tmu_data {
-   struct thermal_zone_device *tz;
struct qoriq_tmu_regs __iomem *regs;
-   int sensor_id;
bool little_endian;
+   struct qoriq_sensor *sensor[SITES_MAX];
 };
 
 static void tmu_write(struct qoriq_tmu_data *p, u32 val, void __iomem *addr)
@@ -87,48 +94,51 @@ static u32 tmu_read(struct qoriq_tmu_data *p, void __iomem 
*addr)
 
 static int tmu_get_temp(void *p, int *temp)
 {
+   struct qoriq_sensor *qsensor = p;
+   struct qoriq_tmu_data *qdata = qsensor->qdata;
u32 val;
-   struct qoriq_tmu_data *data = p;
 
-   val = tmu_read(data, &data->regs->site[data->sensor_id].tritsr);
+   val = tmu_read(qdata, &qdata->regs->site[qsensor->id].tritsr);
*temp = (val & 0xff) * 1000;
 
return 0;
 }
 
-static int qoriq_tmu_get_sensor_id(void)
+static const struct thermal_zone_of_device_ops tmu_tz_ops = {
+   .get_temp = tmu_get_temp,
+};
+
+static int qoriq_tmu_register_tmu_zone(struct platform_device *pdev)
 {
-   int ret, id;
-   struct of_phandle_args sensor_specs;
-   struct device_node *np, *sensor_np;
+   struct qoriq_tmu_data *qdata = platform_get_drvdata(pdev);
+   int id, sites = 0;
 
-   np = of_find_node_by_name(NULL, "thermal-zones");
-   if (!np)
-   return -ENODEV;
+   for (id = 0; id < SITES_MAX; id++) {
+   qdata->sensor[id] = devm_kzalloc(&pdev->dev,
+   sizeof(struct qoriq_sensor), GFP_KERNEL);
+   if (!qdata->sensor[id])
+   return -ENOMEM;
 
-   sensor_np = of_get_next_child(np, NULL);
-   ret = of_parse_phandle_with_args(sensor_np, "thermal-sensors",
-   "#thermal-sensor-cells",
-   0, &sensor_specs);
-   if (ret) {
-   of_node_put(np);
-   of_node_put(sensor_np);
-   return ret;
-   }
+   qdata->sensor[id]->id = id;
+   qdata->sensor[id]->qdata = qdata;
 
-   if (sensor_specs.args_count >= 1) {
-   id = sensor_specs.args[0];
-   WARN(sensor_specs.args_count > 1,
-   "%s: too many cells in sensor specifier %d\n",
-   sensor_specs.np->name, sensor_specs.args_count);
-   } else {
-   id = 0;
-   }
+   qdata->sensor[id]->tzd = devm_thermal_zone_of_sensor_register(
+   &pdev->dev, id, qdata->sensor[id], &tmu_tz_ops);
+   if (IS_ERR(qdata->sensor[id]->tzd)) {
+   if (PTR_ERR(qdata->sensor[id]->tzd) == -ENODEV)
+   continue;
+   else
+   return PTR_ERR(qdata->sensor[id]->tzd);
 
-   of_node_put(np);
-   of_node_put(sensor_np);
+   }
+
+   sites |= 0x1 << (15 - id);
+   }
+   /* Enable monitoring */
+   if (sites != 0)
+   tmu_write(qdata, sites | TMR_ME | TMR_ALPF, &qdata->regs->tmr);
 
-   return id;
+   return 0;
 }
 
 static int qoriq_tmu_calibration(struct platform_device *pdev)
@@ -178,16 +188,11 @@ static void qoriq_tmu_init_device(struct qoriq_tmu_data 
*data)
tmu_write(data, TMR_DISABLE, &data->regs->tmr);
 }
 
-static const struct thermal_zone_of_device_ops tmu_tz_ops = {
-   .get_temp = tmu_get_temp,
-};
-
 static int qoriq_tmu_probe(struct platform_device *pdev)
 {
int ret;
struct qoriq_tmu_data *data;
struct device_node *np = pdev->dev.of_node;
-   u32 site;
 
if (!np) {
dev_err(&pdev->dev, "Device OF-Node is NULL");
@@ -203,13 +208,6 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
 
data->little_endian = of_property_read_bool(np, "little-endian");
 
-   data->sensor_id = qoriq_tmu_get_sensor_id();
-   if (data->sensor_id < 0) {
-   dev

Re: [PATCH] mm: handle no memcg case in memcg_kmem_charge() properly

2018-10-29 Thread Rik van Riel
On Mon, 2018-10-29 at 17:50 -0700, Shakeel Butt wrote:
> On Mon, Oct 29, 2018 at 2:52 PM Roman Gushchin  wrote:
> > 
> > Mike Galbraith reported a regression caused by the commit
> > 9b6f7e163cd0
> > ("mm: rework memcg kernel stack accounting") on a system with
> > "cgroup_disable=memory" boot option: the system panics with the
> > following stack trace:
> > 
> >   [0.928542] BUG: unable to handle kernel NULL pointer dereference
> > at 00f8
> >   [0.929317] PGD 0 P4D 0
> >   [0.929573] Oops: 0002 [#1] PREEMPT SMP PTI
> >   [0.929984] CPU: 0 PID: 1 Comm: systemd Not tainted 4.19.0-
> > preempt+ #410
> >   [0.930637] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > BIOS ?-20180531_142017-buildhw-08.phx2.fed4
> >   [0.931862] RIP: 0010:page_counter_try_charge+0x22/0xc0
> >   [0.932376] Code: 41 5d c3 c3 0f 1f 40 00 0f 1f 44 00 00 48 85 ff
> > 0f 84 a7 00 00 00 41 56 48 89 f8 49 89 fe 49
> >   [0.934283] RSP: 0018:acf68031fcb8 EFLAGS: 00010202
> >   [0.934826] RAX: 00f8 RBX:  RCX:
> > 
> >   [0.935558] RDX: acf68031fd08 RSI: 0020 RDI:
> > 00f8
> >   [0.936288] RBP: 0001 R08: 8063 R09:
> > 99ff7cd37a40
> >   [0.937021] R10: acf68031fed0 R11: 0020 R12:
> > 0020
> >   [0.937749] R13: acf68031fd08 R14: 00f8 R15:
> > 99ff7da1ec60
> >   [0.938486] FS:  7fc2140bb280() GS:99ff7da0()
> > knlGS:
> >   [0.939311] CS:  0010 DS:  ES:  CR0: 80050033
> >   [0.939905] CR2: 00f8 CR3: 12dc8002 CR4:
> > 00760ef0
> >   [0.940638] DR0:  DR1:  DR2:
> > 
> >   [0.941366] DR3:  DR6: fffe0ff0 DR7:
> > 0400
> >   [0.942110] PKRU: 5554
> >   [0.942412] Call Trace:
> >   [0.942673]  try_charge+0xcb/0x780
> >   [0.943031]  memcg_kmem_charge_memcg+0x28/0x80
> >   [0.943486]  ? __vmalloc_node_range+0x1e4/0x280
> >   [0.943971]  memcg_kmem_charge+0x8b/0x1d0
> >   [0.944396]  copy_process.part.41+0x1ca/0x2070
> >   [0.944853]  ? get_acl+0x1a/0x120
> >   [0.945200]  ? shmem_tmpfile+0x90/0x90
> >   [0.945596]  _do_fork+0xd7/0x3d0
> >   [0.945934]  ? trace_hardirqs_off_thunk+0x1a/0x1c
> >   [0.946421]  do_syscall_64+0x5a/0x180
> >   [0.946798]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > 
> > The problem occurs because get_mem_cgroup_from_current() returns
> > the NULL pointer if memory controller is disabled. Let's check
> > if this is the case at the beginning of memcg_kmem_charge() and
> > just return 0 if mem_cgroup_disabled() returns true. This is how
> > we handle this case in many other places in the memory controller
> > code.
> > 
> > Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
> > Reported-by: Mike Galbraith 
> > Signed-off-by: Roman Gushchin 
> > Cc: Michal Hocko 
> > Cc: Johannes Weiner 
> > Cc: Vladimir Davydov 
> > Cc: Andrew Morton 
> > ---
> >  mm/memcontrol.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 54920cbc46bf..6e1469b80cb7 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page,
> > gfp_t gfp, int order)
> > struct mem_cgroup *memcg;
> > int ret = 0;
> > 
> > -   if (memcg_kmem_bypass())
> > +   if (mem_cgroup_disabled() || memcg_kmem_bypass())
> > return 0;
> > 
> 
> Why not check memcg_kmem_enabled() before calling memcg_kmem_charge()
> in memcg_charge_kernel_stack()?

Check Roman's backtrace again. The function
memcg_charge_kernel_stack() is not in it.

That is why it is generally better to check
in the called function, rather than add a
check to every call site (and maybe miss one
or two).

Acked-by: Rik van Riel 
-- 
All Rights Reversed.


signature.asc
Description: This is a digitally signed message part


Re: [PATCH] fs/proc: introduce /proc/stat2 file

2018-10-29 Thread Vito Caputo
On Mon, Oct 29, 2018 at 11:04:45PM +, Daniel Colascione wrote:
> On Mon, Oct 29, 2018 at 7:25 PM, Davidlohr Bueso  wrote:
> > This patch introduces a new /proc/stat2 file that is identical to the
> > regular 'stat' except that it zeroes all hard irq statistics. The new
> > file is a drop in replacement to stat for users that need performance.
> 
> For a while now, I've been thinking over ways to improve the
> performance of collecting various bits of kernel information. I don't
> think that a proliferation of special-purpose named bag-of-fields file
> variants is the right answer, because even if you add a few info-file
> variants, you're still left with a situation where a given file
> provides a particular caller with too little or too much information.
> I'd much rather move to a model in which userspace *explicitly* tells
> the kernel which fields it wants, with the kernel replying with just
> those particular fields, maybe in their raw binary representations.
> The ASCII-text bag-of-everything files would remain available for
> ad-hoc and non-performance critical use, but programs that cared about
> performance would have an efficient bypass. One concrete approach is
> to let users open up today's proc files and, instead of read(2)ing a
> text blob, use an ioctl to retrieve specified and targeted information
> of the sort that would normally be encoded in the text blob. Because
> callers would open the same file when using either the text or binary
> interfaces, little would have to change, and it'd be easy to implement
> fallbacks when a particular system doesn't support a particular
> fast-path ioctl.


We have two extremes of granularity in the /proc and /sys virtual
filesystems today:

On procfs there's these legacy files which aggregate loosely-related
system information, and in cases where you actually want most of what's
provided, it's a nice optimization because you can sample it all in a
single pread() call.

On sysfs the granularity is much finer with it being fairly common to
find a file-per-datum.  This has other advantages, like not needing to
parse snowflake formats which sometimes varied across kernel versions
like in procfs, or needing to burden the kernel to produce more
information than necessary.

But anyone who has written tools trying to sample large subsets of the
granular information in sysfs at a high rate will know how quickly it
becomes rather costly in terms of system calls.

The last time I went down this path, I wished there were a system call
like readv() which accepted a vector of a new iovec type specifying an fd.

Then the sysfs model could be made more efficient by coalescing all
the required read syscalls into a single megaread bundling all the
relevant fds that are simply kept open and reused.

If we had such a readv() variant, the sysfs granular model could be used
to granularly expose all the information we currently expose in /proc,
while still being relatively efficient in terms of system calls per
sample.  Sure you still have to lookup and open all the files of
interest, but that only needs to occur once at initialization.

Regards,
Vito Caputo


Re: [PATCH] mm: handle no memcg case in memcg_kmem_charge() properly

2018-10-29 Thread Shakeel Butt
On Mon, Oct 29, 2018 at 2:52 PM Roman Gushchin  wrote:
>
> Mike Galbraith reported a regression caused by the commit 9b6f7e163cd0
> ("mm: rework memcg kernel stack accounting") on a system with
> "cgroup_disable=memory" boot option: the system panics with the
> following stack trace:
>
>   [0.928542] BUG: unable to handle kernel NULL pointer dereference at 
> 00f8
>   [0.929317] PGD 0 P4D 0
>   [0.929573] Oops: 0002 [#1] PREEMPT SMP PTI
>   [0.929984] CPU: 0 PID: 1 Comm: systemd Not tainted 4.19.0-preempt+ #410
>   [0.930637] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> ?-20180531_142017-buildhw-08.phx2.fed4
>   [0.931862] RIP: 0010:page_counter_try_charge+0x22/0xc0
>   [0.932376] Code: 41 5d c3 c3 0f 1f 40 00 0f 1f 44 00 00 48 85 ff 0f 84 a7 
> 00 00 00 41 56 48 89 f8 49 89 fe 49
>   [0.934283] RSP: 0018:acf68031fcb8 EFLAGS: 00010202
>   [0.934826] RAX: 00f8 RBX:  RCX: 
>   [0.935558] RDX: acf68031fd08 RSI: 0020 RDI: 00f8
>   [0.936288] RBP: 0001 R08: 8063 R09: 99ff7cd37a40
>   [0.937021] R10: acf68031fed0 R11: 0020 R12: 0020
>   [0.937749] R13: acf68031fd08 R14: 00f8 R15: 99ff7da1ec60
>   [0.938486] FS:  7fc2140bb280() GS:99ff7da0() 
> knlGS:
>   [0.939311] CS:  0010 DS:  ES:  CR0: 80050033
>   [0.939905] CR2: 00f8 CR3: 12dc8002 CR4: 00760ef0
>   [0.940638] DR0:  DR1:  DR2: 
>   [0.941366] DR3:  DR6: fffe0ff0 DR7: 0400
>   [0.942110] PKRU: 5554
>   [0.942412] Call Trace:
>   [0.942673]  try_charge+0xcb/0x780
>   [0.943031]  memcg_kmem_charge_memcg+0x28/0x80
>   [0.943486]  ? __vmalloc_node_range+0x1e4/0x280
>   [0.943971]  memcg_kmem_charge+0x8b/0x1d0
>   [0.944396]  copy_process.part.41+0x1ca/0x2070
>   [0.944853]  ? get_acl+0x1a/0x120
>   [0.945200]  ? shmem_tmpfile+0x90/0x90
>   [0.945596]  _do_fork+0xd7/0x3d0
>   [0.945934]  ? trace_hardirqs_off_thunk+0x1a/0x1c
>   [0.946421]  do_syscall_64+0x5a/0x180
>   [0.946798]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> The problem occurs because get_mem_cgroup_from_current() returns
> the NULL pointer if memory controller is disabled. Let's check
> if this is the case at the beginning of memcg_kmem_charge() and
> just return 0 if mem_cgroup_disabled() returns true. This is how
> we handle this case in many other places in the memory controller
> code.
>
> Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
> Reported-by: Mike Galbraith 
> Signed-off-by: Roman Gushchin 
> Cc: Michal Hocko 
> Cc: Johannes Weiner 
> Cc: Vladimir Davydov 
> Cc: Andrew Morton 
> ---
>  mm/memcontrol.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 54920cbc46bf..6e1469b80cb7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int 
> order)
> struct mem_cgroup *memcg;
> int ret = 0;
>
> -   if (memcg_kmem_bypass())
> +   if (mem_cgroup_disabled() || memcg_kmem_bypass())
> return 0;
>

Why not check memcg_kmem_enabled() before calling memcg_kmem_charge()
in memcg_charge_kernel_stack()?

> memcg = get_mem_cgroup_from_current();
> --
> 2.17.2
>


net/sunrpc/auth_gss/gss_krb5_seal.c:144:14: error: implicit declaration of function 'cmpxchg64'; did you mean 'cmpxchg'?

2018-10-29 Thread kbuild test robot
Hi Arnd,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   4b42745211af552f170f38a1b97f4a112b5da6b2
commit: 21924765862a0871908a35cb0e53e2e1c169b888 SUNRPC: use cmpxchg64() in 
gss_seq_send64_fetch_and_inc()
date:   3 weeks ago
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 21924765862a0871908a35cb0e53e2e1c169b888
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=sh 

All errors (new ones prefixed by >>):

   net/sunrpc/auth_gss/gss_krb5_seal.c: In function 
'gss_seq_send64_fetch_and_inc':
>> net/sunrpc/auth_gss/gss_krb5_seal.c:144:14: error: implicit declaration of 
>> function 'cmpxchg64'; did you mean 'cmpxchg'? 
>> [-Werror=implicit-function-declaration]
  seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
 ^
 cmpxchg
   cc1: some warnings being treated as errors

vim +144 net/sunrpc/auth_gss/gss_krb5_seal.c

   136  
   137  u64
   138  gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
   139  {
   140  u64 old, seq_send = READ_ONCE(ctx->seq_send);
   141  
   142  do {
   143  old = seq_send;
 > 144  seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
   145  } while (old != seq_send);
   146  return seq_send;
   147  }
   148  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


sound/pci/hda/patch_ca0132.c:7650:20: error: implicit declaration of function 'pci_iomap'; did you mean 'pcim_iomap'?

2018-10-29 Thread kbuild test robot
Hi Rakesh,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   4b42745211af552f170f38a1b97f4a112b5da6b2
commit: 6bae5ea9498926440ffc883f3dbceb0adc65e492 ASoC: hdac_hda: add asoc 
extension for legacy HDA codec drivers
date:   9 weeks ago
config: sh-allyesconfig (attached as .config)
compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 6bae5ea9498926440ffc883f3dbceb0adc65e492
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=sh 

All errors (new ones prefixed by >>):

   sound/pci/hda/patch_ca0132.c: In function 'patch_ca0132':
>> sound/pci/hda/patch_ca0132.c:7650:20: error: implicit declaration of 
>> function 'pci_iomap'; did you mean 'pcim_iomap'? 
>> [-Werror=implicit-function-declaration]
  spec->mem_base = pci_iomap(codec->bus->pci, 2, 0xC20);
   ^
   pcim_iomap
   sound/pci/hda/patch_ca0132.c:7650:18: warning: assignment makes pointer from 
integer without a cast [-Wint-conversion]
  spec->mem_base = pci_iomap(codec->bus->pci, 2, 0xC20);
 ^
   cc1: some warnings being treated as errors

vim +7650 sound/pci/hda/patch_ca0132.c

d5c016b56 Gabriele Martino 2015-05-18  7581  
95c6e9cb7 Ian Minett   2011-06-15  7582  static int patch_ca0132(struct 
hda_codec *codec)
95c6e9cb7 Ian Minett   2011-06-15  7583  {
95c6e9cb7 Ian Minett   2011-06-15  7584 struct ca0132_spec *spec;
a73d511c4 Ian Minett   2012-12-20  7585 int err;
d5c016b56 Gabriele Martino 2015-05-18  7586 const struct snd_pci_quirk 
*quirk;
95c6e9cb7 Ian Minett   2011-06-15  7587  
4e76a8833 Takashi Iwai 2014-02-25  7588 codec_dbg(codec, 
"patch_ca0132\n");
95c6e9cb7 Ian Minett   2011-06-15  7589  
95c6e9cb7 Ian Minett   2011-06-15  7590 spec = kzalloc(sizeof(*spec), 
GFP_KERNEL);
95c6e9cb7 Ian Minett   2011-06-15  7591 if (!spec)
95c6e9cb7 Ian Minett   2011-06-15  7592 return -ENOMEM;
95c6e9cb7 Ian Minett   2011-06-15  7593 codec->spec = spec;
993884f6a Chih-Chung Chang 2013-03-25  7594 spec->codec = codec;
95c6e9cb7 Ian Minett   2011-06-15  7595  
225068ab2 Takashi Iwai 2015-05-29  7596 codec->patch_ops = 
ca0132_patch_ops;
225068ab2 Takashi Iwai 2015-05-29  7597 codec->pcm_format_first = 1;
225068ab2 Takashi Iwai 2015-05-29  7598 codec->no_sticky_stream = 1;
225068ab2 Takashi Iwai 2015-05-29  7599  
d5c016b56 Gabriele Martino 2015-05-18  7600 /* Detect codec quirk */
d5c016b56 Gabriele Martino 2015-05-18  7601 quirk = 
snd_pci_quirk_lookup(codec->bus->pci, ca0132_quirks);
d5c016b56 Gabriele Martino 2015-05-18  7602 if (quirk)
d5c016b56 Gabriele Martino 2015-05-18  7603 spec->quirk = 
quirk->value;
d5c016b56 Gabriele Martino 2015-05-18  7604 else
d5c016b56 Gabriele Martino 2015-05-18  7605 spec->quirk = 
QUIRK_NONE;
d5c016b56 Gabriele Martino 2015-05-18  7606  
e24aa0a4c Takashi Iwai 2014-08-10  7607 spec->dsp_state = 
DSP_DOWNLOAD_INIT;
a7e76271b Ian Minett   2012-12-20  7608 spec->num_mixers = 1;
017310fbe Connor McAdams   2018-05-08  7609  
017310fbe Connor McAdams   2018-05-08  7610 /* Set which mixers each quirk 
uses. */
017310fbe Connor McAdams   2018-05-08  7611 switch (spec->quirk) {
017310fbe Connor McAdams   2018-05-08  7612 case QUIRK_SBZ:
e25e34450 Connor McAdams   2018-08-08  7613 spec->mixers[0] = 
desktop_mixer;
017310fbe Connor McAdams   2018-05-08  7614 
snd_hda_codec_set_name(codec, "Sound Blaster Z");
017310fbe Connor McAdams   2018-05-08  7615 break;
e25e34450 Connor McAdams   2018-08-08  7616 case QUIRK_R3D:
e25e34450 Connor McAdams   2018-08-08  7617 spec->mixers[0] = 
desktop_mixer;
e25e34450 Connor McAdams   2018-08-08  7618 
snd_hda_codec_set_name(codec, "Recon3D");
e25e34450 Connor McAdams   2018-08-08  7619 break;
017310fbe Connor McAdams   2018-05-08  7620 case QUIRK_R3DI:
017310fbe Connor McAdams   2018-05-08  7621 spec->mixers[0] = 
r3di_mixer;
017310fbe Connor McAdams   2018-05-08  7622 
snd_hda_codec_set_name(codec, "Recon3Di");
017310fbe Connor McAdams   2018-05-08  7623 break;
017310fbe Connor McAdams   2018-05-08  7624 default:
a7e76271b Ian Minett   2012-12-20  7625 spec->mixers[0] = 
ca0132_mixer;
017310fbe Connor McAdams   2018-05-08  7626 break;
017310fbe Connor McAdams   2018-05-08  7627 }
a7e76271b Ian Minett   2012-12-20  7628  
08eca6b1f Connor McAdams   2018-08-08  7629 /* Setup whether or not to use 
alt functions/controls/pci_mmio */
009b8f979 Connor McAdams   2018-05-08  7630 switch (

Re: [PATCH v2 3/5] Creates macro to avoid variable shadowing

2018-10-29 Thread Leonardo Bras
Thank you!
On Sun, Oct 28, 2018 at 1:38 PM Masahiro Yamada
 wrote:
>
> On Tue, Oct 23, 2018 at 10:11 AM Leonardo Brás  wrote:
> >
> > Creates DEF_FIELD_ADDR_VAR as a more generic version of the DEF_FIELD_ADDR
> > macro, allowing usage of a variable name other than the struct element name.
> > Also, sets DEF_FIELD_ADDR as a specific usage of DEF_FIELD_ADDR_VAR in which
> > the var name is the same as the struct element name.
> >
> > Signed-off-by: Leonardo Brás 
> > ---
>
>
> Applied to linux-kbuild.
>
>
>
> >  scripts/mod/file2alias.c | 24 +---
> >  1 file changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
> > index 7be43697ff84..3015c0bdecb2 100644
> > --- a/scripts/mod/file2alias.c
> > +++ b/scripts/mod/file2alias.c
> > @@ -95,12 +95,20 @@ extern struct devtable *__start___devtable[], 
> > *__stop___devtable[];
> >   */
> >  #define DEF_FIELD(m, devid, f) \
> > typeof(((struct devid *)0)->f) f = TO_NATIVE(*(typeof(f) *)((m) + 
> > OFF_##devid##_##f))
> > +
> > +/* Define a variable v that holds the address of field f of struct devid
> > + * based at address m.  Due to the way typeof works, for a field of type
> > + * T[N] the variable has type T(*)[N], _not_ T*.
> > + */
> > +#define DEF_FIELD_ADDR_VAR(m, devid, f, v) \
> > +   typeof(((struct devid *)0)->f) *v = ((m) + OFF_##devid##_##f)
> > +
> >  /* Define a variable f that holds the address of field f of struct devid
> >   * based at address m.  Due to the way typeof works, for a field of type
> >   * T[N] the variable has type T(*)[N], _not_ T*.
> >   */
> >  #define DEF_FIELD_ADDR(m, devid, f) \
> > -   typeof(((struct devid *)0)->f) *f = ((m) + OFF_##devid##_##f)
> > +   DEF_FIELD_ADDR_VAR(m, devid, f, f)
> >
> >  /* Add a table entry.  We test function type matches while we're here. */
> >  #define ADD_TO_DEVTABLE(device_id, type, function) \
> > @@ -641,25 +649,27 @@ static void do_pnp_card_entries(void *symval, 
> > unsigned long size,
> > unsigned int i;
> >
> > device_id_check(mod->name, "pnp", size, id_size, symval);
> > +   DEF_FIELD_ADDR(symval, pnp_card_device_id, devs);
> > +   typeof(devs) devs_last;
> >
> > for (i = 0; i < count; i++) {
> > unsigned int j;
> > -   DEF_FIELD_ADDR(symval + i*id_size, pnp_card_device_id, 
> > devs);
> > +   devs_last = devs + i * id_size;
> >
> > for (j = 0; j < PNP_MAX_DEVICES; j++) {
> > -   const char *id = (char *)(*devs)[j].id;
> > -   int i2, j2;
> > +   const char *id = (char *)(*devs_last)[j].id;
> > +   int j2;
> > int dup = 0;
> >
> > if (!id[0])
> > break;
> >
> > /* find duplicate, already added value */
> > -   for (i2 = 0; i2 < i && !dup; i2++) {
> > -   DEF_FIELD_ADDR(symval + i2*id_size, 
> > pnp_card_device_id, devs);
> > +   while ((devs_last -= id_size) >= devs && !dup) {
> >
> > for (j2 = 0; j2 < PNP_MAX_DEVICES; j2++) {
> > -   const char *id2 = (char 
> > *)(*devs)[j2].id;
> > +   const char *id2 =
> > +   (char *)(*devs_last)[j2].id;
> >
> > if (!id2[0])
> > break;
> > --
> > 2.19.1
> >
>
>
> --
> Best Regards
> Masahiro Yamada


Re: [PATCH v3 2/5] kbuild: Removes unnecessary shadowed local variable.

2018-10-29 Thread Leonardo Bras
Sorry, I will take care next time.

Thank you,

Leonardo Bras

On Sun, Oct 28, 2018 at 1:37 PM Masahiro Yamada
 wrote:
>
> On Wed, Oct 24, 2018 at 1:04 PM Leonardo Bras  wrote:
> >
> > Removes an unnecessary shadowed local variable (start).
> > It was used only once, with the same value it was started before
> > the if block.
> >
> > Signed-off-by: Leonardo Bras 
>
>
>
> Applied to linux-kbuild
> with some fixups in the subject.
>
> Please do not add a period to the end of the subject.
>
>
>
>
>
>
> > ---
> >  scripts/asn1_compiler.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c
> > index c146020fc783..1b28787028d3 100644
> > --- a/scripts/asn1_compiler.c
> > +++ b/scripts/asn1_compiler.c
> > @@ -413,7 +413,7 @@ static void tokenise(char *buffer, char *end)
> >
> > /* Handle string tokens */
> > if (isalpha(*p)) {
> > -   const char **dir, *start = p;
> > +   const char **dir;
> >
> > /* Can be a directive, type name or element
> >  * name.  Find the end of the name.
> > --
> > 2.19.1
> >
>
>
> --
> Best Regards
> Masahiro Yamada


Re: [PATCH] binder: ipc namespace support for android binder

2018-10-29 Thread Todd Kjos
+christ...@brauner.io

On Sun, Oct 28, 2018 at 7:29 PM chouryzhou(周威)  wrote:
...
>
> > It's not obvious from this patch where this dependency comes
> > from...why is SYSVIPC required? I'd like to not have to require IPC_NS
> > either for devices.
>
> Yes, the patch is not highly dependent on SYSVIPC, but it is convenient
> to require it. I will drop that dependency in the V2 patch. This patch
> doesn't need IPC_NS set at present.

Actually it is dependent on IPC_NS since it makes changes to
ipc/namespace.c which is
compiled only if CONFIG_IPC_NS.

There are a couple more implementations similar to this one.
https://lwn.net/Articles/577957/ and some submissions to AOSP derived
from that one
that introduce a generic registration function for namespace support [1], and
changes to binder to implement namespaces [2].

If this is really needed, then we should have a solution that works
for devices without
requiring IPC_NS or SYSVIPC. Also, we should not add binder-specific code to
ipc/namespace.c or include/linux/ipc_namespace.h.

-Todd

[1] https://android-review.googlesource.com/c/kernel/common/+/471961
[2] https://android-review.googlesource.com/c/kernel/common/+/471825


[ANNOUNCE] v4.19-rt1

2018-10-29 Thread Sebastian Andrzej Siewior
Dear RT folks!

I'm pleased to announce the v4.19-rt1 patch set. 

Changes since v4.18.16-rt9:

  - rebase to v4.19

Known issues
 - A warning triggered in "rcu_note_context_switch" originated from
   SyS_timer_gettime(). The issue was always there, it is now
   visible. Reported by Grygorii Strashko and Daniel Wagner.

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git 
v4.19-rt1

The RT patch against v4.19 can be found here:


https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patch-4.19-rt1.patch.xz

The split quilt queue is available at:


https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19-rt1.tar.xz

Sebastian


Re: [GIT PULL] rpmsg updates for v4.20

2018-10-29 Thread Linus Torvalds
On Mon, Oct 29, 2018 at 4:30 PM Bjorn Andersson
 wrote:
>
> rpmsg updates for v4.20

Pulled (along with the remoteproc branch),

Linus


Re: Re: [PATCH] fs/proc: introduce /proc/stat2 file

2018-10-29 Thread Alexey Dobriyan
On Mon, Oct 29, 2018 at 11:40:47PM +, Daniel Colascione wrote:
> On Mon, Oct 29, 2018 at 11:34 PM, Alexey Dobriyan  wrote:
> >> I'd much rather move to a model in which userspace *explicitly* tells
> >> the kernel which fields it wants, with the kernel replying with just
> >> those particular fields, maybe in their raw binary representations.
> >> The ASCII-text bag-of-everything files would remain available for
> >> ad-hoc and non-performance critical use, but programs that cared about
> >> performance would have an efficient bypass. One concrete approach is
> >> to let users open up today's proc files and, instead of read(2)ing a
> >> text blob, use an ioctl to retrieve specified and targeted information
> >> of the sort that would normally be encoded in the text blob. Because
> >> callers would open the same file when using either the text or binary
> >> interfaces, little would have to change, and it'd be easy to implement
> >> fallbacks when a particular system doesn't support a particular
> >> fast-path ioctl.
> >
> > You've just reinvented system calls.
> 
> I don't know why you say so. There are important benefits that come
> from using an ioctl on a proc file FD instead of a plain system call.
> Procfs files have file permissions, auditing, SCM_RIGHTS-ability, PID
> race immunity, and other things that you wouldn't get from a plain
> "get this information about this PID" system call.

This whole thread started because /proc/stat is slow and every number in
/proc/stat is system global.

If you continue adding stuff to /proc, one day someone will notice that
core VFS adds considerable overhead, at this point there is nothing
anyone could do.

I'd strongly advise to look at what this DB actually needs and deliver
just that.

Very little of other things apply to /proc/stat:
* system call auditing exists,
* /proc/stat is world readable and continues to be so,
* thus passing descriptor around is pretty useless,
* $PID race doesn't apply.

Additionally, passing descriptors feels like a party trick.
I suspect that's not how people use statistics in /proc: they run
processes and one privileged enough monitoring daemon collects data,
otherwise userspace needs to cooperate with monitoring userspace
which of course doesn't happen.

PID race is solved by giving out descriptors which pin "struct pid".
Which is how the race is solved currently: dentry pins inode, inode
pins "struct pid".


[PATCH 3/6] arch: Define ARCH_HAS_PHYS_INITRD for ARM and Unicore32

2018-10-29 Thread Florian Fainelli
Make ARM and Unicore32 select ARCH_HAS_PHYS_INITRD meaning that they do
define phys_initrd_start/phys_initrd_size and make use of it.

Signed-off-by: Florian Fainelli 
---
 arch/Kconfig   | 7 +++
 arch/arm/Kconfig   | 1 +
 arch/unicore32/Kconfig | 1 +
 3 files changed, 9 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d329608913e..0926f8291782 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -865,6 +865,13 @@ config HAVE_ARCH_PREL32_RELOCATIONS
  architectures, and don't require runtime relocation on relocatable
  kernels.
 
+config ARCH_HAS_PHYS_INITRD
+   bool
+   help
+ An architecture selects this when it needs to act on the physical
+ address of the initial ramdisk and allow generic code such as
+ FDT to populate that address.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e8cd55a5b04c..b87c40701b0e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -11,6 +11,7 @@ config ARM
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
+   select ARCH_HAS_PHYS_INITRD
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 0c5111b206bd..28a66ae61dcf 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -2,6 +2,7 @@
 config UNICORE32
def_bool y
select ARCH_HAS_DEVMEM_IS_ALLOWED
+   select ARCH_HAS_PHYS_INITRD
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select DMA_DIRECT_OPS
-- 
2.17.1



[PATCH 5/6] arm64: Utilize ARCH_HAS_PHYS_INITRD

2018-10-29 Thread Florian Fainelli
ARM64 is the only architecture that re-defines
__early_init_dt_declare_initrd() in order for that function to populate
initrd_start/initrd_end with physical addresses instead of virtual
addresses. Instead of having an override, just get rid of that
implementation and select ARCH_HAS_PHYS_INITRD which would do that for
us.

Signed-off-by: Florian Fainelli 
---
 arch/arm64/Kconfig  |  1 +
 arch/arm64/include/asm/memory.h |  8 
 arch/arm64/mm/init.c| 23 +++
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 964f682a2b7b..302fb721d412 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,6 +21,7 @@ config ARM64
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+   select ARCH_HAS_PHYS_INITRD
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b96442960aea..dc3ca21ba240 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -168,14 +168,6 @@
 #define IOREMAP_MAX_ORDER  (PMD_SHIFT)
 #endif
 
-#ifdef CONFIG_BLK_DEV_INITRD
-#define __early_init_dt_declare_initrd(__start, __end) \
-   do {\
-   initrd_start = (__start);   \
-   initrd_end = (__end);   \
-   } while (0)
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3cf87341859f..fef9eb7fdb50 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,6 +61,8 @@
  */
 s64 memstart_addr __ro_after_init = -1;
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
+phys_addr_t phys_initrd_start __initdata;
+unsigned long phys_initrd_size __initdata;
 
 #ifdef CONFIG_BLK_DEV_INITRD
 static int __init early_initrd(char *p)
@@ -72,8 +74,8 @@ static int __init early_initrd(char *p)
if (*endp == ',') {
size = memparse(endp + 1, NULL);
 
-   initrd_start = start;
-   initrd_end = start + size;
+   phys_initrd_start = start;
+   phys_initrd_size = size;
}
return 0;
 }
@@ -408,14 +410,14 @@ void __init arm64_memblock_init(void)
memblock_add(__pa_symbol(_text), (u64)(_end - _text));
}
 
-   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
+   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/*
 * Add back the memory we just removed if it results in the
 * initrd to become inaccessible via the linear mapping.
 * Otherwise, this is a no-op
 */
-   u64 base = initrd_start & PAGE_MASK;
-   u64 size = PAGE_ALIGN(initrd_end) - base;
+   u64 base = phys_initrd_start & PAGE_MASK;
+   u64 size = PAGE_ALIGN(phys_initrd_size);
 
/*
 * We can only add back the initrd memory if we don't end up
@@ -460,13 +462,10 @@ void __init arm64_memblock_init(void)
 */
memblock_reserve(__pa_symbol(_text), _end - _text);
 #ifdef CONFIG_BLK_DEV_INITRD
-   if (initrd_start) {
-   memblock_reserve(initrd_start, initrd_end - initrd_start);
-
-   /* the generic initrd code expects virtual addresses */
-   initrd_start = __phys_to_virt(initrd_start);
-   initrd_end = __phys_to_virt(initrd_end);
-   }
+   /* the generic initrd code expects virtual addresses */
+   initrd_start = __phys_to_virt(phys_initrd_start);
+   initrd_end = initrd_start + phys_initrd_size;
+   initrd_below_start_ok = 0;
 #endif
 
early_init_fdt_scan_reserved_mem();
-- 
2.17.1



[PATCH 2/6] arch: Make phys_initrd_start and phys_initrd_size global variables

2018-10-29 Thread Florian Fainelli
Make phys_initrd_start and phys_initrd_size global variables that will
later be referenced by generic code under drivers/of/fdt.c.

Signed-off-by: Florian Fainelli 
---
 arch/arm/mm/init.c   | 4 ++--
 arch/unicore32/mm/init.c | 4 ++--
 include/linux/initrd.h   | 3 +++
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 0cc8e04295a4..8f364aa24172 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -51,8 +51,8 @@ unsigned long __init __clear_cr(unsigned long mask)
 }
 #endif
 
-static phys_addr_t phys_initrd_start __initdata = 0;
-static unsigned long phys_initrd_size __initdata = 0;
+phys_addr_t phys_initrd_start __initdata = 0;
+unsigned long phys_initrd_size __initdata = 0;
 
 static int __init early_initrd(char *p)
 {
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 8f8699e62bd5..4dd26d6f02e5 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -31,8 +31,8 @@
 
 #include "mm.h"
 
-static unsigned long phys_initrd_start __initdata = 0x0100;
-static unsigned long phys_initrd_size __initdata = SZ_8M;
+phys_addr_t phys_initrd_start __initdata = 0x0100;
+unsigned long phys_initrd_size __initdata = SZ_8M;
 
 static int __init early_initrd(char *p)
 {
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 84b423044088..14beaff9b445 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -21,4 +21,7 @@ extern int initrd_below_start_ok;
 extern unsigned long initrd_start, initrd_end;
 extern void free_initrd_mem(unsigned long, unsigned long);
 
+extern phys_addr_t phys_initrd_start;
+extern unsigned long phys_initrd_size;
+
 extern unsigned int real_root_dev;
-- 
2.17.1



[PATCH 6/6] of/fdt: Remove definition check for __early_init_dt_declare_initrd()

2018-10-29 Thread Florian Fainelli
With the one and only architecture (ARM64) no longer defining a custom
__early_init_dt_declare_initrd() function, just get rid of the check for
that function being already defined.

Signed-off-by: Florian Fainelli 
---
 drivers/of/fdt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 313cd4f24258..3d84fe79eeb4 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -892,7 +892,6 @@ const void * __init of_flat_dt_match_machine(const void 
*default_match,
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-#ifndef __early_init_dt_declare_initrd
 static void __early_init_dt_declare_initrd(unsigned long start,
   unsigned long end)
 {
@@ -904,7 +903,6 @@ static void __early_init_dt_declare_initrd(unsigned long 
start,
phys_initrd_size = end - start;
 #endif
 }
-#endif
 
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
-- 
2.17.1



[PATCH 4/6] of/fdt: Populate phys_initrd_start/phys_initrd_size from FDT

2018-10-29 Thread Florian Fainelli
If the architecture implements ARCH_HAS_PHYS_INITRD, make the FDT
scanning code populate the physical address of the start of the initrd
and its size.

Signed-off-by: Florian Fainelli 
---
 arch/arm/mm/init.c | 2 +-
 drivers/of/fdt.c   | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8f364aa24172..517e95cfb5d2 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -237,7 +237,7 @@ static void __init arm_initrd_init(void)
phys_addr_t start;
unsigned long size;
 
-   /* FDT scan will populate initrd_start */
+   /* FDT scan will populate initrd_start and phys_initrd_start */
if (initrd_start && !phys_initrd_size) {
phys_initrd_start = __virt_to_phys(initrd_start);
phys_initrd_size = initrd_end - initrd_start;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 76c83c1ffeda..313cd4f24258 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -899,6 +899,10 @@ static void __early_init_dt_declare_initrd(unsigned long 
start,
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
initrd_below_start_ok = 1;
+#ifdef CONFIG_ARCH_HAS_PHYS_INITRD
+   phys_initrd_start = start;
+   phys_initrd_size = end - start;
+#endif
 }
 #endif
 
-- 
2.17.1



[PATCH 1/6] nds32: Remove phys_initrd_start and phys_initrd_size

2018-10-29 Thread Florian Fainelli
This will conflict with a subsequent change making phys_initrd_start and
phys_initrd_size global variables. nds32 neither makes use of those nor
provides suitable declarations, so just get rid of them.

Signed-off-by: Florian Fainelli 
---
 arch/nds32/mm/init.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index c713d2ad55dc..32f55a24ccbb 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -22,8 +22,6 @@
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_SPINLOCK(anon_alias_lock);
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern unsigned long phys_initrd_start;
-extern unsigned long phys_initrd_size;
 
 /*
  * empty_zero_page is a special page that is used for
-- 
2.17.1



Re:

2018-10-29 Thread Ahmed Ad
-- 
I solicit your cooperation to transfer $7 million to your bank account
and you will be entitled to 40% of this fund, respond urgent if you
are interested for more details.


[PATCH 0/6] arm64: Get rid of __early_init_dt_declare_initrd()

2018-10-29 Thread Florian Fainelli
Hi all,

The numbers no longer make any sense since I either did not correctly
understand the feedback being given, or dramatically changed the
approach.

This version introduces an architecture symbol: ARCH_HAS_PHYS_INITRD
which indicates whether the architecture cares/supports parsing the
physical address of the initrd. Currently ARM (32-bit), Unicore32 and
now ARM64 support that.

When that symbol is defined, we also have the generic FDT code populate
the initrd physical address and size, and we can later make use of that
within architecture specific code to populate the memblock regions and
do the right physical to virtual address conversion.

Rob, hopefully this is what you had in mind.

Previous discussions/submissions list here:

v3:
https://www.spinics.net/lists/arm-kernel/msg683566.html
v2:
https://lkml.org/lkml/2018/10/25/4


Florian Fainelli (6):
  nds32: Remove phys_initrd_start and phys_initrd_size
  arch: Make phys_initrd_start and phys_initrd_size global variables
  arch: Define ARCH_HAS_PHYS_INITRD for ARM and Unicore32
  of/fdt: Populate phys_initrd_start/phys_initrd_size from FDT
  arm64: Utilize ARCH_HAS_PHYS_INITRD
  of/fdt: Remove definition check for __early_init_dt_declare_initrd()

 arch/Kconfig|  7 +++
 arch/arm/Kconfig|  1 +
 arch/arm/mm/init.c  |  6 +++---
 arch/arm64/Kconfig  |  1 +
 arch/arm64/include/asm/memory.h |  8 
 arch/arm64/mm/init.c| 23 +++
 arch/nds32/mm/init.c|  2 --
 arch/unicore32/Kconfig  |  1 +
 arch/unicore32/mm/init.c|  4 ++--
 drivers/of/fdt.c|  6 --
 include/linux/initrd.h  |  3 +++
 11 files changed, 33 insertions(+), 29 deletions(-)

-- 
2.17.1



Re: linux-next: manual merge of the compiler-attributes tree with the kbuild tree

2018-10-29 Thread Stephen Rothwell
Hi all,

On Tue, 30 Oct 2018 10:46:37 +1100 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the compiler-attributes tree got a conflict
> in:
> 
>   include/linux/compiler-gcc.h
> 
> between commit:
> 
>   94c7dfd01652 ("kernel hacking: support building kernel with -Og 
> optimization level")
> 
> from the kbuild tree and commits:
> 
>   5c67a52f3da0 ("Compiler Attributes: always use the extra-underscores 
> syntax")
>   989bd5000f36 ("Compiler Attributes: remove unneeded sparse (__CHECKER__) 
> tests")
> 
> from the compiler-attributes tree.
> 
> I fixed it up (the latter just removed the __CHECKER__ check, so I did
> that) and can carry the fix as necessary. This is now fixed as far as

On reflection, that may not have been the correct resolution ...

-- 
Cheers,
Stephen Rothwell


pgpgBBdcLtMNJ.pgp
Description: OpenPGP digital signature


linux-next: manual merge of the compiler-attributes tree with the kbuild tree

2018-10-29 Thread Stephen Rothwell
Hi Miguel,

Today's linux-next merge of the compiler-attributes tree got a conflict
in:

  include/linux/compiler-gcc.h

between commit:

  94c7dfd01652 ("kernel hacking: support building kernel with -Og optimization 
level")

from the kbuild tree and commits:

  5c67a52f3da0 ("Compiler Attributes: always use the extra-underscores syntax")
  989bd5000f36 ("Compiler Attributes: remove unneeded sparse (__CHECKER__) 
tests")

from the compiler-attributes tree.

I fixed it up (the latter just removed the __CHECKER__ check, so I did
that) and can carry the fix as necessary. This is now fixed as far as
linux-next is concerned, but any non trivial conflicts should be mentioned
to your upstream maintainer when your tree is submitted for merging.
You may also want to consider cooperating with the maintainer of the
conflicting tree to minimise any particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


pgprlBtdAFSN8.pgp
Description: OpenPGP digital signature


Re: Re: [PATCH] fs/proc: introduce /proc/stat2 file

2018-10-29 Thread Daniel Colascione
On Mon, Oct 29, 2018 at 11:34 PM, Alexey Dobriyan  wrote:
>> I'd much rather move to a model in which userspace *explicitly* tells
>> the kernel which fields it wants, with the kernel replying with just
>> those particular fields, maybe in their raw binary representations.
>> The ASCII-text bag-of-everything files would remain available for
>> ad-hoc and non-performance critical use, but programs that cared about
>> performance would have an efficient bypass. One concrete approach is
>> to let users open up today's proc files and, instead of read(2)ing a
>> text blob, use an ioctl to retrieve specified and targeted information
>> of the sort that would normally be encoded in the text blob. Because
>> callers would open the same file when using either the text or binary
>> interfaces, little would have to change, and it'd be easy to implement
>> fallbacks when a particular system doesn't support a particular
>> fast-path ioctl.
>
> You've just reinvented systems calls.

I don't know why you say so. There are important benefits that come
from using an ioctl on a proc file FD instead of a plain system call.
Procfs files have file permissions, auditing, SCM_RIGHTS-ability, PID
race immunity, and other things that you wouldn't get from a plain
"get this information about this PID" system call.


Re: Re: [PATCH] fs/proc: introduce /proc/stat2 file

2018-10-29 Thread Alexey Dobriyan
> I'd much rather move to a model in which userspace *explicitly* tells
> the kernel which fields it wants, with the kernel replying with just
> those particular fields, maybe in their raw binary representations.
> The ASCII-text bag-of-everything files would remain available for
> ad-hoc and non-performance critical use, but programs that cared about
> performance would have an efficient bypass. One concrete approach is
> to let users open up today's proc files and, instead of read(2)ing a
> text blob, use an ioctl to retrieve specified and targeted information
> of the sort that would normally be encoded in the text blob. Because
> callers would open the same file when using either the text or binary
> interfaces, little would have to change, and it'd be easy to implement
> fallbacks when a particular system doesn't support a particular
> fast-path ioctl.

You've just reinvented system calls.

I suspect the DB in question cares about CPU related numbers and nothing
else which can be nicely split from the rest of /proc/stat.


Re: [PATCH][next] ubifs: authentication: fix memory leak on error exit path

2018-10-29 Thread Richard Weinberger
Am Dienstag, 30. Oktober 2018, 00:21:46 CET schrieb Colin King:
> From: Colin Ian King 
> 
> Currently a failure when calling ubifs_read_nnode results in a leak
> of desc and buf because of a direct return. Fix this by exiting via
> label 'out' that performs the necessary free'ing of the resources.
> 
> Fixes: a1dc58140f7e ("ubifs: authentication: Authenticate LPT")
> 
> Signed-off-by: Colin Ian King 
> ---
>  fs/ubifs/lpt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
> index d1d5e96350dd..c162459a1e02 100644
> --- a/fs/ubifs/lpt.c
> +++ b/fs/ubifs/lpt.c
> @@ -1688,7 +1688,7 @@ int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash)
>   if (!c->nroot) {
>   err = ubifs_read_nnode(c, NULL, 0);
>   if (err)
> - return err;
> + goto out;

IMHO a better fix would be reading the root node before allocating these 
buffers.

Thanks,
//richard




[PATCH] perf/core: clean up inconsistent indentation

2018-10-29 Thread Colin King
From: Colin Ian King 

Replace a bunch of spaces with a tab to clean up the indentation.

Signed-off-by: Colin Ian King 
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8c490130c4fb..84530ab358c3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -750,7 +750,7 @@ static inline void update_cgrp_time_from_event(struct 
perf_event *event)
/*
 * Do not update time when cgroup is not active
 */
-   if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+   if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
__update_cgrp_time(event->cgrp);
 }
 
-- 
2.19.1



Re: [PATCH v8 2/2] samples: add an example of seccomp user trap

2018-10-29 Thread Serge E. Hallyn
On Mon, Oct 29, 2018 at 04:40:31PM -0600, Tycho Andersen wrote:
> The idea here is just to give a demonstration of how one could safely use
> the SECCOMP_RET_USER_NOTIF feature to do mount policies. This particular
> policy is (as noted in the comment) not very interesting, but it serves to
> illustrate how one might apply a policy dodging the various TOCTOU issues.
> 
> Signed-off-by: Tycho Andersen 
> CC: Kees Cook 
> CC: Andy Lutomirski 
> CC: Oleg Nesterov 
> CC: Eric W. Biederman 
> CC: "Serge E. Hallyn" 
> CC: Christian Brauner 
> CC: Tyler Hicks 
> CC: Akihiro Suda 
> ---
> v5: new in v5
> v7: updates for v7 API changes
> v8: * add some more comments about what's happening in main() (Kees)
> * move from ptrace API to SECCOMP_FILTER_FLAG_NEW_LISTENER
> ---
>  samples/seccomp/.gitignore  |   1 +
>  samples/seccomp/Makefile|   7 +-
>  samples/seccomp/user-trap.c | 345 
>  3 files changed, 352 insertions(+), 1 deletion(-)
> 
> diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
> index 78fb78184291..d1e2e817d556 100644
> --- a/samples/seccomp/.gitignore
> +++ b/samples/seccomp/.gitignore
> @@ -1,3 +1,4 @@
>  bpf-direct
>  bpf-fancy
>  dropper
> +user-trap
> diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
> index cf34ff6b4065..4920903c8009 100644
> --- a/samples/seccomp/Makefile
> +++ b/samples/seccomp/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  ifndef CROSS_COMPILE
> -hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
> +hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct user-trap
>  
>  HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
>  HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
> @@ -16,6 +16,10 @@ HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include
>  HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
>  bpf-direct-objs := bpf-direct.o
>  
> +HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include
> +HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include
> +user-trap-objs := user-trap.o
> +
>  # Try to match the kernel target.
>  ifndef CONFIG_64BIT
>  
> @@ -33,6 +37,7 @@ HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
>  HOSTLDLIBS_bpf-direct += $(MFLAG)
>  HOSTLDLIBS_bpf-fancy += $(MFLAG)
>  HOSTLDLIBS_dropper += $(MFLAG)
> +HOSTLDLIBS_user-trap += $(MFLAG)
>  endif
>  always := $(hostprogs-m)
>  endif
> diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
> new file mode 100644
> index ..bba7ac803c6c
> --- /dev/null
> +++ b/samples/seccomp/user-trap.c
> @@ -0,0 +1,345 @@
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
> +
> +static int seccomp(unsigned int op, unsigned int flags, void *args)
> +{
> + errno = 0;
> + return syscall(__NR_seccomp, op, flags, args);
> +}
> +
> +static int send_fd(int sock, int fd)
> +{
> + struct msghdr msg = {};
> + struct cmsghdr *cmsg;
> + char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
> + struct iovec io = {
> + .iov_base = &c,
> + .iov_len = 1,
> + };
> +
> + msg.msg_iov = &io;
> + msg.msg_iovlen = 1;
> + msg.msg_control = buf;
> + msg.msg_controllen = sizeof(buf);
> + cmsg = CMSG_FIRSTHDR(&msg);
> + cmsg->cmsg_level = SOL_SOCKET;
> + cmsg->cmsg_type = SCM_RIGHTS;
> + cmsg->cmsg_len = CMSG_LEN(sizeof(int));
> + *((int *)CMSG_DATA(cmsg)) = fd;
> + msg.msg_controllen = cmsg->cmsg_len;
> +
> + if (sendmsg(sock, &msg, 0) < 0) {
> + perror("sendmsg");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int recv_fd(int sock)
> +{
> + struct msghdr msg = {};
> + struct cmsghdr *cmsg;
> + char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
> + struct iovec io = {
> + .iov_base = &c,
> + .iov_len = 1,
> + };
> +
> + msg.msg_iov = &io;
> + msg.msg_iovlen = 1;
> + msg.msg_control = buf;
> + msg.msg_controllen = sizeof(buf);
> +
> + if (recvmsg(sock, &msg, 0) < 0) {
> + perror("recvmsg");
> + return -1;
> + }
> +
> + cmsg = CMSG_FIRSTHDR(&msg);
> +
> + return *((int *)CMSG_DATA(cmsg));
> +}
> +
> +static int user_trap_syscall(int nr, unsigned int flags)
> +{
> + struct sock_filter filter[] = {
> + BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
> + offsetof(struct seccomp_data, nr)),
> + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
> + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
> + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
> + };
> +
> + struct sock_fprog prog = {
> + .len = (unsigned short)ARRAY_SIZE(filter),

[GIT PULL] rpmsg updates for v4.20

2018-10-29 Thread Bjorn Andersson
The following changes since commit 5b394b2ddf0347bef56e50c69a58773c94343ff3:

  Linux 4.19-rc1 (2018-08-26 14:11:59 -0700)

are available in the Git repository at:

  git://github.com/andersson/remoteproc tags/rpmsg-v4.20

for you to fetch changes up to 928002a5e9dab2ddc1a0fe3e00739e89be30dc6b:

  rpmsg: glink: smem: Support rx peak for size less than 4 bytes (2018-10-03 
17:04:32 -0700)


rpmsg updates for v4.20

This migrates rpmsg_char to use read/write_iter to allow being operated
using aio, removes the message size alignment requirements from glink,
closes a potential memory leak in SMD and switches to %pOFn for printing
device_node names.


Arun Kumar Neelakantam (1):
  rpmsg: glink: smem: Support rx peak for size less than 4 bytes

Bjorn Andersson (1):
  rpmsg: char: Migrate to iter versions of read and write

Chris Lew (1):
  rpmsg: glink: Remove chunk size word align warning

Colin Ian King (1):
  rpmsg: smd: fix memory leak on channel create

Rob Herring (1):
  rpmsg: Convert to using %pOFn instead of device_node.name

 drivers/rpmsg/qcom_glink_native.c |  3 ---
 drivers/rpmsg/qcom_glink_smem.c   | 14 +-
 drivers/rpmsg/qcom_smd.c  |  9 ++---
 drivers/rpmsg/rpmsg_char.c| 27 ---
 4 files changed, 27 insertions(+), 26 deletions(-)


[GIT PULL] remoteproc updates for v4.20

2018-10-29 Thread Bjorn Andersson
The following changes since commit 5b394b2ddf0347bef56e50c69a58773c94343ff3:

  Linux 4.19-rc1 (2018-08-26 14:11:59 -0700)

are available in the Git repository at:

  git://github.com/andersson/remoteproc tags/rproc-v4.20

for you to fetch changes up to f18b7e914fd2ed5e8b5733644cefcf62f7582679:

  remoteproc: qcom: q6v5-mss: Register segments/dumpfn for coredump (2018-10-19 
12:54:03 -0700)


remoteproc updates for v4.20

This contains a series of patches that reworks the memory carveout
handling in remoteproc, in order to allow this to be reused for
statically allocated memory regions to be used for e.g. firmware.

It adds support for audio DSP (both TZ-assisted and non-TZ assisted) and
compute DSP on Qualcomm SDM845, TZ-assisted audio DSP, compute DSP and
WiFi processor on Qualcomm QCS404 and through some renaming of the
drivers cleans up the naming situation.

Finally, support for custom coredump segment handlers is added and
is used in the Qualcomm modem remoteproc driver to gather memory dumps
of the firmware.


Bjorn Andersson (6):
  remoteproc/davinci: Use %zx for formating size_t
  remoteproc: qcom: adsp: Add SDM845 ADSP and CDSP support
  remoteproc: qcom: q6v5: Propagate EPROBE_DEFER
  remoteproc: qcom: Rename Hexagon v5 PAS driver
  remoteproc: qcom: Rename Hexagon v5 modem driver
  remoteproc: qcom: pas: Add QCS404 remoteprocs

Brian Norris (2):
  remoteproc: qcom: q6v5-mss: add SCM probe dependency
  remoteproc: qcom: q6v5: shore up resource probe handling

Loic Pallardy (12):
  remoteproc: configure IOMMU only if device address requested
  remoteproc: add rproc_va_to_pa function
  remoteproc: add release ops in rproc_mem_entry struct
  remoteproc: add name in rproc_mem_entry struct
  remoteproc: add helper function to allocate and init rproc_mem_entry 
struct
  remoteproc: introduce rproc_add_carveout function
  remoteproc: introduce rproc_find_carveout_by_name function
  remoteproc: add alloc ops in rproc_mem_entry struct
  remoteproc: add helper function to allocate rproc_mem_entry from reserved 
memory
  remoteproc: add helper function to check carveout device address
  remoteproc: modify rproc_handle_carveout to support pre-registered region
  remoteproc: modify vring allocation to rely on centralized carveout 
allocator

Rohit kumar (2):
  dt-binding: remoteproc: Add QTI ADSP PIL bindings
  remoteproc: qcom: Introduce Non-PAS ADSP PIL driver

Sibi Sankar (11):
  dt-bindings: remoteproc: qcom: Remove additional definition tag
  dt-bindings: remoteproc: Add PDC reset binding for Q6V5 PIL
  remoteproc: qcom: q6v5-pil: Explicitly get mss_restart line
  remoteproc: qcom: q6v5-pil: Add PDC reset for modem on SDM845 SoCs
  remoteproc: qcom: q6v5: Fix a race condition on fatal crash
  remoteproc: qcom: q6v5-pil: Assign the relocated address
  remoteproc: Introduce custom dump function for each remoteproc segment
  remoteproc: Add mechanism for custom dump function assignment
  remoteproc: qcom: q6v5-mss: Refactor mba load/unload sequence
  remoteproc: qcom: q6v5-mss: Add custom dump function for modem
  remoteproc: qcom: q6v5-mss: Register segments/dumpfn for coredump

Suman Anna (2):
  remoteproc: Check for NULL firmwares in sysfs interface
  remoteproc: Add missing kernel-doc comment for auto-boot

Wei Yongjun (1):
  remoteproc: qcom: qcom_q6v5_adsp: Fix some return value check

 .../bindings/remoteproc/qcom,adsp-pil.txt  | 126 +
 .../devicetree/bindings/remoteproc/qcom,adsp.txt   |   5 +
 .../devicetree/bindings/remoteproc/qcom,q6v5.txt   |   8 +-
 drivers/remoteproc/Kconfig |  46 +-
 drivers/remoteproc/Makefile|   5 +-
 drivers/remoteproc/da8xx_remoteproc.c  |   2 +-
 drivers/remoteproc/qcom_q6v5.c |  43 +-
 drivers/remoteproc/qcom_q6v5_adsp.c| 497 +
 .../{qcom_q6v5_pil.c => qcom_q6v5_mss.c}   | 420 ++-
 .../{qcom_adsp_pil.c => qcom_q6v5_pas.c}   |  28 +-
 drivers/remoteproc/remoteproc_core.c   | 595 +
 drivers/remoteproc/remoteproc_debugfs.c|   1 +
 drivers/remoteproc/remoteproc_internal.h   |   2 +
 drivers/remoteproc/remoteproc_sysfs.c  |   5 +
 drivers/remoteproc/remoteproc_virtio.c |  14 +-
 include/linux/remoteproc.h |  47 +-
 16 files changed, 1557 insertions(+), 287 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/remoteproc/qcom,adsp-pil.txt
 create mode 100644 drivers/remoteproc/qcom_q6v5_adsp.c
 rename drivers/remoteproc/{qcom_q6v5_pil.c => qcom_q6v5_mss.c} (90%)
 rename drivers/remoteproc/{qcom_adsp_pil.c => qcom_q6v5_pas.c} (90%)


Re: [PATCH 1/2 v5] arm64: Get rid of __early_init_dt_declare_initrd()

2018-10-29 Thread Rob Herring
On Mon, Oct 29, 2018 at 4:58 PM Ard Biesheuvel
 wrote:
>
> On 29 October 2018 at 16:59, Rob Herring  wrote:
> > +Ard who last touched this.
> >
> > On Mon, Oct 29, 2018 at 2:23 PM Florian Fainelli  
> > wrote:
> >>
> >> ARM64 is the only architecture that re-defines
> >> __early_init_dt_declare_initrd() in order for that function to populate
> >> initrd_start/initrd_end with physical addresses instead of virtual
> >> addresses. Instead of having an override, just get rid of that
> >> implementation and perform the virtual to physical conversion of these
> >> addresses in arm64_memblock_init() where relevant.
> >>
> >> Signed-off-by: Florian Fainelli 
> >> Signed-off-by: Mike Rapoport 
> >> ---
> >>  arch/arm64/include/asm/memory.h |  8 ---
> >>  arch/arm64/mm/init.c| 42 +
> >>  2 files changed, 27 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/memory.h 
> >> b/arch/arm64/include/asm/memory.h
> >> index b96442960aea..dc3ca21ba240 100644
> >> --- a/arch/arm64/include/asm/memory.h
> >> +++ b/arch/arm64/include/asm/memory.h
> >> @@ -168,14 +168,6 @@
> >>  #define IOREMAP_MAX_ORDER  (PMD_SHIFT)
> >>  #endif
> >>
> >> -#ifdef CONFIG_BLK_DEV_INITRD
> >> -#define __early_init_dt_declare_initrd(__start, __end) \
> >> -   do {\
> >> -   initrd_start = (__start);   \
> >> -   initrd_end = (__end);   \
> >> -   } while (0)
> >> -#endif
> >> -
> >>  #ifndef __ASSEMBLY__
> >>
> >>  #include 
> >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >> index 3cf87341859f..292570b08f85 100644
> >> --- a/arch/arm64/mm/init.c
> >> +++ b/arch/arm64/mm/init.c
> >> @@ -62,6 +62,8 @@
> >>  s64 memstart_addr __ro_after_init = -1;
> >>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>
> >> +static phys_addr_t phys_initrd_start, phys_initrd_end;
> >> +
> >>  #ifdef CONFIG_BLK_DEV_INITRD
> >>  static int __init early_initrd(char *p)
> >>  {
> >> @@ -72,8 +74,8 @@ static int __init early_initrd(char *p)
> >> if (*endp == ',') {
> >> size = memparse(endp + 1, NULL);
> >>
> >> -   initrd_start = start;
> >> -   initrd_end = start + size;
> >> +   phys_initrd_start = start;
> >> +   phys_initrd_end = start + size;
> >> }
> >> return 0;
> >>  }
> >> @@ -364,6 +366,7 @@ static void __init fdt_enforce_memory_region(void)
> >>  void __init arm64_memblock_init(void)
> >>  {
> >> const s64 linear_region_size = -(s64)PAGE_OFFSET;
> >> +   u64 __maybe_unused base, size;
> >>
> >> /* Handle linux,usable-memory-range property */
> >> fdt_enforce_memory_region();
> >> @@ -408,14 +411,25 @@ void __init arm64_memblock_init(void)
> >> memblock_add(__pa_symbol(_text), (u64)(_end - _text));
> >> }
> >>
> >> -   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
> >> +   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
> >> +   (initrd_start || phys_initrd_start)) {
> >
> > I've tried to explain already that this is broken. The problem is
> > __early_init_dt_declare_initrd using __va() which happens before this
> > function is called. __va() uses PHYS_OFFSET which in turn is defined
> > as memstart_addr. However, memstart_addr may be changed just above
> > this hunk, so the earlier conversion to a VA may not be valid at this
> > point. This is explained if you read Ard's commit that added all this
> > mess.
> >
> > You could fix this by converting back to a PA before adjusting
> > memstart_addr, but that's 2 wrongs making a right and fragile. The
> > better solution is the other proposal making the DT code set
> > phys_initrd_* (whatever the ARM code calls them).
> >
>
> On arm64, we have
>
> #define PHYS_OFFSET \
>   ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
>
> and
>
> s64 memstart_addr __ro_after_init = -1;
>
> IOW, any attempt to perform PA to VA translations before memstart_addr
> is assigned will BUG() if CONFIG_DEBUG_VM=y, so please enable that
> when playing with this code.

Which will result in a crashed kernel with no console output unless
you have earlycon enabled (or maybe EFI console will be up?). A WARN
would be better.

> The reason we have this code is because the start of the linear region
> might not coincide with memblock_start_of_DRAM(), which could happen,
> e.g., when running a 39-bit VA kernel on a system with a very sparse
> memory map (which is unfortunately what some silicon vendors think is
> what ARM recommends) and the kernel loaded near the top of that
> memory. The ability to load the kernel anywhere in physical memory was
> introduced to accommodate physical KASLR.
>
> Ideally, we'd fix this by only recording physical addresses for the
> initrd in generic code, and deferring the translation until the poi

[PATCH][next] ubifs: authentication: fix memory leak on error exit path

2018-10-29 Thread Colin King
From: Colin Ian King 

Currently a failure when calling ubifs_read_nnode results in a leak
of desc and buf because of a direct return. Fix this by exiting via
label 'out' that performs the necessary free'ing of the resources.

Fixes: a1dc58140f7e ("ubifs: authentication: Authenticate LPT")

Signed-off-by: Colin Ian King 
---
 fs/ubifs/lpt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index d1d5e96350dd..c162459a1e02 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1688,7 +1688,7 @@ int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash)
if (!c->nroot) {
err = ubifs_read_nnode(c, NULL, 0);
if (err)
-   return err;
+   goto out;
}
 
cnode = (struct ubifs_cnode *)c->nroot;
-- 
2.19.1



[PATCH net-next 2/2] net: nixge: Update device-tree bindings with v3.00

2018-10-29 Thread alex . williams
From: Alex Williams 

Now the DMA engine is free to float elsewhere in the system map.

Signed-off-by: Alex Williams 
---
 Documentation/devicetree/bindings/net/nixge.txt | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/nixge.txt 
b/Documentation/devicetree/bindings/net/nixge.txt
index e55af7f0881a..d0f9fb520578 100644
--- a/Documentation/devicetree/bindings/net/nixge.txt
+++ b/Documentation/devicetree/bindings/net/nixge.txt
@@ -1,8 +1,14 @@
 * NI XGE Ethernet controller
 
 Required properties:
-- compatible: Should be "ni,xge-enet-2.00"
-- reg: Address and length of the register set for the device
+- compatible: Should be "ni,xge-enet-3.00", but can be "ni,xge-enet-2.00" for
+  older device trees with DMA engines co-located in the address 
map,
+  with the one reg entry to describe the whole device.
+- reg: Address and length of the register set for the device. It contains the
+   information of registers in the same order as described by reg-names.
+- reg-names: Should contain the reg names
+   "dma":  DMA engine control and status region
+"ctrl": MDIO and PHY control and status region
 - interrupts: Should contain tx and rx interrupt
 - interrupt-names: Should be "rx" and "tx"
 - phy-mode: See ethernet.txt file in the same directory.
@@ -13,7 +19,9 @@ Required properties:
 Examples (10G generic PHY):
nixge0: ethernet@4000 {
compatible = "ni,xge-enet-2.00";
-   reg = <0x4000 0x6000>;
+   reg = <0x4000 0x4000
+  0x41002000 0x2000>;
+   reg-names = "dma", "ctrl";
 
nvmem-cells = <&eth1_addr>;
nvmem-cell-names = "address";
-- 
2.14.5



Re: [PATCH v2] mtd: spi-nor: Add support for SPI boot flash access for AMD Family 16h

2018-10-29 Thread Grandbois, Brett

On 28/10/18 1:39 am, Boris Brezillon wrote:
> Hi Brett,
>
> On Tue, 16 Oct 2018 00:57:41 +
> "Grandbois, Brett"  wrote:
>
>> Add support to expose the SPI boot flash on AMD Family 16h CPUs as a
>> standard mtd device to give userspace BIOS updaters greater feature
>> support.  The BIOS and Kernel Developer's Guide refers to this as the
>> 'SPI ROM' controller and so the driver follows that naming convention
>> for consistency.
>>
> We're currently trying to convert spi-nor controller drivers to the
> spi-mem interface [1]. Can you look at this new interface and tell me if
> you'd be able to implement it? If that's not possible, then I'd prefer
> to have this driver implement the mtd_info interface directly.

So from going over the spi-mem interface it looks like the intent is for these 
sorts of devices to be a standard spi_controller with only mem_ops defined and 
the transfer/_one/_one_message left as NULL?  Is that correct?  That's a bit of 
a pivot from how it's currently done (it's conceptually similar to the 
intel-spi-pci driver so I was following that) but I should be able to rework it 
to the new interface.  This then lives under drivers/spi and thus should be 
submitted to linux-spi?
   

> Thanks,
>
> Boris
>
> [1] https://elixir.bootlin.com/linux/latest/source/include/linux/spi/spi-mem.h#L185


Re: [PATCH RT 1/2] x86/kconfig: Fall back to ticket spinlocks

2018-10-29 Thread Sebastian Andrzej Siewior
On 2018-10-29 21:16:16 [+0100], Daniel Wagner wrote:
> From: Daniel Wagner 
> 
> v4.4.162-rt176-rc1 stable review patch.
> If anyone has any objections, please let me know.

I thought that we were going to route this via Greg/stable for v4.4?

> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -42,7 +42,6 @@ config X86
>   select ARCH_USE_BUILTIN_BSWAP
>   select ARCH_USE_CMPXCHG_LOCKREF if X86_64
>   select ARCH_USE_QUEUED_RWLOCKS
> - select ARCH_USE_QUEUED_SPINLOCKS
>   select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
>   select ARCH_WANTS_DYNAMIC_TASK_STRUCT
>   select ARCH_WANT_FRAME_POINTERS
> -- 
> 2.14.4


Re: [PATCH] fs/proc: introduce /proc/stat2 file

2018-10-29 Thread Daniel Colascione
On Mon, Oct 29, 2018 at 7:25 PM, Davidlohr Bueso  wrote:
> This patch introduces a new /proc/stat2 file that is identical to the
> regular 'stat' except that it zeroes all hard irq statistics. The new
> file is a drop in replacement to stat for users that need performance.

For a while now, I've been thinking over ways to improve the
performance of collecting various bits of kernel information. I don't
think that a proliferation of special-purpose named bag-of-fields file
variants is the right answer, because even if you add a few info-file
variants, you're still left with a situation where a given file
provides a particular caller with too little or too much information.
I'd much rather move to a model in which userspace *explicitly* tells
the kernel which fields it wants, with the kernel replying with just
those particular fields, maybe in their raw binary representations.
The ASCII-text bag-of-everything files would remain available for
ad-hoc and non-performance critical use, but programs that cared about
performance would have an efficient bypass. One concrete approach is
to let users open up today's proc files and, instead of read(2)ing a
text blob, use an ioctl to retrieve specified and targeted information
of the sort that would normally be encoded in the text blob. Because
callers would open the same file when using either the text or binary
interfaces, little would have to change, and it'd be easy to implement
fallbacks when a particular system doesn't support a particular
fast-path ioctl.


Re: Logitech high-resolution scrolling..

2018-10-29 Thread Harry Cutts
On Mon, 29 Oct 2018 at 15:01, Linus Torvalds
 wrote:
> That would work, yes.

OK, I'll write a patch for this. (It may be next week, though, as I
have a deadline on a separate project this week.)

> Except I think you *do* want the "reset on direction change" logic,
> because otherwise we still end up having the:
>
> > - we update remainder to -1
>
> where it now gets easier to next time go the wrong way, for no good
> reason.  So now you only need another 6/8ths the other way to get to
> within 7/8ths of -8 and scroll back.
>
> In other words, the whole "round partial scrolling" also causes that
> whole "now the other direction is closer" issue.
>
> At 7/8's it is less obviously a problem than it was at 1/2, but I
> still think it's a sign of an unstable algorithm, where changes get
> triggered too easily in the non-highres world.
>
> Also, honestly, I'm not sure I see the point. *IF* you actually scroll
> more in one direction, it doesn't matter one whit whether you pick
> 1/2, 7/8, or whole multipliers: the *next* step is still always going
> to be one whole multiplier away.
>
> So I think the whole rounding is actually misguided. I think it may
> come from the very fact that you did *not* reset the remainder on
> direction changes, so you could scroll in one direction to -3, and
> then you change direction and go a "whole" tick the other way, but now
> it's just at +5, so you think you need to round up.
>
> With the whole "reset when changing direction", I don't think the
> rounding is necessary, and I don't think it makes sense.

Resetting on direction change would certainly make complete sense in
smooth mode. The reason that I'm reluctant to do it is for clicky
mode, where we think it's important that the low-res event happen at a
consistent point in the movement between notches (the resting
positions of the wheel). For example, imagine the following scenario
with a wheel multiplier of 8 and the threshold initially at 7/8ths of
a notch:

- I scroll one notch down. The low-res event occurs just before the
wheel settles in to its notch, leaving a -1/8th remainder, and then
(on most wheels) the ratchet mechanism settles the wheel 1/8th further
into its resting position, eliminating the remainder.
- I move the wheel 3/8ths further down, then change my mind and start
scrolling upwards.

If we reset on direction change at this point, then the "zero point"
will have moved, so that we trigger the low-res movement at -4/8ths
(at the peak of resistance between the two notches) instead of at
7/8ths. If we don't reset but allow the 3/8ths remainder to be
cleared, the trigger point stays at 7/8ths. It's a minor thing, to be
sure, but we think that keeping the on-screen response consistent with
the tactile feel of the wheel is important for the user experience.

Harry Cutts
Chrome OS Touch/Input team


RE: [PATCH v2 0/8] selftests/resctrl: Add resctrl selftest

2018-10-29 Thread Moger, Babu
Hi Fenghua, 

> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org  ow...@vger.kernel.org> On Behalf Of Fenghua Yu
> Sent: Thursday, October 25, 2018 6:07 PM
> To: Thomas Gleixner ; Ingo Molnar
> ; H Peter Anvin ; Tony Luck
> ; Peter Zijlstra ; Reinette
> Chatre ; Moger, Babu
> ; James Morse ; Ravi V
> Shankar ; Sai Praneeth Prakhya
> ; Arshiya Hayatkhan Pathan
> 
> Cc: linux-kernel ; Fenghua Yu
> 
> Subject: [PATCH v2 0/8] selftests/resctrl: Add resctrl selftest
> 
> With more and more resctrl features being added by Intel, AMD
> and ARM, a test tool is becoming more and more useful to validate
> that both hardware and software functionalities work as expected.
> 
> We introduce resctrl selftest to cover resctrl features on both
> X86 and ARM architectures. It first implements MBM (Memory Bandwidth
> Monitoring) and MBA (Memory Bandwidth Allocation) tests. We can
> enhance
> the selftest tool to include more functionality tests in future.
> 
> There is an existing resctrl test suite 'intel_cmt_cat'. But the major
> purpose of the tool is to test Intel(R) RDT hardware via writing and
> reading MSR registers. It does access resctrl file system; but the
> functionalities are very limited. And it doesn't support automatic test
> and a lot of manual verifications are involved.
> 
> So the selftest tool we are introducing here provides a convenient
> tool which does automatic resctrl testing, is easily available in kernel
> tree, and will be extended to AMD QoS and ARM MPAM.
> 
> The selftest tool is in tools/testing/selftests/resctrl in order to have
> generic test code for all architectures.
> 
> Changelog:
> v2:
> - Change code based on comments from Babu Moger
> - Clean up other places.
> 
> Arshiya Hayatkhan Pathan (2):
>   selftests/resctrl: Add mbm test
>   selftests/resctrl: Add mba test

I suggest using MBM and MBA (all caps) when talking about these features.
The same applies to each individual patch.

> 
> Fenghua Yu (2):
>   selftests/resctrl: Add README for resctrl tests
>   selftests/resctrl: Add the test in MAINTAINERS
> 
> Sai Praneeth Prakhya (4):
>   selftests/resctrl: Add basic resctrl file system operations and data
>   selftests/resctrl: Read memory bandwidth from perf IMC counter and
> from resctrl file system
>   selftests/resctrl: Add callback to start a benchmark
>   selftests/resctrl: Add built in benchmark
> 
>  MAINTAINERS |   1 +
>  tools/testing/selftests/resctrl/Makefile|  16 +
>  tools/testing/selftests/resctrl/README  |  53 ++
>  tools/testing/selftests/resctrl/fill_buf.c  | 175 ++
>  tools/testing/selftests/resctrl/mba_test.c  | 175 ++
>  tools/testing/selftests/resctrl/mbm_test.c  | 143 +
>  tools/testing/selftests/resctrl/membw.c | 678
> 
>  tools/testing/selftests/resctrl/resctrl.h   |  88 +++
>  tools/testing/selftests/resctrl/resctrl_tests.c | 138 +
>  tools/testing/selftests/resctrl/resctrlfs.c | 465 
>  10 files changed, 1932 insertions(+)
>  create mode 100644 tools/testing/selftests/resctrl/Makefile
>  create mode 100644 tools/testing/selftests/resctrl/README
>  create mode 100644 tools/testing/selftests/resctrl/fill_buf.c
>  create mode 100644 tools/testing/selftests/resctrl/mba_test.c
>  create mode 100644 tools/testing/selftests/resctrl/mbm_test.c
>  create mode 100644 tools/testing/selftests/resctrl/membw.c
>  create mode 100644 tools/testing/selftests/resctrl/resctrl.h
>  create mode 100644 tools/testing/selftests/resctrl/resctrl_tests.c
>  create mode 100644 tools/testing/selftests/resctrl/resctrlfs.c
> 
> --
> 2.5.0



Re: [RFC 00/60] Coscheduling for Linux

2018-10-29 Thread Subhra Mazumdar



On 10/26/18 4:44 PM, Jan H. Schönherr wrote:

On 19/10/2018 02.26, Subhra Mazumdar wrote:

Hi Jan,

Hi. Sorry for the delay.


On 9/7/18 2:39 PM, Jan H. Schönherr wrote:

The collective context switch from one coscheduled set of tasks to another
-- while fast -- is not atomic. If a use-case needs the absolute guarantee
that all tasks of the previous set have stopped executing before any task
of the next set starts executing, an additional hand-shake/barrier needs to
be added.


Do you know how long the delay is? I.e., what is the overlap time when a thread
of the new group starts executing on one HT while a thread of another group is
still running on the other HT?

The delay is roughly equivalent to the IPI latency, if we're just talking
about coscheduling at SMT level: one sibling decides to schedule another
group, sends an IPI to the other sibling(s), and may already start
executing a task of that other group, before the IPI is received on the
other end.

Can you point to where the leader is sending the IPI to other siblings?

I did some experiments and the delay seems to be sub-microsecond. I ran 2
threads that just loop in one cosched group, affinitized to the 2 HTs of
a core. Another thread in a different cosched group then starts running,
affinitized to the first HT of the same core. I timestamped just before
context_switch() in __schedule() for the threads switching from one to
another and from one to idle. The following is what I get on cpus 1 and 45,
which are siblings; cpu 1 is where the other thread preempts:

[  403.216625] cpu:45 sub1->idle:403216624579
[  403.238623] cpu:1 sub1->sub2:403238621585
[  403.238624] cpu:45 sub1->idle:403238621787
[  403.260619] cpu:1 sub1->sub2:403260619182
[  403.260620] cpu:45 sub1->idle:403260619413
[  403.282617] cpu:1 sub1->sub2:403282617157
[  403.282618] cpu:45 sub1->idle:403282617317
..

Not sure why the first switch to idle on the cpu happened. But from then
onwards the difference in timestamps is less than a microsecond. This is just
a crude way to get a sense of the delay; it may not be exact.

Thanks,
Subhra


Now, there are some things that may delay processing an IPI, but in those
cases the target CPU isn't executing user code.

I've yet to produce some current numbers for SMT-only coscheduling. An
older ballpark number I have is about 2 microseconds for the collective
context switch of one hierarchy level, but take that with a grain of salt.

Regards
Jan



Re: [PATCH i2c-next v8 5/5] i2c: aspeed: Add bus idle waiting logic for multi-master use cases

2018-10-29 Thread kbuild test robot
Hi Jae,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on wsa/i2c/for-next]
[also build test WARNING on v4.19 next-20181029]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Jae-Hyun-Yoo/i2c-aspeed-Add-bus-idle-waiting-logic-for-multi-master-use-cases/20181030-051719
base:   https://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git 
i2c/for-next
config: xtensa-allyesconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 8.1.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=8.1.0 make.cross ARCH=xtensa 

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All warnings (new ones prefixed by >>):

   In file included from include/linux/ktime.h:25,
from include/linux/rcutiny.h:28,
from include/linux/rcupdate.h:209,
from include/linux/srcu.h:33,
from include/linux/notifier.h:16,
from include/linux/clk.h:17,
from drivers/i2c/busses/i2c-aspeed.c:13:
   drivers/i2c/busses/i2c-aspeed.c: In function 'aspeed_i2c_master_xfer':
>> include/linux/jiffies.h:108:15: warning: 'check_started' may be used 
>> uninitialized in this function [-Wmaybe-uninitialized]
  ((long)((b) - (a)) < 0))
  ^
   drivers/i2c/busses/i2c-aspeed.c:607:16: note: 'check_started' was declared 
here
 unsigned long check_started;
   ^
--
   In file included from include/linux/ktime.h:25,
from include/linux/rcutiny.h:28,
from include/linux/rcupdate.h:209,
from include/linux/srcu.h:33,
from include/linux/notifier.h:16,
from include/linux/clk.h:17,
from drivers/i2c//busses/i2c-aspeed.c:13:
   drivers/i2c//busses/i2c-aspeed.c: In function 'aspeed_i2c_master_xfer':
>> include/linux/jiffies.h:108:15: warning: 'check_started' may be used 
>> uninitialized in this function [-Wmaybe-uninitialized]
  ((long)((b) - (a)) < 0))
  ^
   drivers/i2c//busses/i2c-aspeed.c:607:16: note: 'check_started' was declared 
here
 unsigned long check_started;
   ^

vim +/check_started +108 include/linux/jiffies.h

^1da177e Linus Torvalds   2005-04-16   91  
^1da177e Linus Torvalds   2005-04-16   92  /*
^1da177e Linus Torvalds   2005-04-16   93   *   These inlines deal with timer 
wrapping correctly. You are 
^1da177e Linus Torvalds   2005-04-16   94   *   strongly encouraged to use them
^1da177e Linus Torvalds   2005-04-16   95   *   1. Because people otherwise 
forget
^1da177e Linus Torvalds   2005-04-16   96   *   2. Because if the timer wrap 
changes in future you won't have to
^1da177e Linus Torvalds   2005-04-16   97   *  alter your driver code.
^1da177e Linus Torvalds   2005-04-16   98   *
^1da177e Linus Torvalds   2005-04-16   99   * time_after(a,b) returns true if 
the time a is after time b.
^1da177e Linus Torvalds   2005-04-16  100   *
^1da177e Linus Torvalds   2005-04-16  101   * Do this with "<0" and ">=0" to 
only test the sign of the result. A
^1da177e Linus Torvalds   2005-04-16  102   * good compiler would generate 
better code (and a really good compiler
^1da177e Linus Torvalds   2005-04-16  103   * wouldn't care). Gcc is currently 
neither.
^1da177e Linus Torvalds   2005-04-16  104   */
^1da177e Linus Torvalds   2005-04-16  105  #define time_after(a,b)  
\
^1da177e Linus Torvalds   2005-04-16  106   (typecheck(unsigned long, a) && 
\
^1da177e Linus Torvalds   2005-04-16  107typecheck(unsigned long, b) && 
\
5a581b36 Paul E. McKenney 2013-07-27 @108((long)((b) - (a)) < 0))
^1da177e Linus Torvalds   2005-04-16  109  #define time_before(a,b) 
time_after(b,a)
^1da177e Linus Torvalds   2005-04-16  110  

:: The code at line 108 was first introduced by commit
:: 5a581b367b5df0531265311fc681c2abd377e5e6 jiffies: Avoid undefined 
behavior from signed overflow

:: TO: Paul E. McKenney 
:: CC: Paul E. McKenney 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: The linux devs can rescind their license grant.

2018-10-29 Thread Bradley M. Kuhn
On Thu, Oct 25, 2018 at 07:56:26AM +, visionsofal...@redchan.it wrote:
> The linux devs can rescind their license grant.
Greg KH responded on Thu, 25 Oct 2018 09:19:11 +0100:
>> No they can not, please do not keep spreading false information.

I was explicitly cc'ed on this thread by visionsofalice.  I've read the
whole thread, and the only useful thing I can contribute here is to agree
with Greg and additionally provide some backup research on the point:
https://sfconservancy.org/news/2018/sep/26/GPLv2-irrevocability/

Software Freedom Conservancy engaged our legal counsel to write a new
section for the Copyleft Guide that further explains the irrevocability of
GPLv2.  We published this when others raised these specious claims back in
September.  Direct link to new section:
https://copyleft.org/guide/comprehensive-gpl-guidech8.html#x11-540007.4


HTH,
-- 
Bradley M. Kuhn
Distinguished Technologist of Software Freedom Conservancy

Become a Conservancy Supporter today: https://sfconservancy.org/supporter


Re: [LKP] [mm] 68c37ccedc: BUG:kernel_hang_in_early-boot_stage,last_printk:early_console_in_setup_code

2018-10-29 Thread Andrew Morton
On Mon, 29 Oct 2018 21:15:01 +0800 kernel test robot  
wrote:

> FYI, we noticed the following commit (built with gcc-7):
> 
> commit: 68c37ccedcde10514898f4ba3b28c0de85c590d1 ("mm: nobootmem: remove 
> bootmem allocation APIs")
> https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> 
> in testcase: boot
> 
> on test machine: qemu-system-x86_64 -enable-kvm -cpu host -smp 2 -m 2G
> 
> caused below changes (please refer to attached dmesg/kmsg for entire 
> log/backtrace):
> 
>
> +-+++
> | 
> | 6881a1e729 | 68c37ccedc |
> +-+++
> | boot_successes  
> | 4  | 0  |
> | boot_failures   
> | 0  | 4  |
> | BUG:kernel_hang_in_early-boot_stage,last_printk:early_console_in_setup_code 
> | 0  | 4  |
> +-+++

Well that's irritating.

Does the problem remain when the entire patch series is applied?

mm-remove-config_no_bootmem.patch
mm-remove-config_no_bootmem-fix.patch
mm-remove-config_have_memblock.patch
mm-remove-config_have_memblock-fix.patch
mm-remove-config_have_memblock-fix-2.patch
mm-remove-config_have_memblock-fix-3.patch
mm-remove-bootmem-allocator-implementation.patch
mm-nobootmem-remove-dead-code.patch
memblock-rename-memblock_alloc_nid_try_nid-to-memblock_phys_alloc.patch
memblock-remove-_virt-from-apis-returning-virtual-address.patch
memblock-replace-alloc_bootmem_align-with-memblock_alloc.patch
memblock-replace-alloc_bootmem_low-with-memblock_alloc_low.patch
memblock-replace-__alloc_bootmem_node_nopanic-with-memblock_alloc_try_nid_nopanic.patch
memblock-replace-alloc_bootmem_pages_nopanic-with-memblock_alloc_nopanic.patch
memblock-replace-alloc_bootmem_low-with-memblock_alloc_low-2.patch
memblock-replace-__alloc_bootmem_nopanic-with-memblock_alloc_from_nopanic.patch
memblock-add-align-parameter-to-memblock_alloc_node.patch
memblock-replace-alloc_bootmem_pages_node-with-memblock_alloc_node.patch
memblock-replace-__alloc_bootmem_node-with-appropriate-memblock_-api.patch
memblock-replace-alloc_bootmem_node-with-memblock_alloc_node.patch
memblock-replace-alloc_bootmem_low_pages-with-memblock_alloc_low.patch
memblock-replace-alloc_bootmem_pages-with-memblock_alloc.patch
memblock-replace-__alloc_bootmem-with-memblock_alloc_from.patch
memblock-replace-alloc_bootmem-with-memblock_alloc.patch
mm-nobootmem-remove-bootmem-allocation-apis.patch
memblock-replace-free_bootmem_node-with-memblock_free.patch
memblock-replace-free_bootmem_late-with-memblock_free_late.patch
memblock-rename-free_all_bootmem-to-memblock_free_all.patch
memblock-rename-__free_pages_bootmem-to-memblock_free_pages.patch
mm-remove-nobootmem.patch
memblock-replace-bootmem_alloc_-with-memblock-variants.patch
mm-remove-include-linux-bootmemh.patch
mm-remove-include-linux-bootmemh-fix.patch
mm-remove-include-linux-bootmemh-fix-2.patch
mm-remove-include-linux-bootmemh-fix-3.patch
docs-boot-time-mm-remove-bootmem-documentation.patch



  1   2   3   4   5   6   >