Re: [PATCH v4] powerpc32: provide VIRT_CPU_ACCOUNTING
On 2/11/16, Christophe Leroy wrote: > This patch provides VIRT_CPU_ACCOUTING to PPC32 architecture. > PPC32 doesn't have the PACA structure, so we use the task_info > structure to store the accounting data. > > In order to reuse on PPC32 the PPC64 functions, all u64 data has > been replaced by 'unsigned long' so that it is u32 on PPC32 and > u64 on PPC64 > > Signed-off-by: Christophe Leroy > --- > Changes in v3: unlike previous version of the patch that was inspired > from IA64 architecture, this new version tries to reuse as much as > possible the PPC64 implementation. > > PPC32 doesn't have PACA and past discusion on v2 version has shown > that it is not worth implementing a PACA in PPC32 architecture > (see below benh opinion) > > benh: PACA is actually a data structure and you really really don't want it > on ppc32 :-) Having a register point to current works, having a register > point to per-cpu data instead works too (ie, change what we do today), > but don't introduce a PACA *please* :-) > > Changes in v4: ACCOUNT_CPU_USER_ENTRY/EXIT() needed updates in other > places than entry_32.S and entry_64.S (reported by kbuild-robot) > Related defines in asm-offset.c need to be conditional to > CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (reported by kbuild-robot) > > arch/powerpc/Kconfig | 1 + > arch/powerpc/include/asm/cputime.h | 4 > arch/powerpc/include/asm/exception-64s.h | 2 +- > arch/powerpc/include/asm/ppc_asm.h | 31 -- > arch/powerpc/include/asm/reg.h | 1 + > arch/powerpc/include/asm/thread_info.h | 11 + > arch/powerpc/kernel/asm-offsets.c| 7 ++ > arch/powerpc/kernel/entry_32.S | 17 ++ > arch/powerpc/kernel/entry_64.S | 6 ++--- > arch/powerpc/kernel/exceptions-64e.S | 4 ++-- > arch/powerpc/kernel/time.c | 38 > ++-- > arch/powerpc/platforms/Kconfig.cputype | 1 - > 12 files changed, 97 insertions(+), 26 deletions(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 3a557be..57ce4ff 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ 
-159,6 +159,7 @@ config PPC > select ARCH_HAS_DEVMEM_IS_ALLOWED > select HAVE_ARCH_SECCOMP_FILTER > select ARCH_HAS_UBSAN_SANITIZE_ALL > + select HAVE_VIRT_CPU_ACCOUNTING > > config GENERIC_CSUM > def_bool CPU_LITTLE_ENDIAN > diff --git a/arch/powerpc/include/asm/cputime.h > b/arch/powerpc/include/asm/cputime.h > index e245255..c4c33be 100644 > --- a/arch/powerpc/include/asm/cputime.h > +++ b/arch/powerpc/include/asm/cputime.h > @@ -230,7 +230,11 @@ static inline cputime_t clock_t_to_cputime(const > unsigned long clk) > > #define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct)) > > +#ifdef CONFIG_PPC64 > static inline void arch_vtime_task_switch(struct task_struct *tsk) { } > +#else > +void arch_vtime_task_switch(struct task_struct *tsk); > +#endif > > #endif /* __KERNEL__ */ > #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ > diff --git a/arch/powerpc/include/asm/exception-64s.h > b/arch/powerpc/include/asm/exception-64s.h > index 93ae809..8bc38d1 100644 > --- a/arch/powerpc/include/asm/exception-64s.h > +++ b/arch/powerpc/include/asm/exception-64s.h > @@ -287,7 +287,7 @@ do_kvm_##n: > \ > std r0,GPR0(r1);/* save r0 in stackframe*/ \ > std r10,GPR1(r1); /* save r1 in stackframe*/ \ > beq 4f; /* if from kernel mode */ \ > - ACCOUNT_CPU_USER_ENTRY(r9, r10); \ > + ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ > SAVE_PPR(area, r9, r10); \ > 4: EXCEPTION_PROLOG_COMMON_2(area)\ > EXCEPTION_PROLOG_COMMON_3(n) \ > diff --git a/arch/powerpc/include/asm/ppc_asm.h > b/arch/powerpc/include/asm/ppc_asm.h > index 499d9f8..05b6738 100644 > --- a/arch/powerpc/include/asm/ppc_asm.h > +++ b/arch/powerpc/include/asm/ppc_asm.h > @@ -24,27 +24,34 @@ > */ > > #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE > -#define ACCOUNT_CPU_USER_ENTRY(ra, rb) > -#define ACCOUNT_CPU_USER_EXIT(ra, rb) > +#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) > +#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) > #define ACCOUNT_STOLEN_TIME > #else > -#define ACCOUNT_CPU_USER_ENTRY(ra, rb) > \ > +#ifdef CONFIG_PPC64 > 
+#define AC_LD ld > +#define AC_STD std > +#else > +#define AC_LD lwz > +#define AC_STD stw We already have macros in the asm-compat.h > +#endif > +#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ > MFTB(ra); /* get timebase */ \ > - ld
Re: [PATCH v8 4/8] ppc64 ftrace_with_regs configuration variables
On Wed, 2016-02-10 at 17:25 +0100, Torsten Duwe wrote: snip > diff --git a/arch/powerpc/gcc-mprofile-kernel-notrace.sh > b/arch/powerpc/gcc-mprofile-kernel-notrace.sh > new file mode 100755 > index 000..68d6482 > --- /dev/null > +++ b/arch/powerpc/gcc-mprofile-kernel-notrace.sh > @@ -0,0 +1,33 @@ > +#!/bin/sh > +# Test whether the compile option -mprofile-kernel > +# generates profiling code ( = a call to mcount), and > +# whether a function without any global references sets > +# the TOC pointer properly at the beginning, and > +# whether the "notrace" function attribute successfully > +# suppresses the _mcount call. > + > +echo "int func() { return 0; }" | \ > +$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ > +grep -q "mcount" > + > +trace_result=$? > + > +echo "int func() { return 0; }" | \ > +$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ > +sed -n -e '/func:/,/bl _mcount/p' | grep -q TOC > + > +leaf_toc_result=$? > + leaf_toc_result failed for me with gcc 5. I'll try and grab gcc-6 and give the patches a spin > +/bin/echo -e "#include \nnotrace int func() { return 0; }" | \ > +$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ > +grep -q "mcount" > + > +notrace_result=$? > + > +if [ "$trace_result" -eq "0" -a \ > + "$leaf_toc_result" -eq "0" -a \ > + "$notrace_result" -eq "1" ]; then > + echo y > +else > + echo n > +fi
Re: [REGRESSION] i915: No HDMI output with 4.4
Daniel, I do confirm that this hacky patch: https://lkml.org/lkml/2016/1/19/637 works around my issue. I understand that this is an improper fix, so let me know how I could debug my issue further. Thanks. 09.02.2016 12:11, Daniel Vetter wrote: Can you please retest with latest -rc? There have been some bugs in the HDMI detection changes, which should be fixed now. If that doesn't help please try to bisect which exact change caused the regression. Thanks, Daniel
[PATCH v2] mm/slab: re-implement pfmemalloc support
From: Joonsoo Kim Current implementation of pfmemalloc handling in SLAB has some problems. 1) pfmemalloc_active is set to true when there is just one or more pfmemalloc slabs in the system, but it is cleared when there is no pfmemalloc slab in one arbitrary kmem_cache. So, pfmemalloc_active could be wrongly cleared. 2) Search to partial and free list doesn't happen when non-pfmemalloc object are not found in cpu cache. Instead, allocating new slab happens and it is not optimal. 3) Even after sk_memalloc_socks() is disabled, cpu cache would keep pfmemalloc objects tagged with SLAB_OBJ_PFMEMALLOC. It isn't cleared if sk_memalloc_socks() is disabled so it could cause problem. 4) If cpu cache is filled with pfmemalloc objects, it would cause slow down non-pfmemalloc allocation. To me, current pointer tagging approach looks complex and fragile so this patch re-implement whole thing instead of fixing problems one by one. Design principle for new implementation is that 1) Don't disrupt non-pfmemalloc allocation in fast path even if sk_memalloc_socks() is enabled. It's more likely case than pfmemalloc allocation. 2) Ensure that pfmemalloc slab is used only for pfmemalloc allocation. 3) Don't consider performance of pfmemalloc allocation in memory deficiency state. As a result, all pfmemalloc alloc/free in memory tight state will be handled in slow-path. If there is non-pfmemalloc free object, it will be returned first even for pfmemalloc user in fast-path so that performance of pfmemalloc user isn't affected in normal case and pfmemalloc objects will be kept as long as possible. 
v2) o remove racy check whether there is free object or not in kmem_cache_node o fix leak case Signed-off-by: Joonsoo Kim --- mm/slab.c | 284 +- 1 file changed, 116 insertions(+), 168 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index d48454b..330bc8a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -169,12 +169,6 @@ typedef unsigned short freelist_idx_t; #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1) /* - * true if a page was allocated from pfmemalloc reserves for network-based - * swap - */ -static bool pfmemalloc_active __read_mostly; - -/* * struct array_cache * * Purpose: @@ -195,10 +189,6 @@ struct array_cache { * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing * the entries. -* -* Entries should not be directly dereferenced as -* entries belonging to slabs marked pfmemalloc will -* have the lower bits set SLAB_OBJ_PFMEMALLOC */ }; @@ -207,23 +197,6 @@ struct alien_cache { struct array_cache ac; }; -#define SLAB_OBJ_PFMEMALLOC1 -static inline bool is_obj_pfmemalloc(void *objp) -{ - return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC; -} - -static inline void set_obj_pfmemalloc(void **objp) -{ - *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC); - return; -} - -static inline void clear_obj_pfmemalloc(void **objp) -{ - *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC); -} - /* * Need this for bootstrapping a per node allocator. 
*/ @@ -585,120 +558,21 @@ static struct array_cache *alloc_arraycache(int node, int entries, return ac; } -static inline bool is_slab_pfmemalloc(struct page *page) -{ - return PageSlabPfmemalloc(page); -} - -/* Clears pfmemalloc_active if no slabs have pfmalloc set */ -static void recheck_pfmemalloc_active(struct kmem_cache *cachep, - struct array_cache *ac) -{ - struct kmem_cache_node *n = get_node(cachep, numa_mem_id()); - struct page *page; - unsigned long flags; - - if (!pfmemalloc_active) - return; - - spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->slabs_full, lru) - if (is_slab_pfmemalloc(page)) - goto out; - - list_for_each_entry(page, &n->slabs_partial, lru) - if (is_slab_pfmemalloc(page)) - goto out; - - list_for_each_entry(page, &n->slabs_free, lru) - if (is_slab_pfmemalloc(page)) - goto out; - - pfmemalloc_active = false; -out: - spin_unlock_irqrestore(&n->list_lock, flags); -} - -static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, - gfp_t flags, bool force_refill) -{ - int i; - void *objp = ac->entry[--ac->avail]; - - /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ - if (unlikely(is_obj_pfmemalloc(objp))) { - struct kmem_cache_node *n; - - if (gfp_pfmemalloc_allowed(flags)) { -
Regression: ABI: /sys/module/mmcblk changed to /sys/module/mmc_block
I recently noticed that with 4.4-based kernels, Android's userspace wasn't able to properly detect micro-SD cards when they were inserted. This is due to vold not being able to access /sys/module/mmcblk/parameters/perdev_minors (see: http://androidxref.com/6.0.1_r10/xref/system/vold/Disk.cpp#49). Looking into it, it seems commit 829b6962f7e3cfc ("mmc: block: don't use parameter prefix if built as module") changed the behavior, resulting in the userspace path changing to: "/sys/module/mmc_block/..." This seems like an ABI regression, which is breaking existing userspace. So... Probably something to revert? thanks -john
[PATCH v4] powerpc32: provide VIRT_CPU_ACCOUNTING
This patch provides VIRT_CPU_ACCOUNTING to PPC32 architecture. PPC32 doesn't have the PACA structure, so we use the thread_info structure to store the accounting data. In order to reuse on PPC32 the PPC64 functions, all u64 data has been replaced by 'unsigned long' so that it is u32 on PPC32 and u64 on PPC64. Signed-off-by: Christophe Leroy --- Changes in v3: unlike previous version of the patch that was inspired from IA64 architecture, this new version tries to reuse as much as possible the PPC64 implementation. PPC32 doesn't have PACA and past discussion on v2 version has shown that it is not worth implementing a PACA in PPC32 architecture (see below benh opinion) benh: PACA is actually a data structure and you really really don't want it on ppc32 :-) Having a register point to current works, having a register point to per-cpu data instead works too (ie, change what we do today), but don't introduce a PACA *please* :-) Changes in v4: ACCOUNT_CPU_USER_ENTRY/EXIT() needed updates in other places than entry_32.S and entry_64.S (reported by kbuild-robot) Related defines in asm-offsets.c need to be conditional to CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (reported by kbuild-robot) arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/cputime.h | 4 arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/ppc_asm.h | 31 -- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/include/asm/thread_info.h | 11 + arch/powerpc/kernel/asm-offsets.c| 7 ++ arch/powerpc/kernel/entry_32.S | 17 ++ arch/powerpc/kernel/entry_64.S | 6 ++--- arch/powerpc/kernel/exceptions-64e.S | 4 ++-- arch/powerpc/kernel/time.c | 38 ++-- arch/powerpc/platforms/Kconfig.cputype | 1 - 12 files changed, 97 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3a557be..57ce4ff 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -159,6 +159,7 @@ config PPC select ARCH_HAS_DEVMEM_IS_ALLOWED select HAVE_ARCH_SECCOMP_FILTER select ARCH_HAS_UBSAN_SANITIZE_ALL + 
select HAVE_VIRT_CPU_ACCOUNTING config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index e245255..c4c33be 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -230,7 +230,11 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) #define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct)) +#ifdef CONFIG_PPC64 static inline void arch_vtime_task_switch(struct task_struct *tsk) { } +#else +void arch_vtime_task_switch(struct task_struct *tsk); +#endif #endif /* __KERNEL__ */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 93ae809..8bc38d1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -287,7 +287,7 @@ do_kvm_##n: \ std r0,GPR0(r1);/* save r0 in stackframe*/ \ std r10,GPR1(r1); /* save r1 in stackframe*/ \ beq 4f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r9, r10); \ + ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9, r10); \ 4: EXCEPTION_PROLOG_COMMON_2(area)\ EXCEPTION_PROLOG_COMMON_3(n) \ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 499d9f8..05b6738 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -24,27 +24,34 @@ */ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_ENTRY(ra, rb) -#define ACCOUNT_CPU_USER_EXIT(ra, rb) +#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) +#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) #define ACCOUNT_STOLEN_TIME #else -#define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ +#ifdef CONFIG_PPC64 +#define AC_LD ld +#define AC_STD std +#else +#define AC_LD lwz +#define AC_STD stw +#endif +#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)\ MFTB(ra); /* get timebase */ \ - ld rb,PACA_STARTTIME_USER(r13);\ - std ra,PACA_STARTTIME(r13); \ + AC_LD rb, 
PACA_STARTTIME_USER(ptr); \ + AC_STD ra, PACA_STARTTIME(ptr);\ subf rb,rb,ra;
Re: [PATCH v3] err.h: allow IS_ERR_VALUE to handle properly more types
On 02/10/2016 10:01 PM, Arnd Bergmann wrote: > On Tuesday 09 February 2016 09:42:26 Andrzej Hajda wrote: >> +cc Rasmus Villemoes, I forgot to add him earlier. >> >> On 02/08/2016 01:01 PM, Arnd Bergmann wrote: >>> On Monday 08 February 2016 09:45:55 Andrzej Hajda wrote: On 02/05/2016 11:52 AM, Arnd Bergmann wrote: > On Thursday 04 February 2016 10:59:31 Andrew Morton wrote: My version produces shortest code, Arnd's is the same as the old one. On the other side Rasmus proposition seems to be the most straightforward to me. Anyway I am not sure if the code length is the most important here. By the way .data segment size grows almost 4 times between gcc 4.4 and 4.8 :) Also numbers for arm64 looks interesting. Just for the record below all proposed implementations: #define IS_ERR_VALUE_old(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) #define IS_ERR_VALUE_andrzej(x) ((typeof(x))(-1) <= 0 \ ? unlikely((x) <= -1) \ : unlikely((x) >= (typeof(x))-MAX_ERRNO)) #define IS_ERR_VALUE_arnd(x) (unlikely((unsigned long long)(x) >= (unsigned long long)(typeof(x))-MAX_ERRNO)) #define IS_ERR_VALUE_rasmus(x) ({\ typeof(x) _x = (x);\ unlikely(_x >= (typeof(x))-MAX_ERRNO && _x <= (typeof(x))-1);\ }) > Andrzej's version is a little shorter on ARM because in case of signed > numbers > it only checks for negative values, rather than checking for values in the > [-MAX_ERRNO..-1] range. I think the original behavior is more logical > in this case, and my version restores it. As I looked at the usage of the macro in the kernel I have not found any code which could benefit from the original behavior, except some buggy code in staging which have already pending fix[1]. But maybe it would be better to use IS_ERR_VALUE to always check if err is in range [-MAX_ERRNO..-1] and just use simple 'err < 0' in typical case of signed types. >>> If we do that, should we also make it illegal to use an invalid type >>> for IS_ERR()? 
At least that could also catch any use of 'char' and 'unsigned >>> char' that are still broken. >> I meant rather to make such 'policy' for future code by adding some >> comment to the macro. Optionally adding compile time warning >> to encourage developers to change current usage, however I am >> not sure if it is not too harsh. >> This way it could be also good to use your version of the macro. >> It could be also good to add compiletime_assert to prevent char types >> as suggested by Rasmus. >> >> Finally it could look like: >> /* >> * Use IS_ERR_VALUE only on unsigned types of at least two bytes size. >> * For signed types use '< 0' comparison. >> */ >> #define IS_ERR_VALUE(x)\ >> ({\ >> compiletime_assert(sizeof(x) > 1, "IS_ERR_VALUE does not handle >> byte-size types");\ >> compiletime_assert_warning((typeof(x))(-1) > 0, "IS_ERR_VALUE >> should be called on unsigned types only, use '< 0' instead");\ >> (unlikely((unsigned long long)(x) >= (unsigned long >> long)(typeof(x))-MAX_ERRNO));\ >> }) >> > I think the easiest way to express this would be to ensure that the argument > is 'unsigned long', like: > > #define IS_ERR_VALUE(x) ((unsigned long*)NULL == (typeof (x)*)NULL && \ >unlikely((unsigned long long)(x) >= (unsigned long > long)(typeof(x))-MAX_ERRNO)) This way you will limit it only to unsigned long type, which seems too strict to me. I think the macro should accept all long enough unsigned types, otherwise we could end up with bunch of macros IS_ERR_VALUE_U32, IS_ERR_VALUE_ULL... Regards Andrzej
Re: [PATCH] tty/serial: digicolor: Fix bad usage of IS_ERR_VALUE
Hi Guenter, On Tue, Feb 09, 2016 at 07:08:59AM -0800, Guenter Roeck wrote: > IS_ERR_VALUE() assumes that its parameter is an unsigned long. > It can not be used to check if an unsigned int reflects an error. > Doing so can result in the following build warning. > > drivers/tty/serial/digicolor-usart.c: In function ‘digicolor_uart_probe’: > include/linux/err.h:21:38: warning: > comparison is always false due to limited range of data type > drivers/tty/serial/digicolor-usart.c:485:6: note: > in expansion of macro ‘IS_ERR_VALUE’ > > If that warning is seen, an error return from platform_get_irq() is missed. > > Signed-off-by: Guenter Roeck Acked-by: Baruch Siach Thanks, baruch > --- > drivers/tty/serial/digicolor-usart.c | 9 + > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/drivers/tty/serial/digicolor-usart.c > b/drivers/tty/serial/digicolor-usart.c > index a80cdad114f3..02ad6953b167 100644 > --- a/drivers/tty/serial/digicolor-usart.c > +++ b/drivers/tty/serial/digicolor-usart.c > @@ -453,7 +453,7 @@ static struct uart_driver digicolor_uart = { > static int digicolor_uart_probe(struct platform_device *pdev) > { > struct device_node *np = pdev->dev.of_node; > - int ret, index; > + int irq, ret, index; > struct digicolor_port *dp; > struct resource *res; > struct clk *uart_clk; > @@ -481,9 +481,10 @@ static int digicolor_uart_probe(struct platform_device > *pdev) > if (IS_ERR(dp->port.membase)) > return PTR_ERR(dp->port.membase); > > - dp->port.irq = platform_get_irq(pdev, 0); > - if (IS_ERR_VALUE(dp->port.irq)) > - return dp->port.irq; > + irq = platform_get_irq(pdev, 0); > + if (irq < 0) > + return irq; > + dp->port.irq = irq; > > dp->port.iotype = UPIO_MEM; > dp->port.uartclk = clk_get_rate(uart_clk); -- http://baruch.siach.name/blog/ ~. .~ Tk Open Systems =}ooO--U--Ooo{= - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -
Re: [PATCH] tty/serial: digicolor: Fix bad usage of IS_ERR_VALUE
On 02/11/2016 04:38 AM, Guenter Roeck wrote: > On 02/10/2016 07:21 AM, Arnd Bergmann wrote: >> On Tuesday 09 February 2016 18:37:46 Guenter Roeck wrote: >>> On 02/09/2016 07:26 AM, Arnd Bergmann wrote: On Tuesday 09 February 2016 07:08:59 Guenter Roeck wrote: > IS_ERR_VALUE() assumes that its parameter is an unsigned long. > It can not be used to check if an unsigned int reflects an error. > Doing so can result in the following build warning. > > drivers/tty/serial/digicolor-usart.c: In function ‘digicolor_uart_probe’: > include/linux/err.h:21:38: warning: > comparison is always false due to limited range of data type > drivers/tty/serial/digicolor-usart.c:485:6: note: > in expansion of macro ‘IS_ERR_VALUE’ > > If that warning is seen, an error return from platform_get_irq() is > missed. > > The patch looks correct to me, but what compiler version and which kernel tree is it that triggered the warning? Andrzej Hajda just modified the definition of IS_ERR_VALUE(), and the changes are still under discussion, but I don't see that warning with any of the versions. >>> I see it with gcc 5.1 and 5.2 (and W=1). I did not see / notice Andrzej's >>> patch. >>> >>> I agree that fixing the problem in IS_ERR_VALUE() is preferrable. >>> >>> >> Ah, W=1 explains it. We are still debating about the proper solution. Al Viro >> pointed out that most users of IS_ERR_VALUE() shouldn't be using it at all, >> so your patch is probably best here after all. >> > Yes, after looking into it some more I agree. Coccinelle should be able to > handle > most of the conversions automatically. I actually tried to write a script, > just > for the fun of it, but it misses some of the problem cases in patch mode. > Maybe I get it working tonight. > > Guenter > Maybe it can be helpful. The script which classifies all IS_ERR_VALUE usages as correct, signed, incorrect and unknown (cocci was not able to detect type). Adding patch mode should be quite simple. 
// Options: --all-includes --include include/uapi/linux/netfilter_ipv6/ip6_tables.h --include include/uapi/linux/netfilter_ipv4/ip_tables.h --include include/uapi/linux/netfilter_arp/arp_tables.h --include include/uapi/linux/netfilter/x_tables.h --include include/linux/mm_types.h virtual context virtual report @r@ typedef bool, u8, u16, u32, u64, s8, s16, s32, s64, __u64, dma_addr_t, Elf32_Addr, Elf64_Addr; {unsigned long, size_t} ev; {short, int, long, s16, s32, ssize_t} ew; {char, unsigned char, unsigned short, unsigned int, long long, unsigned long long, bool, u8, u16, u32, u64, s64, __u64, dma_addr_t, Elf32_Addr, Elf64_Addr} ei; expression eu; position p; @@ IS_ERR_VALUE(\(ev \| ew \| ei \| eu \))@p @script:python depends on report@ p << r.p = []; ev << r.ev = ""; ew << r.ew = ""; ei << r.ei = ""; eu << r.eu = ""; @@ t = "" if ev: t = "correct" if ew: t = "signed" if ei: t = "incorrect" if eu: t = "unknown" e = ev + ew + ei + eu; if t: msg = "WARNING: %s argument type in IS_ERR_VALUE(%s)" % (t, e) coccilib.report.print_report(p[0], msg)
Re: [PATCH] iwlwifi: fix erroneous return value
On 02/10/2016 07:10 PM, Anton Protopopov wrote: > The iwl_trans_pcie_start_fw() function may return the positive value EIO > instead of -EIO in case of error. > > Signed-off-by: Anton Protopopov > --- > drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c > b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c > index d60a467..920ea9d 100644 > --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c > @@ -1034,7 +1034,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans > *trans, > if (trans_pcie->is_down) { > IWL_WARN(trans, >"Can't start_fw since the HW hasn't been started\n"); > - ret = EIO; > + ret = -EIO; > goto out; > } > applied - thanks.
RE: [PATCH v3 4/6] PCI: xilinx: Clear interrupt FIFO during probe
> xilinx_pcie_init_port clears the pending interrupts in the interrupt decode > register, but does not clear the interrupt FIFO. This would lead to spurious > interrupts if any were present in the FIFO at probe time. > Clear the interrupt FIFO prior to the interrupt decode register in order to > start with a clean slate as expected. > > Signed-off-by: Paul Burton > Fixes: 8961def56845 ("PCI: xilinx: Add Xilinx AXI PCIe Host Bridge IP driver") > > --- > > Changes in v3: > - Split out from Boston patchset. > > Changes in v2: > - Add Fixes tag. > > drivers/pci/host/pcie-xilinx.c | 13 + > 1 file changed, 13 insertions(+) > > diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c > index > 1eb74a2..6c5a503 100644 > --- a/drivers/pci/host/pcie-xilinx.c > +++ b/drivers/pci/host/pcie-xilinx.c > @@ -568,6 +568,8 @@ static int xilinx_pcie_init_irq_domain(struct > xilinx_pcie_port *port) > */ > static void xilinx_pcie_init_port(struct xilinx_pcie_port *port) { > + u32 val; > + > if (xilinx_pcie_link_is_up(port)) > dev_info(port->dev, "PCIe Link is UP\n"); > else > @@ -577,6 +579,17 @@ static void xilinx_pcie_init_port(struct > xilinx_pcie_port *port) > pcie_write(port, ~XILINX_PCIE_IDR_ALL_MASK, > XILINX_PCIE_REG_IMR); > > + /* Clear interrupt FIFO */ > + while (1) { > + val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); > + > + if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) > + break; > + > + pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, > +XILINX_PCIE_REG_RPIFR1); > + } > + Hi Paul, This will create a problem in the error case: if we have continuous correctable errors on the link, this while loop will never terminate. Bharat
Re: compat_hdio_ioctl() question
thanks for your reply. It works as expected. is it going to be submitted? Thanks, Soohoon. From: Arnd Bergmann Sent: Wednesday, February 10, 2016 4:55 PM To: Soohoon Lee Cc: Mark Lord; linux-...@vger.kernel.org; Tejun Heo; linux-kernel@vger.kernel.org Subject: Re: compat_hdio_ioctl() question On Tuesday 09 February 2016 17:38:56 Soohoon Lee wrote: > > Hi, > I found that you are the author of this code. I don't think I am, but that's fine. I think I just moved the code from one place to another. > So please let me ask a question. I hope it's ok for you to Cc this to a linux-ide and linux-kernel. > +static int compat_hdio_ioctl(struct inode *inode, struct file *file, > + struct gendisk *disk, unsigned int cmd, unsigned long > arg) > +{ > + mm_segment_t old_fs = get_fs(); > + unsigned long kval; > + unsigned int __user *uvp; > + int error; > + > + set_fs(KERNEL_DS); > + error = blkdev_driver_ioctl(inode, file, disk, > + cmd, (unsigned long)(&kval)); > + set_fs(old_fs); > + > + if (error == 0) { > + uvp = compat_ptr(arg); > + if (put_user(kval, uvp)) > + error = -EFAULT; > + } > + return error; > +} > > > kval is local so it has random values. > But one of syscall like HDIO_GET_32BIT/ATA_IOC_GET_IO32 only updates one byte > so if kval has 0xaaaaaaaa and ioctl() updated one byte then it becomes > 0xaaaaaa01. > And put_user() writes 4bytes. This actually looks like a security bug, as we are not supposed to leak kernel stack data. > And I'm having problem with hdparm. > > void process_dev (char *devname) > { > static long parm, multcount; <-- parm is static so it's zero > . > if (do_defaults || get_io32bit) { > if (0 == ioctl(fd, HDIO_GET_32BIT, &parm)) { > > > so parm becomes 0xaaaaaa01 and reports wrong mode. > > If hdparm is 64bit then 64bit syscall will update 1byte of parm so it becomes > 0x1. > > What would be a good fix? > > - Modify hdparm to look at only 1byte > > - Initialize kval to zero > > - Copy parm to kval to simulate 64bit syscall more accurately. 
> Out of these, the last one is the best. My preferred solution however would be to move the handling of the ioctls in question (and maybe some others while we're at it) into the driver that provides the call in the first place. There is another problem: I don't think the code ever worked on big-endian machines: case ATA_IOC_GET_IO32: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); if (copy_to_user(arg, &val, 1)) return -EFAULT; return 0; On little-endian machines, this copies the low byte of the 'int val' variable, while on big-endian machines, it copies the high byte that is always zero. In the hdparm source code, this gets copied into a 'static long parm' variable, which in turn means that when we first read one 32-bit setting and then call ATA_IOC_GET_IO32 as part of the same hdparm command line, the upper 24 or 56 bits (out of 32 or 64 respectively) still contain the previous result. From what I can tell, the behavior of hdparm matches the behavior of the old drivers/ide/ subsystem, and the compat ioctl handling works with that, but the ioctls in drivers/ata/ suffer from both the compat_ioctl problem and the problem on big-endian systems. Both problems date back to the original commit that added ioctl support in libata back in 2004 (linux-2.6.8). Can you try out the patch below to see if that makes it work? If it does, we probably want something like this backported to all stable kernels, but I think we probably also want to clean this up a bit more to avoid the 'get_fs()/set_fs()' hack in block/compat_ioctl.c. 
Arnd diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7e959f90c020..e417e1a1d02c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap) int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, int cmd, void __user *arg) { - int val = -EINVAL, rc = -EINVAL; + unsigned long val; + int rc = -EINVAL; unsigned long flags; switch (cmd) { - case ATA_IOC_GET_IO32: + case HDIO_GET_32BIT: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); - if (copy_to_user(arg, &val, 1)) - return -EFAULT; - return 0; + return put_user(val, (unsigned long __user *)arg); - case ATA_IOC_SET_IO32: + case
Re: [PATCH v8 0/8] ftrace with regs + live patching for ppc64 LE (ABI v2)
On Wed, 2016-02-10 at 18:29 +0100, Torsten Duwe wrote: > Changes since V7: > * drop "notrace" attribute for MMU-aiding functions > and their callees. > * merge "-mprofile-kernel"-stripping patches into one. > > Changes since v6: > * include Petr's patch, on popular demand ;) > * move #ifdefs out of functions for readability; > introduce static helper functions instead. > * No more literal binary instructions in hex, > at least not added by this patch set. > * add compile time checker to detect the presence > of known-good -mprofile-kernel support. > * limit Kconfig / compile to the configurations really supported: > + (static) FTRACE with -pg > + DYNAMIC_FTRACE with -pg with or without -mprofile-kernel > (depending on the compiler) > + DYNAMIC_FTRACE_WITH_REGS only with -mprofile-kernel > (will error out if the compiler is broken) > > Changes since v5: > * extra "std r0,LRSAVE(r1)" for gcc-6 > This makes the code compiler-agnostic. > * Follow Petr Mladek's suggestion to avoid > redefinition of HAVE_LIVEPATCH > > Changes since v4: > * change comment style in entry_64.S to C89 > (nobody is using assembler syntax comments there). > * the bool function restore_r2 shouldn't return 2, > that's a little confusing. > * Test whether the compiler supports -mprofile-kernel > and only then define CC_USING_MPROFILE_KERNEL > * also make the return value of klp_check_compiler_support > depend on that. > > Major changes since v3: > * the graph tracer works now. > It turned out the stack frame it tried to manipulate does not > exist at that point. > * changes only needed in order to support -mprofile-kernel are now > in a separate patch, prepended. > * Kconfig cleanup so this is only selectable on ppc64le. 
> > Petr Mladek (1): > livepatch: Detect offset for the ftrace location during build > > Torsten Duwe (7): > ppc64 (le): prepare for -mprofile-kernel > ppc64le FTRACE_WITH_REGS implementation > ppc use ftrace_modify_all_code default > ppc64 ftrace_with_regs configuration variables > ppc64 ftrace_with_regs: disable profiling for some files > Implement kernel live patching for ppc64le (ABIv2) > Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it > is selected > > arch/powerpc/Kconfig| 6 + > arch/powerpc/Makefile | 17 +++ > arch/powerpc/gcc-mprofile-kernel-notrace.sh | 33 ++ > arch/powerpc/include/asm/code-patching.h| 24 > arch/powerpc/include/asm/ftrace.h | 5 + > arch/powerpc/include/asm/livepatch.h| 45 > arch/powerpc/kernel/Makefile| 13 ++- > arch/powerpc/kernel/entry_64.S | 169 > +++- > arch/powerpc/kernel/ftrace.c| 129 - > arch/powerpc/kernel/livepatch.c | 38 +++ > arch/powerpc/kernel/module_64.c | 56 - > arch/powerpc/lib/Makefile | 4 +- > arch/s390/Kconfig | 1 + > kernel/livepatch/Makefile | 13 +++ > kernel/livepatch/core.c | 12 +- > kernel/livepatch/ftrace-test.c | 6 + > kernel/trace/Kconfig| 5 + > scripts/recordmcount.c | 6 +- > scripts/recordmcount.h | 17 ++- > 19 files changed, 552 insertions(+), 47 deletions(-) > create mode 100755 arch/powerpc/gcc-mprofile-kernel-notrace.sh > create mode 100644 arch/powerpc/include/asm/livepatch.h > create mode 100644 arch/powerpc/kernel/livepatch.c > create mode 100644 kernel/livepatch/ftrace-test.c > Quick question - I presume these apply on top of 4.5.0-rc2? Balbir Singh.
Re: [PATCH v6 0/3] cpufreq: Replace timers with utilization update callbacks
On 02/10/2016 03:11 PM, Doug Smythies wrote: On 2016.02.10 07:17 Rafael J. Wysocki wrote: On Friday, January 29, 2016 11:52:15 PM Rafael J. Wysocki wrote: The following patch series introduces a mechanism allowing the cpufreq core and "setpolicy" drivers to provide utilization update callbacks to be invoked by the scheduler on utilization changes. Those callbacks can be used to run the sampling and frequency adjustments code (intel_pstate) or to schedule the execution of that code in process context (cpufreq core) instead of per-CPU deferrable timers used in cpufreq today (which Thomas complained about during the last Kernel Summit). This patch set solves a long standing issue with the intel_pstate driver. The issue began with the introduction of the "duration" method for deciding if the CPU had been idle for a long time resulting in forcing the target pstate downwards. Often this was the correct action, but sometimes this was the wrong thing to do, because the cpu was actually very busy, but just so happened to be idle on jiffy boundaries (perhaps similar to what Steve Muckle was referring to on another branch of this thread). For an idle system, this patch set seems to change the maximum duration from 4 seconds to 0.5 seconds for most CPUs. However, when using v1 of patches 1 and 2 of 3 and v5 of 3 of 3, sometimes the durations (time between passes of the intel-pstate driver for a given CPU) of upwards of 120 seconds were observed. When patches 1, 2, and 3 of 3 v6 were used, the maximum observed durations of an idle system were on the order of 500 milliseconds for most CPUs, but CPU 6 sometimes went to 3.5 seconds and CPU 7 sometimes went to 4 seconds (small sample space, I'll consider to run an overnight test for a much much larger sample space). Note 4 seconds, is O.K., and what it was before, I'm just noting it is all. I have a bunch of graphs, if anyone wants to see the supporting data. 
My test computer has an older model i7 (Intel(R) Core(TM) i7-2600K CPU @ 3.40GHz) Thanks Doug. If you have specific workloads, please compare performance. - Srinivas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH v3 2/6] PCI: xilinx: Unify INTx & MSI interrupt FIFO decode
> > Subject: [PATCH v3 2/6] PCI: xilinx: Unify INTx & MSI interrupt FIFO > > decode > > > > When decoding either an INTx or MSI interrupt, the driver has no way > > to know which it will pull out of the interrupt FIFO. If both were > > pending then this would lead to either the interrupt being handled > > incorrectly (MSI interrupt treated as INTx) or not at all (INTx interrupt > dropped by MSI path). > > Unify the reading of the interrupt FIFO & act according to the type of > > interrupt actually read. > > > > Signed-off-by: Paul Burton > > Fixes: 8961def56845 ("PCI: xilinx: Add Xilinx AXI PCIe Host Bridge IP > > driver") > > > > --- > > > > Changes in v3: > > - Split out from Boston patchset. > > > > Changes in v2: > > - Add Fixes tag. > > > > drivers/pci/host/pcie-xilinx.c | 47 > > +- > > 1 file changed, 14 insertions(+), 33 deletions(-) > > > > diff --git a/drivers/pci/host/pcie-xilinx.c > > b/drivers/pci/host/pcie-xilinx.c index > > 1490bd1..afdfb09 100644 > > --- a/drivers/pci/host/pcie-xilinx.c > > +++ b/drivers/pci/host/pcie-xilinx.c > > @@ -397,7 +397,7 @@ static const struct irq_domain_ops > intx_domain_ops > > = { static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) { > > struct xilinx_pcie_port *port = (struct xilinx_pcie_port *)data; > > - u32 val, mask, status, msi_data; > > + u32 val, mask, status; > > > > /* Read interrupt decode and mask registers */ > > val = pcie_read(port, XILINX_PCIE_REG_IDR); @@ -437,8 +437,8 @@ > > static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) > > xilinx_pcie_clear_err_interrupts(port); > > } > > > > - if (status & XILINX_PCIE_INTR_INTX) { > > - /* INTx interrupt received */ > > + if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) { > > + /* Interrupt received */ > > val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); > > > > /* Check whether interrupt valid */ @@ -447,41 +447,22 @@ > static > > irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) > > return IRQ_HANDLED; > > } > > > > 
- if (!(val & XILINX_PCIE_RPIFR1_MSI_INTR)) { > > - /* Clear interrupt FIFO register 1 */ > > - pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, > > - XILINX_PCIE_REG_RPIFR1); > > - > > - /* Handle INTx Interrupt */ > > + if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { > > + irq = pcie_read(port, XILINX_PCIE_REG_RPIFR2) & > > + XILINX_PCIE_RPIFR2_MSG_DATA; > > + } else { > > val = ((val & XILINX_PCIE_RPIFR1_INTR_MASK) >> > > XILINX_PCIE_RPIFR1_INTR_SHIFT) + 1; > > - generic_handle_irq(irq_find_mapping(port- > > >irq_domain, > > - val)); > > + irq = irq_find_mapping(port->irq_domain, val); > > } > > - } > > > > - if (status & XILINX_PCIE_INTR_MSI) { > > - /* MSI Interrupt */ > > - val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); > > + /* Clear interrupt FIFO register 1 */ > > + pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, > > + XILINX_PCIE_REG_RPIFR1); > > > > - if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) { > > - dev_warn(port->dev, "RP Intr FIFO1 read error\n"); > > - return IRQ_HANDLED; > > - } > > - > > - if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { > > - msi_data = pcie_read(port, > > XILINX_PCIE_REG_RPIFR2) & > > - XILINX_PCIE_RPIFR2_MSG_DATA; > > - > > - /* Clear interrupt FIFO register 1 */ > > - pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, > > - XILINX_PCIE_REG_RPIFR1); > > - > > - if (IS_ENABLED(CONFIG_PCI_MSI)) { > > - /* Handle MSI Interrupt */ > > - generic_handle_irq(msi_data); > > - } > > - } > > + if (IS_ENABLED(CONFIG_PCI_MSI) || > > + !(val & XILINX_PCIE_RPIFR1_MSI_INTR)) > > + generic_handle_irq(irq); > > } > > > > if (status & XILINX_PCIE_INTR_SLV_UNSUPP) > > -- > > Hi Paul, > > Even with above condition you are still missing either MSI or legacy interrupt > handling, when both MSI and legacy interrupts occurred. It would be better if the condition when both legacy and MSI interrupts occurred can be handled separately, leaving the current individual interrupt cases as they are. Bharat
[GIT PULL] platform-drivers-x86 for 4.5-3
Hi Linus, Just two small fixes for the 4.5-rc cycle. Thanks, Darren Hart Intel Open Source Technology Center The following changes since commit 92e963f50fc74041b5e9e744c330dca48e04f08d: Linux 4.5-rc1 (2016-01-24 13:06:47 -0800) are available in the git repository at: git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git tags/platform-drivers-x86-v4.5-3 for you to fetch changes up to b1d353ad3d5835b16724653b33c05124e1b5acf1: intel_scu_ipcutil: underflow in scu_reg_access() (2016-01-30 09:40:35 -0800) platform-drivers-x86 for 4.5-3 intel_scu_ipcutil: - underflow in scu_reg_access() intel-hid: - fix incorrect entries in intel_hid_keymap Alex Hung (1): intel-hid: fix incorrect entries in intel_hid_keymap Dan Carpenter (1): intel_scu_ipcutil: underflow in scu_reg_access() drivers/platform/x86/intel-hid.c | 3 +-- drivers/platform/x86/intel_scu_ipcutil.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) -- Darren Hart Intel Open Source Technology Center
linux-next: Tree for Feb 11
Hi all, Changes since 20160210: The net-next tree gained a conflict against the net tree. The drm-misc tree lost its build failure. The trivial tree gained a build fix from the akpm tree. The aio tree still had a build failure so I used the version from next-20160111. The akpm tree lost a patch to the trivial build tree fix. Non-merge commits (relative to Linus' tree): 4057 3564 files changed, 139812 insertions(+), 63361 deletions(-) I have created today's linux-next tree at git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (patches at http://www.kernel.org/pub/linux/kernel/next/ ). If you are tracking the linux-next tree using git, you should not use "git pull" to do so as that will try to merge the new linux-next release with the old one. You should use "git fetch" and checkout or reset to the new master. You can see which trees have been included by looking in the Next/Trees file in the source. There are also quilt-import.log and merge.log files in the Next directory. Between each merge, the tree was built with a ppc64_defconfig for powerpc and an allmodconfig (with CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a native build of tools/perf. After the final fixups (if any), I do an x86_64 modules_install followed by builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig (this fails its final link) and pseries_le_defconfig and i386, sparc and sparc64 defconfig. Below is a summary of the state of the merge. I am currently merging 239 trees (counting Linus' and 36 trees of patches pending for Linus' tree). Stats about the size of the tree over time can be seen at http://neuling.org/linux-next-size.html . Status of my local build tests will be at http://kisskb.ellerman.id.au/linux-next . If maintainers want to give advice about cross compilers/configs that work, we are always open to add more builds. Thanks to Randy Dunlap for doing many randconfig builds. 
And to Paul Gortmaker for triage and bug fixes. -- Cheers, Stephen Rothwell $ git checkout master $ git reset --hard stable Merging origin/master (74c7b2af37ad Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input) Merging fixes/master (36f90b0a2ddd Linux 4.5-rc2) Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on module install) Merging arc-current/for-curr (74bf8efb5fa6 Linux 4.4-rc7) Merging arm-current/fixes (5070fb14a015 ARM: 8517/1: ICST: avoid arithmetic overflow in icst_hz()) Merging m68k-current/for-linus (daf670bc9d36 m68k/defconfig: Update defconfigs for v4.5-rc1) Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached build errors) Merging mips-fixes/mips-fixes (1795cd9b3a91 Linux 3.16-rc5) Merging powerpc-fixes/fixes (0fe53e8d335b powerpc/powernv: Fix stale PE primary bus) Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2) Merging sparc/master (ca0bb0798022 Add sun4v_wdt watchdog driver) Merging net/master (a1b14d27ed09 bpf: fix branch offset adjustment on backjumps after patching ctx expansion) Merging ipsec/master (a8a572a6b5f2 xfrm: dst_entries_init() per-net dst_ops) Merging ipvs/master (b16c29191dc8 netfilter: nf_conntrack: use safer way to lock all buckets) Merging wireless-drivers/master (d76d65fd2695 rtlwifi: fix broken VHT support) Merging mac80211/master (212c5a5e6ba6 mac80211: minstrel: Change expected throughput unit back to Kbps) Merging sound-current/for-linus (397da2d0ab0d Merge branch 'topic/core-fixes' into for-linus) Merging pci-current/for-linus (0cf1337e0b83 PCI: rcar: Add device tree support for r8a7793) Merging driver-core.current/driver-core-linus (00cd29b799e3 klist: fix starting point removed bug in klist iterators) Merging tty.current/tty-linus (c8053b587627 Revert "8250: uniphier: allow modular build with 8250 console") Merging usb.current/usb-linus (89140fdaf11a xhci: harden xhci_find_next_ext_cap against device removal) Merging 
usb-gadget-fixes/fixes (6a4290cc28be usb: dwc3: gadget: set the OTG flag in dwc3 gadget driver.) Merging usb-serial-fixes/usb-linus (4152b387da81 USB: option: fix Cinterion AHxx enumeration) Merging usb-chipidea-fixes/ci-for-usb-stable (6f51bc340d2a usb: chipidea: imx: fix a possible NULL dereference) Merging staging.current/staging-linus (388f7b1d6e8c Linux 4.5-rc3) Merging char-misc.current/char-misc-linus (3b2b9ead3214 nvmem: qfprom: Specify LE device endianness) Merging input-current/for-linus (ff84dabe3c6e Input: colibri-vf50-ts - add missing #include ) Merging crypto-current/master (8a3978ad55fb crypto: marvell/cesa - fix test in mv_cesa_dev_dma_init()) Merging ide/master (e04a2bd6d8c9 drivers/ide: make ide-scan-pci.c driver explicitly non-modular) Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test for PPC_PSERIES) Merging rr-fixes/fi
RE: [PATCH v3 2/6] PCI: xilinx: Unify INTx & MSI interrupt FIFO decode
> Subject: [PATCH v3 2/6] PCI: xilinx: Unify INTx & MSI interrupt FIFO decode > > When decoding either an INTx or MSI interrupt, the driver has no way to > know which it will pull out of the interrupt FIFO. If both were pending then > this would lead to either the interrupt being handled incorrectly (MSI > interrupt treated as INTx) or not at all (INTx interrupt dropped by MSI path). > Unify the reading of the interrupt FIFO & act according to the type of > interrupt actually read. > > Signed-off-by: Paul Burton > Fixes: 8961def56845 ("PCI: xilinx: Add Xilinx AXI PCIe Host Bridge IP driver") > > --- > > Changes in v3: > - Split out from Boston patchset. > > Changes in v2: > - Add Fixes tag. > > drivers/pci/host/pcie-xilinx.c | 47 > +- > 1 file changed, 14 insertions(+), 33 deletions(-) > > diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c > index > 1490bd1..afdfb09 100644 > --- a/drivers/pci/host/pcie-xilinx.c > +++ b/drivers/pci/host/pcie-xilinx.c > @@ -397,7 +397,7 @@ static const struct irq_domain_ops intx_domain_ops > = { static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) { > struct xilinx_pcie_port *port = (struct xilinx_pcie_port *)data; > - u32 val, mask, status, msi_data; > + u32 val, mask, status; > > /* Read interrupt decode and mask registers */ > val = pcie_read(port, XILINX_PCIE_REG_IDR); @@ -437,8 +437,8 @@ > static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) > xilinx_pcie_clear_err_interrupts(port); > } > > - if (status & XILINX_PCIE_INTR_INTX) { > - /* INTx interrupt received */ > + if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) { > + /* Interrupt received */ > val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); > > /* Check whether interrupt valid */ > @@ -447,41 +447,22 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, > void *data) > return IRQ_HANDLED; > } > > - if (!(val & XILINX_PCIE_RPIFR1_MSI_INTR)) { > - /* Clear interrupt FIFO register 1 */ > - pcie_write(port, 
XILINX_PCIE_RPIFR1_ALL_MASK, > -XILINX_PCIE_REG_RPIFR1); > - > - /* Handle INTx Interrupt */ > + if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { > + irq = pcie_read(port, XILINX_PCIE_REG_RPIFR2) & > + XILINX_PCIE_RPIFR2_MSG_DATA; > + } else { > val = ((val & XILINX_PCIE_RPIFR1_INTR_MASK) >> > XILINX_PCIE_RPIFR1_INTR_SHIFT) + 1; > - generic_handle_irq(irq_find_mapping(port- > >irq_domain, > - val)); > + irq = irq_find_mapping(port->irq_domain, val); > } > - } > > - if (status & XILINX_PCIE_INTR_MSI) { > - /* MSI Interrupt */ > - val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); > + /* Clear interrupt FIFO register 1 */ > + pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, > +XILINX_PCIE_REG_RPIFR1); > > - if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) { > - dev_warn(port->dev, "RP Intr FIFO1 read error\n"); > - return IRQ_HANDLED; > - } > - > - if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { > - msi_data = pcie_read(port, > XILINX_PCIE_REG_RPIFR2) & > -XILINX_PCIE_RPIFR2_MSG_DATA; > - > - /* Clear interrupt FIFO register 1 */ > - pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, > -XILINX_PCIE_REG_RPIFR1); > - > - if (IS_ENABLED(CONFIG_PCI_MSI)) { > - /* Handle MSI Interrupt */ > - generic_handle_irq(msi_data); > - } > - } > + if (IS_ENABLED(CONFIG_PCI_MSI) || > + !(val & XILINX_PCIE_RPIFR1_MSI_INTR)) > + generic_handle_irq(irq); > } > > if (status & XILINX_PCIE_INTR_SLV_UNSUPP) > -- Hi Paul, Even with above condition you are still missing either MSI or legacy interrupt handling, when both MSI and legacy interrupts occurred. Bharat
RE: [PATCH V3 3/5] PCI: xilinx: Modifying AXI PCIe Host Bridge driver to work on both Zynq and Microblaze
> Subject: Re: [PATCH V3 3/5] PCI: xilinx: Modifying AXI PCIe Host Bridge driver > to work on both Zynq and Microblaze > > On Wednesday 10 February 2016 09:27:07 Paul Burton wrote: > > On Wed, Feb 10, 2016 at 05:55:51AM +, Bharat Kumar Gogada wrote: > > > > On Tue, Feb 09, 2016 at 04:11:56PM +0530, Bharat Kumar Gogada > wrote: > > > > > Modifying Xilinx AXI PCIe Host Bridge Soft IP driver to work on > > > > > both Zynq and Microblaze Architectures. > > > > > With these modifications drivers/pci/host/pcie-xilinx.c, will > > > > > work on both Zynq and Microblaze Architectures. > > > > > > > > > > Signed-off-by: Bharat Kumar Gogada > > > > > Signed-off-by: Ravi Kiran Gummaluri > > > > > --- > > > > > Changes: > > > > > Removed unneccessary architecture dependent number of MSI's. > > > > > Added #ifdef to pci_fixup_irqs which is ARM specific API. > > > > > > > > Hi Bharat, > > > > > > > > Why do you say pci_fixup_irqs is ARM-specific? It's declared in > > > > include/linux/pci.h, defined in drivers/pci/setup-irq.c & used by > > > > multiple architectures (alpha, arm, m68k, mips, sh, sparc, tile, > > > > unicore32 from a quick grep). > > > > > > > > Will you not break INTX-style interrupts by removing this? > > > > > > > I meant to say ARM specific w.r.t Microblaze architecture, which is > > > what this patch series are for. This has been already discussed in > > > my previous patch by Arnd Bergmann and Lorenzo Pieralisi . > > > (https://lkml.org/lkml/2016/1/12/707) > > > > Hi Bharat, > > > > Ok, so you don't need it for microblaze but do need it for zynq/ARM. > > We also need it for MIPS, where my recent patches enable this driver. > > So if #ifdef'ing this is the current way forwards could you please > > invert the condition to #ifndef CONFIG_MICROBLAZE? > > I think we are getting to the point where we should try much harder to make > sure nobody needs that hack and it all works out of the box. Ok I will invert to this condition and resend the patches. Bharat
Re: [PATCH v5 3/5] mtd: devices: m25p80: add support for mmap read request
On 02/10/2016 01:06 AM, Mark Brown wrote: > On Fri, Dec 11, 2015 at 09:39:58AM +0530, Vignesh R wrote: > >> +if (spi_flash_read_supported(spi)) { >> +struct spi_flash_read_message msg; >> +int ret; >> + >> +msg.buf = buf; >> +msg.from = from; >> +msg.len = len; >> +msg.read_opcode = nor->read_opcode; >> +msg.addr_width = nor->addr_width; >> +msg.dummy_bytes = dummy; >> +/* TODO: Support other combinations */ >> +msg.opcode_nbits = SPI_NBITS_SINGLE; >> +msg.addr_nbits = SPI_NBITS_SINGLE; >> +msg.data_nbits = m25p80_rx_nbits(nor); >> + >> +ret = spi_flash_read(spi, &msg); >> +*retlen = msg.retlen; >> +return ret; > > Looking at this I can't help but think that spi_flash_read() ought to > have the stub in rather than the caller. But given that we're pretty > much only ever expecting one user I'm not 100% sure it actually matters. Well, my initial patch set passed a long list of arguments to spi_flash_read(), but Brian suggested to use struct[1] in order to avoid unnecessary churn when things need to be changed in the API. [1] https://lkml.org/lkml/2015/11/11/454 -- Regards Vignesh
[PATCH RESEND] scsi: ppa: use new parport device model
Modify ppa driver to use the new parallel port device model. Signed-off-by: Sudip Mukherjee --- Resending as there was no review or ACK for this change. This has exactly same changes as done in scsi/imm.c which has already been accepted. drivers/scsi/ppa.c | 46 -- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index ee00e27..f6ad579 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -37,6 +37,7 @@ typedef struct { unsigned long recon_tmo;/* How many usecs to wait for reconnection (6th bit) */ unsigned int failed:1; /* Failure flag */ unsigned wanted:1; /* Parport sharing busy flag*/ + unsigned int dev_no;/* Device number*/ wait_queue_head_t *waiting; struct Scsi_Host *host; struct list_head list; @@ -985,15 +986,40 @@ static struct scsi_host_template ppa_template = { static LIST_HEAD(ppa_hosts); +/* + * Finds the first available device number that can be alloted to the + * new ppa device and returns the address of the previous node so that + * we can add to the tail and have a list in the ascending order. 
+ */ + +static inline ppa_struct *find_parent(void) +{ + ppa_struct *dev, *par = NULL; + unsigned int cnt = 0; + + if (list_empty(&ppa_hosts)) + return NULL; + + list_for_each_entry(dev, &ppa_hosts, list) { + if (dev->dev_no != cnt) + return par; + cnt++; + par = dev; + } + + return par; +} + static int __ppa_attach(struct parport *pb) { struct Scsi_Host *host; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting); DEFINE_WAIT(wait); - ppa_struct *dev; + ppa_struct *dev, *temp; int ports; int modes, ppb, ppb_hi; int err = -ENOMEM; + struct pardev_cb ppa_cb; dev = kzalloc(sizeof(ppa_struct), GFP_KERNEL); if (!dev) @@ -1002,8 +1028,15 @@ static int __ppa_attach(struct parport *pb) dev->mode = PPA_AUTODETECT; dev->recon_tmo = PPA_RECON_TMO; init_waitqueue_head(&waiting); - dev->dev = parport_register_device(pb, "ppa", NULL, ppa_wakeup, - NULL, 0, dev); + temp = find_parent(); + if (temp) + dev->dev_no = temp->dev_no + 1; + + memset(&ppa_cb, 0, sizeof(ppa_cb)); + ppa_cb.private = dev; + ppa_cb.wakeup = ppa_wakeup; + + dev->dev = parport_register_dev_model(pb, "ppa", &ppa_cb, dev->dev_no); if (!dev->dev) goto out; @@ -1110,9 +1143,10 @@ static void ppa_detach(struct parport *pb) } static struct parport_driver ppa_driver = { - .name = "ppa", - .attach = ppa_attach, - .detach = ppa_detach, + .name = "ppa", + .match_port = ppa_attach, + .detach = ppa_detach, + .devmodel = true, }; static int __init ppa_driver_init(void) -- 1.9.1
Re: [PATCH 1/2] Documentation: dt: mailbox: Add TI Message Manager
Hi Jassi, On 02/10/2016 10:23 PM, Jassi Brar wrote: [...] Thanks for taking the time and checking the TRM, I apologize that the actual details of the hardware block which was supposed to be in sections 8.1.3 and 8.1.4 has unfortunately been dropped since the last time I reviewed in the spec Vs what actually went out into public domain! I do realize the problem of doing a review without comprehensive and accurate documentation - ugghh.. :( But, I am trying to get our internal guys to upload the proper TRM chapter in public domain -> hopefully we will get it done some time soon. >> msgmgr: msgmgr@02a0 { >> compatible = "ti,k2g-message-manager"; >> #mbox-cells = <2>; >> reg-names = "queue_proxy_region", "queue_state_debug_region"; >> reg = <0x02a0 0x40>, <0x028c3400 0x400>; >> interrupt-names = "rx_005_002", >> "rx_057_002"; >> > Looking at figure in page-1445, it seems QID is the h/w channel id, > while proxy is its programming parameter. So maybe we need to list all > the ARM irq's as a list here, matched only by the qid asked by the > consumer ... assuming no two channels could have the same qid (?). The overall story is something like what you already figured out.. message manager has a queue engine and a ram for data buffers, and n queues. Each of these queues has a memory map corresponding to the processor view.. we can call that programming parameter as well. > interrupt-names = "irq_005", "irq_037", "irq_049", "irq_057", > "perr", "ferr", "eerr"; proxy error (perr), free index error(ferr) and ECC error(eerr) cannot be handled by a slave, since it involves controlling a shared register set for a single message manager instance. in the case of K2G, the master of the message manager is actually PMMC, and not the compute processors - it has error handling logic to handle things there - a slave can only report these errors without ability to even expect reliable detection (for example PMMC reacting even before any of these cores have come up from low power state).
irq_37 and irq_49 go to the secure world and we have no access from ARM "non secure" world. the "missing documentation" would have helped clarify that :(.. > > I may be slightly off, but the idea remains to not have to encode any > consumer specific info in the provider node. I do realize the reasoning behind your suggestion here. the reasoning for providing rx_qid_pid as the interrupt name was as follows: I was hoping to get a future SoC to provide proxy specific error instead of a global error which is really useless since the processor generating error should be the guy actually be notified.. queue specific interrupts as well.. the reason for naming interrupts with the proxy id information was primarily to let the dtb ABI stay compatible with only additional properties defined when the new SoC gets supported. I can make it compatible for today's SoC, but based on what i explained, how about just "rx_" for the interrupt names? interrupt-names = "rx_005", "rx_057" (I kinda feel using "irq" for interrupt-names is actually redundant information)? *if* i manage to convince to get a new IP with proxy specific interrupts, then "perr_qid_pid" could then be introduced for that new compatible type.. [...] -- Regards, Nishanth Menon
[PATCH RESEND] scsi: sim710: fix build warning
We are getting build warning about: "Section mismatch in reference from the variable sim710_eisa_driver to the function .init.text:sim710_eisa_probe() The variable sim710_eisa_driver references the function __init sim710_eisa_probe()" sim710_eisa_probe() was having __init but that was being referenced from sim710_eisa_driver. Signed-off-by: Sudip Mukherjee --- patch first sent on 4th Sept.2015. There is no review or ACK yet. warning is still there with next-20160209, build log is at: https://travis-ci.org/sudipm-mukherjee/parport/jobs/107948115 drivers/scsi/sim710.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c index 3b3b56f..82ed998 100644 --- a/drivers/scsi/sim710.c +++ b/drivers/scsi/sim710.c @@ -176,8 +176,7 @@ static struct eisa_device_id sim710_eisa_ids[] = { }; MODULE_DEVICE_TABLE(eisa, sim710_eisa_ids); -static __init int -sim710_eisa_probe(struct device *dev) +static int sim710_eisa_probe(struct device *dev) { struct eisa_device *edev = to_eisa_device(dev); unsigned long io_addr = edev->base_addr; -- 1.9.1
[PATCH 2/2] regulator: qcom_saw: Fix uninitialized variable build warning
Fix below build warning: CC [M] drivers/regulator/qcom_saw-regulator.o drivers/regulator/qcom_saw-regulator.c: In function 'qcom_saw_regulator_probe': drivers/regulator/qcom_saw-regulator.c:154:5: warning: 'found' is used uninitialized in this function [-Wuninitialized] drivers/regulator/qcom_saw-regulator.c:140:7: note: 'found' was declared here Signed-off-by: Axel Lin --- drivers/regulator/qcom_saw-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom_saw-regulator.c b/drivers/regulator/qcom_saw-regulator.c index c00f0df..6751614 100644 --- a/drivers/regulator/qcom_saw-regulator.c +++ b/drivers/regulator/qcom_saw-regulator.c @@ -137,7 +137,7 @@ static struct saw_vreg *saw_get_drv(struct platform_device *pdev, struct saw_vreg *vreg = NULL; struct device_node *cpu_node, *saw_node; int cpu; - bool found; + bool found = false; for_each_possible_cpu(cpu) { cpu_node = of_cpu_device_node_get(cpu); -- 2.1.4
[PATCH 1/2] regulator: qcom_saw: Fix testing wrong value
Signed-off-by: Axel Lin --- drivers/regulator/qcom_saw-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/qcom_saw-regulator.c b/drivers/regulator/qcom_saw-regulator.c index c800f16..c00f0df 100644 --- a/drivers/regulator/qcom_saw-regulator.c +++ b/drivers/regulator/qcom_saw-regulator.c @@ -186,8 +186,8 @@ static int qcom_saw_regulator_probe(struct platform_device *pdev) vreg->regmap = syscon_node_to_regmap(saw_np); of_node_put(saw_np); - if (IS_ERR(config.regmap)) - return PTR_ERR(config.regmap); + if (IS_ERR(vreg->regmap)) + return PTR_ERR(vreg->regmap); snprintf(name, sizeof(name), "krait%d", cpu); -- 2.1.4
Re: [RFCv6 PATCH 03/10] sched: scheduler-driven cpu frequency selection
Hi Ricky, On 02/01/2016 09:10 AM, Ricky Liang wrote: >> +static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) >> > +{ >> > + struct gov_data *gd; >> > + int cpu; >> > + >> > + for_each_cpu(cpu, policy->cpus) >> > + memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, >> > + sizeof(struct sched_capacity_reqs)); >> > + >> > + gd = kzalloc(sizeof(*gd), GFP_KERNEL); >> > + if (!gd) >> > + return -ENOMEM; >> > + >> > + gd->throttle_nsec = policy->cpuinfo.transition_latency ? >> > + policy->cpuinfo.transition_latency : >> > + THROTTLE_NSEC; >> > + pr_debug("%s: throttle threshold = %u [ns]\n", >> > + __func__, gd->throttle_nsec); >> > + >> > + if (cpufreq_driver_is_slow()) { >> > + cpufreq_driver_slow = true; >> > + gd->task = kthread_create(cpufreq_sched_thread, policy, >> > + "kschedfreq:%d", >> > + >> > cpumask_first(policy->related_cpus)); >> > + if (IS_ERR_OR_NULL(gd->task)) { >> > + pr_err("%s: failed to create kschedfreq thread\n", >> > + __func__); >> > + goto err; >> > + } >> > + get_task_struct(gd->task); >> > + kthread_bind_mask(gd->task, policy->related_cpus); >> > + wake_up_process(gd->task); >> > + init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); >> > + } >> > + >> > + policy->governor_data = gd; > > This should be moved before if(cpufreq_driver_is_slow()) {...}. I've > seen NULL pointer dereference at boot in cpufreq_sched_thread() when it > tried to run sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param). Agreed, this has been addressed during various cleanups and reorganization since the last posting. > >> > + set_sched_freq(); >> > + >> > + return 0; >> > + >> > +err: > And probably also set policy->governor_data to NULL here. Changed. Thanks for the comments. thanks, Steve
Re: [PATCH 1/2] Documentation: dt: mailbox: Add TI Message Manager
On Tue, Feb 9, 2016 at 11:40 PM, Nishanth Menon wrote: > On 09:43-20160209, Nishanth Menon wrote: >> On Tue, Feb 9, 2016 at 8:54 AM, Jassi Brar wrote: > [..] >> Let me prototype this as part of of_xlate and see if I can pull the >> qinst data back out.. obviously one negative will be that I will >> register *all* valid channels as part of probe.. at least based on >> initial code i wrote today morning.. > > OK - I believe I have it working now. How does the following look? If > this looks fine to you, then I will post a v2 including the driver > update. > Changes here: > - dropped the generic message-manager compatible > - dropped child nodes > - moved the valid queue information to driver (no longer in dts) > - rx interrupts per SoC are explicitly named list in binding(and > dts) > > Texas Instruments' Message Manager Driver > > > The Texas Instruments' Message Manager is a mailbox controller that has > configurable queues selectable at SoC(System on Chip) integration. The Message > manager is broken up into queues in different address regions that are called > "proxies" - each instance is unidirectional and is instantiated at SoC > integration level to indicate receive or transmit path. > > Message Manager Device Node: > === > Required properties: > > - compatible: Shall be: "ti,k2g-message-manager" > - reg-names queue_proxy_region - Map the queue proxy region. > queue_state_debug_region - Map the queue state debug > region. > - reg: Contains the register map per reg-names. > - #mbox-cells Shall be 2. Contains the queue ID and proxy ID in that > order referring to the transfer path. > - interrupt-names: Contains interrupt names matching the rx transfer path > for a given SoC. Receive interrupts shall be of the > format: "rx__". > For ti,k2g-message-manager, this shall contain: > "rx_005_002", "rx_057_002" > - interrupts: Contains the interrupt information corresponding to > interrupt-names property. 
> > Example(K2G): > > > msgmgr: msgmgr@02a0 { > compatible = "ti,k2g-message-manager"; > #mbox-cells = <2>; > reg-names = "queue_proxy_region", "queue_state_debug_region"; > reg = <0x02a0 0x40>, <0x028c3400 0x400>; > interrupt-names = "rx_005_002", > "rx_057_002"; > Looking at figure in page-1445, it seems QID is the h/w channel id, while proxy is its programming parameter. So maybe we need to list all the ARM irq's as a list here, matched only by the qid asked by the consumer ... assuming no two channels could have the same qid (?). interrupt-names = "irq_005", "irq_037", "irq_049", "irq_057", "perr", "ferr", "eerr"; I may be slightly off, but the idea remains to not have to encode any consumer specific info in the provider node. > pmmc: pmmc { > [...] > mbox-names = "rx", "tx"; > # RX queue ID is 5, proxy ID is 2 > # TX queue ID is 0, proxy ID is 0 > mboxes= < 5 2>, > < 0 0>; > [...] > };
[PATCH v2 5/5] tile: query dynamic DEBUG_PAGEALLOC setting
From: Joonsoo Kim We can disable debug_pagealloc processing even if the code is compiled with CONFIG_DEBUG_PAGEALLOC. This patch changes the code to query whether it is enabled or not in runtime. Acked-by: Chris Metcalf Signed-off-by: Joonsoo Kim --- arch/tile/mm/init.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index d4e1fc4..a0582b7 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -896,17 +896,15 @@ void __init pgtable_cache_init(void) panic("pgtable_cache_init(): Cannot create pgd cache"); } -#ifdef CONFIG_DEBUG_PAGEALLOC -static long __write_once initfree; -#else static long __write_once initfree = 1; -#endif +static bool __write_once set_initfree_done; /* Select whether to free (1) or mark unusable (0) the __init pages. */ static int __init set_initfree(char *str) { long val; if (kstrtol(str, 0, &val) == 0) { + set_initfree_done = true; initfree = val; pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); @@ -919,6 +917,11 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) { unsigned long addr = (unsigned long) begin; + /* Prefer user request first */ + if (!set_initfree_done) { + if (debug_pagealloc_enabled()) + initfree = 0; + } if (kdata_huge && !initfree) { pr_warn("Warning: ignoring initfree=0: incompatible with kdata=huge\n"); initfree = 1; -- 1.9.1
[PATCH v2 2/5] mm/slub: query dynamic DEBUG_PAGEALLOC setting
From: Joonsoo Kim We can disable debug_pagealloc processing even if the code is compiled with CONFIG_DEBUG_PAGEALLOC. This patch changes the code to query whether it is enabled or not in runtime. v2: clean up code, per Christian. Signed-off-by: Joonsoo Kim --- mm/slub.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 606488b..a1874c2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -250,11 +250,10 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) { void *p; -#ifdef CONFIG_DEBUG_PAGEALLOC + if (!debug_pagealloc_enabled()) + return get_freepointer(s, object); + probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p)); -#else - p = get_freepointer(s, object); -#endif return p; } -- 1.9.1
[PATCH v2 3/5] sound: query dynamic DEBUG_PAGEALLOC setting
From: Joonsoo Kim We can disable debug_pagealloc processing even if the code is compiled with CONFIG_DEBUG_PAGEALLOC. This patch changes the code to query whether it is enabled or not in runtime. v2: export _debug_pagealloc_enabled to modules, per Andrew. Acked-by: David Rientjes Acked-by: Takashi Iwai Signed-off-by: Joonsoo Kim --- mm/page_alloc.c | 1 + sound/drivers/pcsp/pcsp.c | 9 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 87b3e2f..00118fe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -498,6 +498,7 @@ void prep_compound_page(struct page *page, unsigned int order) unsigned int _debug_guardpage_minorder; bool _debug_pagealloc_enabled __read_mostly = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); +EXPORT_SYMBOL(_debug_pagealloc_enabled); bool _debug_guardpage_enabled __read_mostly; static int __init early_debug_pagealloc(char *buf) diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c index 27e25bb..72e2d00 100644 --- a/sound/drivers/pcsp/pcsp.c +++ b/sound/drivers/pcsp/pcsp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "pcsp_input.h" #include "pcsp.h" @@ -148,11 +149,11 @@ static int alsa_card_pcsp_init(struct device *dev) return err; } -#ifdef CONFIG_DEBUG_PAGEALLOC /* Well, CONFIG_DEBUG_PAGEALLOC makes the sound horrible. Lets alert */ - printk(KERN_WARNING "PCSP: CONFIG_DEBUG_PAGEALLOC is enabled, " - "which may make the sound noisy.\n"); -#endif + if (debug_pagealloc_enabled()) { + printk(KERN_WARNING "PCSP: CONFIG_DEBUG_PAGEALLOC is enabled, " + "which may make the sound noisy.\n"); + } return 0; } -- 1.9.1
[PATCH v2 4/5] powerpc: query dynamic DEBUG_PAGEALLOC setting
From: Joonsoo Kim We can disable debug_pagealloc processing even if the code is compiled with CONFIG_DEBUG_PAGEALLOC. This patch changes the code to query whether it is enabled or not in runtime. v2: fix build failure Acked-by: David Rientjes Signed-off-by: Joonsoo Kim --- arch/powerpc/kernel/traps.c | 5 ++--- arch/powerpc/mm/hash_utils_64.c | 36 arch/powerpc/mm/init_32.c | 8 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b6becc7..33c47fc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -203,9 +203,8 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err) #ifdef CONFIG_SMP printk("SMP NR_CPUS=%d ", NR_CPUS); #endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif + if (debug_pagealloc_enabled()) + printk("DEBUG_PAGEALLOC "); #ifdef CONFIG_NUMA printk("NUMA "); #endif diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ba59d59..1005281 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -255,8 +255,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, if (ret < 0) break; + #ifdef CONFIG_DEBUG_PAGEALLOC - if ((paddr >> PAGE_SHIFT) < linear_map_hash_count) + if (debug_pagealloc_enabled() && + (paddr >> PAGE_SHIFT) < linear_map_hash_count) linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; #endif /* CONFIG_DEBUG_PAGEALLOC */ } @@ -512,17 +514,17 @@ static void __init htab_init_page_sizes(void) if (mmu_has_feature(MMU_FTR_16M_PAGE)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); - found: -#ifndef CONFIG_DEBUG_PAGEALLOC - /* -* Pick a size for the linear mapping. 
Currently, we only support -* 16M, 1M and 4K which is the default -*/ - if (mmu_psize_defs[MMU_PAGE_16M].shift) - mmu_linear_psize = MMU_PAGE_16M; - else if (mmu_psize_defs[MMU_PAGE_1M].shift) - mmu_linear_psize = MMU_PAGE_1M; -#endif /* CONFIG_DEBUG_PAGEALLOC */ +found: + if (!debug_pagealloc_enabled()) { + /* +* Pick a size for the linear mapping. Currently, we only +* support 16M, 1M and 4K which is the default +*/ + if (mmu_psize_defs[MMU_PAGE_16M].shift) + mmu_linear_psize = MMU_PAGE_16M; + else if (mmu_psize_defs[MMU_PAGE_1M].shift) + mmu_linear_psize = MMU_PAGE_1M; + } #ifdef CONFIG_PPC_64K_PAGES /* @@ -721,10 +723,12 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); #ifdef CONFIG_DEBUG_PAGEALLOC - linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, ppc64_rma_size)); - memset(linear_map_hash_slots, 0, linear_map_hash_count); + if (debug_pagealloc_enabled()) { + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = __va(memblock_alloc_base( + linear_map_hash_count, 1, ppc64_rma_size)); + memset(linear_map_hash_slots, 0, linear_map_hash_count); + } #endif /* CONFIG_DEBUG_PAGEALLOC */ /* On U3 based machines, we need to reserve the DART area and diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index a10be66..c2b7716 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -112,10 +112,10 @@ void __init MMU_setup(void) if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } -#ifdef CONFIG_DEBUG_PAGEALLOC - __map_without_bats = 1; - __map_without_ltlbs = 1; -#endif + if (debug_pagealloc_enabled()) { + __map_without_bats = 1; + __map_without_ltlbs = 1; + } } /* -- 1.9.1
[PATCH v2 1/5] mm/vmalloc: query dynamic DEBUG_PAGEALLOC setting
From: Joonsoo Kim We can disable debug_pagealloc processing even if the code is compiled with CONFIG_DEBUG_PAGEALLOC. This patch changes the code to query whether it is enabled or not in runtime. v2: update comment, per David. adjust comment to use 80 cols, per Andrew. Reviewed-by: Christian Borntraeger Acked-by: David Rientjes Signed-off-by: Joonsoo Kim --- mm/vmalloc.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index fb42a5b..d4b2e34 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -531,22 +531,21 @@ static void unmap_vmap_area(struct vmap_area *va) static void vmap_debug_free_range(unsigned long start, unsigned long end) { /* -* Unmap page tables and force a TLB flush immediately if -* CONFIG_DEBUG_PAGEALLOC is set. This catches use after free -* bugs similarly to those in linear kernel virtual address -* space after a page has been freed. +* Unmap page tables and force a TLB flush immediately if pagealloc +* debugging is enabled. This catches use after free bugs similarly to +* those in linear kernel virtual address space after a page has been +* freed. * -* All the lazy freeing logic is still retained, in order to -* minimise intrusiveness of this debugging feature. +* All the lazy freeing logic is still retained, in order to minimise +* intrusiveness of this debugging feature. * -* This is going to be *slow* (linear kernel virtual address -* debugging doesn't do a broadcast TLB flush so it is a lot -* faster). +* This is going to be *slow* (linear kernel virtual address debugging +* doesn't do a broadcast TLB flush so it is a lot faster). */ -#ifdef CONFIG_DEBUG_PAGEALLOC - vunmap_page_range(start, end); - flush_tlb_kernel_range(start, end); -#endif + if (debug_pagealloc_enabled()) { + vunmap_page_range(start, end); + flush_tlb_kernel_range(start, end); + } } /* -- 1.9.1
[PATCH v2 0/5] follow-up "Optimize CONFIG_DEBUG_PAGEALLOC"
From: Joonsoo Kim v2) Changes o fix powerpc build failure (basic build test done) o export symbol for module build o change comment and clean up code As CONFIG_DEBUG_PAGEALLOC can be enabled/disabled via kernel parameters we can optimize some cases by checking the enablement state. This is follow-up work for Christian's Optimize CONFIG_DEBUG_PAGEALLOC. https://lkml.org/lkml/2016/1/27/194 I can't test patches for sound, power and tile, so please review them, maintainers. :) Remaining work is to make sparc to be aware of this but it looks not easy for me so I skip that in this series. It would be the best that these patches are routed through Andrew's tree, because there is a dependency to MM. Andrew, there is a mis-spelled word (compliled -> compiled) in commit description so I re-send all. Except powerpc one, others are basically same with the patches on your tree. Thanks. Joonsoo Kim (5): mm/vmalloc: query dynamic DEBUG_PAGEALLOC setting mm/slub: query dynamic DEBUG_PAGEALLOC setting sound: query dynamic DEBUG_PAGEALLOC setting powerpc: query dynamic DEBUG_PAGEALLOC setting tile: query dynamic DEBUG_PAGEALLOC setting arch/powerpc/kernel/traps.c | 5 ++--- arch/powerpc/mm/hash_utils_64.c | 36 arch/powerpc/mm/init_32.c | 8 arch/tile/mm/init.c | 11 +++ mm/page_alloc.c | 1 + mm/slub.c | 7 +++ mm/vmalloc.c| 25 - sound/drivers/pcsp/pcsp.c | 9 + 8 files changed, 54 insertions(+), 48 deletions(-) -- 1.9.1
[PATCH V3 05/12] dtb: amd: Misc changes for I2C device nodes
From: Suravee Suthikulpanit Add new i2c1 device node, and fix the incorrect clock frequency. Signed-off-by: Tom Lendacky Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index c7c759a..4be36fd 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -77,7 +77,15 @@ compatible = "snps,designware-i2c"; reg = <0 0xe100 0 0x1000>; interrupts = <0 357 4>; - clocks = <_100mhz>; + clocks = <_250mhz>; + }; + + i2c1: i2c@e005 { + status = "disabled"; + compatible = "snps,designware-i2c"; + reg = <0 0xe005 0 0x1000>; + interrupts = <0 340 4>; + clocks = <_250mhz>; }; serial0: serial@e101 { -- 2.5.0
[PATCH V3 09/12] dtb: amd: Add KCS device tree node
From: Brijesh Singh Add KCS device node to support IPMI solution on Overdrive system. Signed-off-by: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 10 ++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 70f407d..bd3adea 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -236,5 +236,15 @@ reg = <0x0 0xe800 0 0x100>; interrupts = <0 380 4>; }; + + ipmi_kcs: kcs@e001 { + status = "disabled"; + compatible = "ipmi-kcs"; + device_type = "ipmi"; + reg = <0x0 0xe001 0 0x8>; + interrupts = <0 389 4>; + reg-size = <1>; + reg-spacing = <4>; + }; }; }; -- 2.5.0
[PATCH V3 07/12] dtb: amd: Misc changes for GPIO devices
From: Suravee Suthikulpanit Add new GPIO device nodes and fix clock on gpio0. Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 49 +--- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 9f59381..ba455d1 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -129,7 +129,7 @@ #size-cells = <0>; }; - gpio0: gpio@e104 { + gpio0: gpio@e104 { /* Not available to OS for B0 */ status = "disabled"; compatible = "arm,pl061", "arm,primecell"; #gpio-cells = <2>; @@ -138,18 +138,59 @@ interrupts = <0 359 4>; interrupt-controller; #interrupt-cells = <2>; - clocks = <_100mhz>; + clocks = <_250mhz>; clock-names = "apb_pclk"; }; - gpio1: gpio@e105 { + gpio1: gpio@e105 { /* [0:7] */ status = "disabled"; compatible = "arm,pl061", "arm,primecell"; #gpio-cells = <2>; reg = <0 0xe105 0 0x1000>; gpio-controller; + interrupt-controller; + #interrupt-cells = <2>; interrupts = <0 358 4>; - clocks = <_100mhz>; + clocks = <_250mhz>; + clock-names = "apb_pclk"; + }; + + gpio2: gpio@e002 { /* [8:15] */ + status = "disabled"; + compatible = "arm,pl061", "arm,primecell"; + #gpio-cells = <2>; + reg = <0 0xe002 0 0x1000>; + gpio-controller; + interrupt-controller; + #interrupt-cells = <2>; + interrupts = <0 366 4>; + clocks = <_250mhz>; + clock-names = "apb_pclk"; + }; + + gpio3: gpio@e003 { /* [16:23] */ + status = "disabled"; + compatible = "arm,pl061", "arm,primecell"; + #gpio-cells = <2>; + reg = <0 0xe003 0 0x1000>; + gpio-controller; + interrupt-controller; + #interrupt-cells = <2>; + interrupts = <0 365 4>; + clocks = <_250mhz>; + clock-names = "apb_pclk"; + }; + + gpio4: gpio@e008 { /* [24] */ + status = "disabled"; + compatible = "arm,pl061", "arm,primecell"; + #gpio-cells = <2>; + reg = <0 0xe008 0 0x1000>; + gpio-controller; + interrupt-controller; + #interrupt-cells = <2>; + interrupts 
= <0 361 4>; + clocks = <_250mhz>; clock-names = "apb_pclk"; }; -- 2.5.0
[PATCH V3 00/12] dtb: amd: Miscellaneous Updates for AMD Seattle DTS
From: Suravee Suthikulpanit This patch series contains several updates for the AMD Seattle SOC DTS files. It also adds new board files for newer Overdrive and Linaro 96boards (Husky) platforms. Olof, You mentioned that you have already applied the V2 of the series to next/dt64. Do you think we can just take the V3 instead if we don't have any other issues/concerns? Thanks, Suravee Changes from V2 (https://lkml.org/lkml/2016/2/8/692): * Remove KCS interrupt name (per Arnd's suggestion) * Fix ccn node address (per Arnd's suggestion) * Add more description in git commit message for patch 3/12 (per Arnd's suggestion) Changes from V1 (https://lkml.org/lkml/2016/1/27/1251): * Fix duplication in MAINTAINERS file (per Martin review comment) * Remove the PCIe SMMU device tree node for now to rework (based on the discussion in the thread). Brijesh Singh (2): dtb: amd: Fix GICv2 hypervisor and virtual interface sizes dtb: amd: Add KCS device tree node Suravee Suthikulpanit (9): MAINTAINERS: Adding Maintainers for AMD Seattle Device Tree dtb: amd: Fix DMA ranges of smb0 and pcie0 dtb: amd: Fix typo in SPI device nodes dtb: amd: Misc changes for I2C device nodes dtb: amd: Misc changes for SATA device tree nodes dtb: amd: Misc changes for GPIO devices dtb: amd: Add PERF CCN-504 device tree node dtb: amd: Add support for new AMD Overdrive boards dtb: amd: Add support for AMD/Linaro 96Boards Enterprise Edition Server board Tom Lendacky (1): dtb: amd: Add AMD XGBE device tree file MAINTAINERS | 8 ++ arch/arm64/boot/dts/amd/Makefile | 4 +- arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts | 87 + arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts | 91 ++ arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 104 +--- arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi | 117 +++ arch/arm64/boot/dts/amd/husky.dts| 83 7 files changed, 480 insertions(+), 14 deletions(-) create mode 100644 arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts create mode 100644 
arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts create mode 100644 arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi create mode 100644 arch/arm64/boot/dts/amd/husky.dts -- 2.5.0
[PATCH V3 11/12] dtb: amd: Add support for new AMD Overdrive boards
From: Suravee Suthikulpanit Add device tree files for AMD Overdrive boards which comes with AMD Seattle Revision B0 and B1 SOCs. Signed-off-by: Tom Lendacky Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/Makefile | 3 +- arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts | 87 ++ arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts | 91 3 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts create mode 100644 arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts diff --git a/arch/arm64/boot/dts/amd/Makefile b/arch/arm64/boot/dts/amd/Makefile index cfdf701..db03293 100644 --- a/arch/arm64/boot/dts/amd/Makefile +++ b/arch/arm64/boot/dts/amd/Makefile @@ -1,4 +1,5 @@ -dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive.dtb +dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive.dtb \ + amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb always := $(dtb-y) subdir-y := $(dts-dirs) diff --git a/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts new file mode 100644 index 000..8e3074a --- /dev/null +++ b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts @@ -0,0 +1,87 @@ +/* + * DTS file for AMD Seattle Overdrive Development Board + * Note: For Seattle Rev.B0 + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + */ + +/dts-v1/; + +/include/ "amd-seattle-soc.dtsi" + +/ { + model = "AMD Seattle (Rev.B0) Development Board (Overdrive)"; + compatible = "amd,seattle-overdrive", "amd,seattle"; + + chosen { + stdout-path = + }; + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; +}; + + { + status = "ok"; + amd,zlib-support = <1>; +}; + +/** + * NOTE: In Rev.B, gpio0 is reserved. 
+ */ + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; + sdcard0: sdcard@0 { + compatible = "mmc-spi-slot"; + reg = <0>; + spi-max-frequency = <2000>; + voltage-ranges = <3200 3400>; + pl022,hierarchy = <0>; + pl022,interface = <0>; + pl022,com-mode = <0x0>; + pl022,rx-level-trig = <0>; + pl022,tx-level-trig = <0>; + }; +}; + +_kcs { + status = "ok"; +}; + + { + /include/ "amd-seattle-xgbe-b.dtsi" +}; diff --git a/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts new file mode 100644 index 000..ed5e043 --- /dev/null +++ b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts @@ -0,0 +1,91 @@ +/* + * DTS file for AMD Seattle Overdrive Development Board + * Note: For Seattle Rev.B1 + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + */ + +/dts-v1/; + +/include/ "amd-seattle-soc.dtsi" + +/ { + model = "AMD Seattle (Rev.B1) Development Board (Overdrive)"; + compatible = "amd,seattle-overdrive", "amd,seattle"; + + chosen { + stdout-path = + }; + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; +}; + + { + status = "ok"; + amd,zlib-support = <1>; +}; + +/** + * NOTE: In Rev.B, gpio0 is reserved. + */ + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; + sdcard0: sdcard@0 { + compatible = "mmc-spi-slot"; + reg = <0>; + spi-max-frequency = <2000>; + voltage-ranges = <3200 3400>; + pl022,hierarchy = <0>; + pl022,interface = <0>; + pl022,com-mode = <0x0>; + pl022,rx-level-trig = <0>; + pl022,tx-level-trig = <0>; + }; +}; + +_kcs { + status = "ok"; +}; + + { + /include/ "amd-seattle-xgbe-b.dtsi" +}; -- 2.5.0
[PATCH V3 02/12] dtb: amd: Fix GICv2 hypervisor and virtual interface sizes
From: Brijesh Singh This patch fixes incorrect sizes of the GICv2 device tree node. This has triggered an error message when booting the Xen hypervisor. Signed-off-by: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 2874d92..fdd0c96 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -18,8 +18,8 @@ #size-cells = <2>; reg = <0x0 0xe111 0 0x1000>, <0x0 0xe112f000 0 0x2000>, - <0x0 0xe114 0 0x1>, - <0x0 0xe116 0 0x1>; + <0x0 0xe114 0 0x2000>, + <0x0 0xe116 0 0x2000>; interrupts = <1 9 0xf04>; ranges = <0 0 0 0xe110 0 0x10>; v2m0: v2m@e008 { -- 2.5.0
[PATCH V3 04/12] dtb: amd: Fix typo in SPI device nodes
From: Suravee Suthikulpanit Remove invalid entry in the SPI device nodes. Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 5c73117..c7c759a 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -91,7 +91,6 @@ spi0: ssp@e102 { status = "disabled"; compatible = "arm,pl022", "arm,primecell"; - #gpio-cells = <2>; reg = <0 0xe102 0 0x1000>; spi-controller; interrupts = <0 330 4>; @@ -102,7 +101,6 @@ spi1: ssp@e103 { status = "disabled"; compatible = "arm,pl022", "arm,primecell"; - #gpio-cells = <2>; reg = <0 0xe103 0 0x1000>; spi-controller; interrupts = <0 329 4>; -- 2.5.0
[PATCH V3 01/12] MAINTAINERS: Adding Maintainers for AMD Seattle Device Tree
From: Suravee Suthikulpanit Adding maintainers for AMD Seattle device tree. Signed-off-by: Brijesh Singh Signed-off-by: Suravee Suthikulpanit Signed-off-by: Tom Lendacky --- MAINTAINERS | 7 +++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7f1fa4f..e349a32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -673,6 +673,13 @@ F: drivers/gpu/drm/radeon/radeon_kfd.c F: drivers/gpu/drm/radeon/radeon_kfd.h F: include/uapi/linux/kfd_ioctl.h +AMD SEATTLE DEVICE TREE SUPPORT +M: Brijesh Singh +M: Suravee Suthikulpanit +M: Tom Lendacky +S: Supported +F: arch/arm64/boot/dts/amd/ + AMD XGBE DRIVER M: Tom Lendacky L: net...@vger.kernel.org -- 2.5.0
[PATCH V3 06/12] dtb: amd: Misc changes for SATA device tree nodes
From: Suravee Suthikulpanit Add new SATA1 device node, and fix the register range size of SATA0. Signed-off-by: Tom Lendacky Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 4be36fd..9f59381 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -66,12 +66,22 @@ sata0: sata@e030 { compatible = "snps,dwc-ahci"; - reg = <0 0xe030 0 0x800>; + reg = <0 0xe030 0 0xf>; interrupts = <0 355 4>; clocks = <_333mhz>; dma-coherent; }; + /* This is for Rev B only */ + sata1: sata@e0d0 { + status = "disabled"; + compatible = "snps,dwc-ahci"; + reg = <0 0xe0d0 0 0xf>; + interrupts = <0 354 4>; + clocks = <_333mhz>; + dma-coherent; + }; + i2c0: i2c@e100 { status = "disabled"; compatible = "snps,designware-i2c"; -- 2.5.0
[PATCH V3 08/12] dtb: amd: Add PERF CCN-504 device tree node
From: Suravee Suthikulpanit Add PERF CCN-504 device tree node. Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index ba455d1..70f407d 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -229,5 +229,12 @@ /* 64-bit MMIO (size= 124G) */ <0x0300 0x01 0x 0x01 0x 0x7f 0x>; }; + + /* Perf CCN504 PMU */ + ccn: ccn@e800 { + compatible = "arm,ccn-504"; + reg = <0x0 0xe800 0 0x100>; + interrupts = <0 380 4>; + }; }; }; -- 2.5.0
[PATCH V3 12/12] dtb: amd: Add support for AMD/Linaro 96Boards Enterprise Edition Server board
From: Suravee Suthikulpanit Add device tree file for AMD/Linaro 96Boards Enterprise Edition Server (Husky) Board. This is based on the AMD Seattle Rev.B0 system Signed-off-by: Leo Duran Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/Makefile | 3 +- arch/arm64/boot/dts/amd/husky.dts | 83 +++ 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/boot/dts/amd/husky.dts diff --git a/arch/arm64/boot/dts/amd/Makefile b/arch/arm64/boot/dts/amd/Makefile index db03293..ba84770 100644 --- a/arch/arm64/boot/dts/amd/Makefile +++ b/arch/arm64/boot/dts/amd/Makefile @@ -1,5 +1,6 @@ dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive.dtb \ - amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb + amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb \ + husky.dtb always := $(dtb-y) subdir-y := $(dts-dirs) diff --git a/arch/arm64/boot/dts/amd/husky.dts b/arch/arm64/boot/dts/amd/husky.dts new file mode 100644 index 000..1381d4b --- /dev/null +++ b/arch/arm64/boot/dts/amd/husky.dts @@ -0,0 +1,83 @@ +/* + * DTS file for AMD/Linaro 96Boards Enterprise Edition Server (Husky) Board + * Note: Based-on AMD Seattle Rev.B0 + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + */ + +/dts-v1/; + +/include/ "amd-seattle-soc.dtsi" + +/ { + model = "Linaro 96Boards Enterprise Edition Server (Husky) Board"; + compatible = "amd,seattle-overdrive", "amd,seattle"; + + chosen { + stdout-path = + }; + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; +}; + + { + status = "ok"; + amd,zlib-support = <1>; +}; + +/** + * NOTE: In Rev.B, gpio0 is reserved. 
+ */ + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; +}; + + { + status = "ok"; + sdcard0: sdcard@0 { + compatible = "mmc-spi-slot"; + reg = <0>; + spi-max-frequency = <2000>; + voltage-ranges = <3200 3400>; + pl022,hierarchy = <0>; + pl022,interface = <0>; + pl022,com-mode = <0x0>; + pl022,rx-level-trig = <0>; + pl022,tx-level-trig = <0>; + }; +}; + + { + /include/ "amd-seattle-xgbe-b.dtsi" +}; -- 2.5.0
[PATCH V3 10/12] dtb: amd: Add AMD XGBE device tree file
From: Tom Lendacky Add AMD XGBE device tree file, which is available in AMD Seattle RevB. Signed-off-by: Tom Lendacky --- MAINTAINERS | 1 + arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi | 117 2 files changed, 118 insertions(+) create mode 100644 arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi diff --git a/MAINTAINERS b/MAINTAINERS index e349a32..9547a68 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -685,6 +685,7 @@ M: Tom Lendacky L: net...@vger.kernel.org S: Supported F: drivers/net/ethernet/amd/xgbe/ +F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi AMS (Apple Motion Sensor) DRIVER M: Michael Hanselmann diff --git a/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi new file mode 100644 index 000..8e86319 --- /dev/null +++ b/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi @@ -0,0 +1,117 @@ +/* + * DTS file for AMD Seattle XGBE (RevB) + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + */ + + xgmacclk0_dma_250mhz: clk250mhz_0 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000>; + clock-output-names = "xgmacclk0_dma_250mhz"; + }; + + xgmacclk0_ptp_250mhz: clk250mhz_1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000>; + clock-output-names = "xgmacclk0_ptp_250mhz"; + }; + + xgmacclk1_dma_250mhz: clk250mhz_2 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000>; + clock-output-names = "xgmacclk1_dma_250mhz"; + }; + + xgmacclk1_ptp_250mhz: clk250mhz_3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000>; + clock-output-names = "xgmacclk1_ptp_250mhz"; + }; + + xgmac0: xgmac@e070 { + compatible = "amd,xgbe-seattle-v1a"; + reg = <0 0xe070 0 0x8>, + <0 0xe078 0 0x8>, + <0 0xe1240800 0 0x00400>, /* SERDES RX/TX0 */ + <0 0xe125 0 0x00060>, /* SERDES IR 1/2 */ + <0 0xe12500f8 0 0x4>; /* SERDES IR 2/2 */ + interrupts = <0 325 4>, +<0 346 1>, <0 347 1>, <0 348 1>, <0 349 1>, +<0 323 4>; + 
amd,per-channel-interrupt; + amd,speed-set = <0>; + amd,serdes-blwc = <1>, <1>, <0>; + amd,serdes-cdr-rate = <2>, <2>, <7>; + amd,serdes-pq-skew = <10>, <10>, <18>; + amd,serdes-tx-amp = <0>, <0>, <0>; + amd,serdes-dfe-tap-config = <3>, <3>, <3>; + amd,serdes-dfe-tap-enable = <0>, <0>, <7>; + mac-address = [ 02 A1 A2 A3 A4 A5 ]; + clocks = <_dma_250mhz>, <_ptp_250mhz>; + clock-names = "dma_clk", "ptp_clk"; + phy-mode = "xgmii"; + #stream-id-cells = <16>; + dma-coherent; + }; + + xgmac1: xgmac@e090 { + compatible = "amd,xgbe-seattle-v1a"; + reg = <0 0xe090 0 0x8>, + <0 0xe098 0 0x8>, + <0 0xe1240c00 0 0x00400>, /* SERDES RX/TX1 */ + <0 0xe1250080 0 0x00060>, /* SERDES IR 1/2 */ + <0 0xe12500fc 0 0x4>; /* SERDES IR 2/2 */ + interrupts = <0 324 4>, +<0 341 1>, <0 342 1>, <0 343 1>, <0 344 1>, +<0 322 4>; + amd,per-channel-interrupt; + amd,speed-set = <0>; + amd,serdes-blwc = <1>, <1>, <0>; + amd,serdes-cdr-rate = <2>, <2>, <7>; + amd,serdes-pq-skew = <10>, <10>, <18>; + amd,serdes-tx-amp = <0>, <0>, <0>; + amd,serdes-dfe-tap-config = <3>, <3>, <3>; + amd,serdes-dfe-tap-enable = <0>, <0>, <7>; + mac-address = [ 02 B1 B2 B3 B4 B5 ]; + clocks = <_dma_250mhz>, <_ptp_250mhz>; + clock-names = "dma_clk", "ptp_clk"; + phy-mode = "xgmii"; + #stream-id-cells = <16>; + dma-coherent; + }; + + xgmac0_smmu: smmu@e060 { +compatible = "arm,mmu-401"; +reg = <0 0xe060 0 0x1>; +#global-interrupts = <1>; +interrupts = /* Uses combined intr for both + * global and context + */ + <0 336 4>, + <0 336 4>; + +mmu-masters = < + 0 1 2 3 4 5 6 7 +
[PATCH V3 03/12] dtb: amd: Fix DMA ranges of smb0 and pcie0
From: Suravee Suthikulpanit Since GICv2m MSI frame is also considered DMA-able, we should also include this range in the dma-range DT property as well. Therefore, this patch fixes the smb0 and pcie0 dma-range properties. Signed-off-by: Suravee Suthikulpanit --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index fdd0c96..5c73117 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -55,8 +55,12 @@ #size-cells = <2>; ranges; - /* DDR range is 40-bit addressing */ - dma-ranges = <0x80 0x0 0x80 0x0 0x7f 0x>; + /* +* dma-ranges is 40-bit address space containing: +* - GICv2m MSI register is at 0xe008 +* - DRAM range [0x80 to 0xff] +*/ + dma-ranges = <0x0 0x0 0x0 0x0 0x100 0x0>; /include/ "amd-seattle-clks.dtsi" @@ -159,7 +163,7 @@ <0x1000 0x0 0x0 0x4 0x0 0x0 0x0 0x123 0x1>; dma-coherent; - dma-ranges = <0x4300 0x80 0x0 0x80 0x0 0x7f 0x>; + dma-ranges = <0x4300 0x0 0x0 0x0 0x0 0x100 0x0>; ranges = /* I/O Memory (size=64K) */ <0x0100 0x00 0x 0x00 0xefff 0x00 0x0001>, -- 2.5.0
Good day
Good day, I need a foreign partner for a proposed mutual business, which refers to the transfer of a large sum of money to an account abroad, as the beneficiary of the funds. Everything about this operation, will be legally done without any bridge financial authority, both in my country and yours.I wil indulge you exercise the utmost discretion in all matters concerning this issue. If you are interested, please reply back through my private email address written down, I'll give you more information about myself with the financial institution I represented and the actual amounts involved about the project so I receive your positive response. Private E-mail: fu.zh...@fuzhongjun.net Friendly greeting, Executive director.
Re: Small writes being split with fdatasync based on non-aligned partition ending
Trying to cc the GNU parted and linux-block mailing lists. On 9 February 2016 at 13:02, Jens Rosenboom wrote: > While trying to reproduce some performance issues I have been seeing > with Ceph, I have come across a strange behaviour which is seemingly > affected only by the end point (and thereby the size) of a partition > being an odd number of sectors. Since all documentation about > alignment only refers to the starting point of the partition, this was > pretty surprising and I would like to know whether this is expected > behaviour or maybe a kernel issue. > > The command I am using is pretty simple: > > fio --rw=randwrite --size=1G --fdatasync=1 --bs=4k > --filename=/dev/sdb2 --runtime=10 --name=test > > The difference shows itself when the partition is created either by > sgdisk or by parted: > > sgdisk --new=2:6000M: /dev/sdb > > parted -s /dev/sdb mkpart osd-device-1-block 6291456000B 100% > > The difference in the partition table looks like this: > > < 2 6291456000B 1600320962559B 1594029506560B > osd-device-1-block > --- >> 2 6291456000B 1600321297919B 1594029841920B >> osd-device-1-block Looks like parted took you at your word when you asked for your partition at 100%. Just out of curiosity if you try and make the same partition interactively with parted do you get any warnings after making and after running align-check ? > So this is really only the end of the partition that is different. 
> However, in the first case, the 4k writes all get broken up into 512b > writes somewhere in the kernel, as can be seen with btrace: > > 8,16 3 36 0.000102666 8184 A WS 12353985 + 1 <- (8,18) 65985 > 8,16 3 37 0.000102739 8184 Q WS 12353985 + 1 [fio] > 8,16 3 38 0.000102875 8184 M WS 12353985 + 1 [fio] > 8,16 3 39 0.000103038 8184 A WS 12353986 + 1 <- (8,18) 65986 > 8,16 3 40 0.000103109 8184 Q WS 12353986 + 1 [fio] > 8,16 3 41 0.000103196 8184 M WS 12353986 + 1 [fio] > 8,16 3 42 0.000103335 8184 A WS 12353987 + 1 <- (8,18) 65987 > 8,16 3 43 0.000103403 8184 Q WS 12353987 + 1 [fio] > 8,16 3 44 0.000103489 8184 M WS 12353987 + 1 [fio] > 8,16 3 45 0.000103609 8184 A WS 12353988 + 1 <- (8,18) 65988 > 8,16 3 46 0.000103678 8184 Q WS 12353988 + 1 [fio] > 8,16 3 47 0.000103767 8184 M WS 12353988 + 1 [fio] > 8,16 3 48 0.000103879 8184 A WS 12353989 + 1 <- (8,18) 65989 > 8,16 3 49 0.000103947 8184 Q WS 12353989 + 1 [fio] > 8,16 3 50 0.000104035 8184 M WS 12353989 + 1 [fio] > 8,16 3 51 0.000104150 8184 A WS 12353990 + 1 <- (8,18) 65990 > 8,16 3 52 0.000104219 8184 Q WS 12353990 + 1 [fio] > 8,16 3 53 0.000104307 8184 M WS 12353990 + 1 [fio] > 8,16 3 54 0.000104452 8184 A WS 12353991 + 1 <- (8,18) 65991 > 8,16 3 55 0.000104520 8184 Q WS 12353991 + 1 [fio] > 8,16 3 56 0.000104609 8184 M WS 12353991 + 1 [fio] > 8,16 3 57 0.000104885 8184 I WS 12353984 + 8 [fio] > > whereas in the second case, I'm getting the expected 4k writes: > > 8,16 6 42 1266874889.659842036 8409 A WS 12340232 + 8 <- > (8,18) 52232 > 8,16 6 43 1266874889.659842167 8409 Q WS 12340232 + 8 [fio] > 8,16 6 44 1266874889.659842393 8409 G WS 12340232 + 8 [fio] This is weird because --size=1G should mean that fio is "seeing" an aligned end. Does direct=1 with a sequential job of iodepth=1 show the problem too? > The above examples are from running with an SSD, where the small > writes get merged together again before hitting the block device, > which is still pretty o.k. performance wise. 
But when I run the same > test on some NVMe device, the writes do not get merged, instead the > performance drops to less than 10% of what I get in the second case. Perhaps the I/O scheduler doesn't have the opportunity with the NVMe device... > If this is indeed expected behaviour from the kernel pov, it might > need some better documentation and probably sgdisk should also be > enhanced to align the end of the partition as well. FWIW, this happens > on a stock 4.4.0 kernel as well as recent Ubuntu and CentOS kernels. Do you mean parted? -- Sitsofe | http://sucs.org/~sits/
Re: fs/udf and udftools
On 02/10/2016 08:19 PM, Ken Moffat wrote: > On Wed, Feb 10, 2016 at 05:56:16PM -0800, Randy Dunlap wrote: >> [add Jan Kara] >> >> On 02/10/16 13:29, Steve Kenton wrote: >>> Is anyone maintaining these or am I about to volunteer for another job? I guess I should have said "developing" rather than "maintaining" for fs/udf since it's clear that someone has been keeping it running in-tree. I started with udftools from source forge and then discovered that the kernel udf driver does not support fallocate() which I was hoping to use. Thanks for the pointer to Jan Kara. I'll see how he feels about patches from the wilderness. >> >> Current MAINTAINERS file says: >> >> UDF FILESYSTEM >> M: Jan Kara >> S: Maintained >> F: Documentation/filesystems/udf.txt >> F: fs/udf/ >> >> and that Doc. file says: >> >> For the latest version and toolset see: >> http://linux-udf.sourceforge.net/ Yes, that's where I started. The last release was 1.0.0b3 in 2004. Which is what the Ubuntu 14.04LTS package reports as its version too. >> > A bit of googling for udftools suggests that gentoo are maintaining > their build, debian have patches for gcc-4 and gcc-5 among others, > Fedora have their own patches, and Arch have some patches (which > might be the same as some of the others, I did not look). > > Looks like the normal "possibly abandoned, but still useful to some > people" software, where distros keep it building. Yep, that's where my ~works came from. Ah, thanks for the links. I'll pull them all together and see what's there. smk > > There may also be others. > > Links - > > https://sources.gentoo.org/cgi-bin/viewvc.cgi/gentoo-x86/sys-fs/udftools/ChangeLog?view=markup > > https://launchpad.net/debian/+source/udftools/+changelog > > http://pkgs.fedoraproject.org/cgit/rpms/udftools.git/tree/ > > https://aur.archlinux.org/packages/udftools/ > >> >>> I'm having to dig into fs/udf and udftools/mkudffs as part of a project I'm >>> working on. 
>>> It looks like both have been lacking in personal TLC for quite a while. The >>> changes to >>> fs/udf seem to be tree wide VFS work but not updates to things like write >>> support and >>> udftools seems to have been frozen for >10 years. Both ~work but I'd like >>> to fix an >>> oops I'm getting in udftools and work on adding fallocate() support to >>> fs/udf and then >>> feed it back to the community rather than let the changes bit rot locally. >>> >>> Where to go from here? I've been reading LKML on marc: for years, mainly to >>> see what Linus, >>> Al and a variable group of other people say/do but I've never done more >>> than tinker with >>> the kernel locally. I'm using git for the project mentioned above but again >>> am not an >>> expert but willing to learn. I'm not currently subscribed so please cc me >>> if you could. >>> >>> smk >>> >> >> >> -- >> ~Randy >
Re: [PATCH] tty/serial: digicolor: Fix bad usage of IS_ERR_VALUE
On 02/10/2016 07:21 AM, Arnd Bergmann wrote: On Tuesday 09 February 2016 18:37:46 Guenter Roeck wrote: On 02/09/2016 07:26 AM, Arnd Bergmann wrote: On Tuesday 09 February 2016 07:08:59 Guenter Roeck wrote: IS_ERR_VALUE() assumes that its parameter is an unsigned long. It cannot be used to check if an unsigned int reflects an error. Doing so can result in the following build warning. drivers/tty/serial/digicolor-usart.c: In function ‘digicolor_uart_probe’: include/linux/err.h:21:38: warning: comparison is always false due to limited range of data type drivers/tty/serial/digicolor-usart.c:485:6: note: in expansion of macro ‘IS_ERR_VALUE’ If that warning is seen, an error return from platform_get_irq() is missed. The patch looks correct to me, but what compiler version and which kernel tree is it that triggered the warning? Andrzej Hajda just modified the definition of IS_ERR_VALUE(), and the changes are still under discussion, but I don't see that warning with any of the versions. I see it with gcc 5.1 and 5.2 (and W=1). I did not see / notice Andrzej's patch. I agree that fixing the problem in IS_ERR_VALUE() is preferable. Ah, W=1 explains it. We are still debating about the proper solution. Al Viro pointed out that most users of IS_ERR_VALUE() shouldn't be using it at all, so your patch is probably best here after all. Yes, after looking into it some more I agree. Coccinelle should be able to handle most of the conversions automatically. I actually tried to write a script, just for the fun of it, but it misses some of the problem cases in patch mode. Maybe I get it working tonight. Guenter
Re: powerpc/86xx: Use config fragments approach
On Wed, 2016-02-10 at 10:22 +0100, Alessio Igor Bogani wrote: > Signed-off-by: Alessio Igor Bogani > --- > arch/powerpc/Makefile| 10 + > arch/powerpc/configs/86xx-32bit.config | 4 + > arch/powerpc/configs/86xx-hw.config | 106 + > arch/powerpc/configs/86xx-smp.config | 2 + > arch/powerpc/configs/86xx/gef_ppc9a_defconfig| 216 -- > arch/powerpc/configs/86xx/gef_sbc310_defconfig | 214 -- > arch/powerpc/configs/86xx/gef_sbc610_defconfig | 273 > --- > arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig | 110 - > arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig | 156 - > arch/powerpc/configs/86xx/sbc8641d_defconfig | 246 > arch/powerpc/configs/mpc86xx_basic_defconfig | 6 + > arch/powerpc/configs/mpc86xx_defconfig | 162 -- > 12 files changed, 128 insertions(+), 1377 deletions(-) > create mode 100644 arch/powerpc/configs/86xx-32bit.config > create mode 100644 arch/powerpc/configs/86xx-hw.config > create mode 100644 arch/powerpc/configs/86xx-smp.config > delete mode 100644 arch/powerpc/configs/86xx/gef_ppc9a_defconfig > delete mode 100644 arch/powerpc/configs/86xx/gef_sbc310_defconfig > delete mode 100644 arch/powerpc/configs/86xx/gef_sbc610_defconfig > delete mode 100644 arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig > delete mode 100644 arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig > delete mode 100644 arch/powerpc/configs/86xx/sbc8641d_defconfig > create mode 100644 arch/powerpc/configs/mpc86xx_basic_defconfig > delete mode 100644 arch/powerpc/configs/mpc86xx_defconfig Why do your patches not start with [PATCH]? How does the resulting config compare to the individual previous configs? Are there any options that were dropped from the union of the old configs? Mutually exclusive options that had to be resolved one way or another? Changes in non-hw options? 
It would be nice to do what I did with 85xx in commit 7e2ad2ef851, and start with a patch that makes the defconfigs conform to what the new approach will give (to more easily see what is changing), and then have the next patch switch to fragments. > diff --git a/arch/powerpc/configs/86xx-32bit.config > b/arch/powerpc/configs/86xx-32bit.config > new file mode 100644 > index 000..8a2c6ff > --- /dev/null > +++ b/arch/powerpc/configs/86xx-32bit.config > @@ -0,0 +1,4 @@ > +CONFIG_HIGHMEM=y > +CONFIG_KEXEC=y > +CONFIG_PPC_86xx=y > +CONFIG_PROC_KCORE=y Why is this not in mpc86xx_basic_defconfig? There is no 64-bit 86xx. -Scott
Re: [PATCH V1 6/6] acpi: apei: handle SEA notification type for ARMv8
On 2/10/2016 1:03 PM, Will Deacon wrote: > On Fri, Feb 05, 2016 at 12:13:28PM -0700, Tyler Baicar wrote: >> +#else /* CONFIG_HAVE_ACPI_APEI_SEA */ >> +static inline int ghes_sea_add(struct ghes *ghes) >> +{ >> +pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not >> supported\n", >> + ghes->generic->header.source_id); >> +return -ENOTSUPP; >> +} >> + >> +static inline void ghes_sea_remove(struct ghes *ghes) >> +{ >> +pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not >> supported\n", >> + ghes->generic->header.source_id); >> +} > > Why are these getting called if !CONFIG_HAVE_ACPI_APEI_SEA? > This was added to catch firmware bugs (i.e. bad ACPI tables). Since "SEA" is a valid GHES notify type in ACPI, it's just a number in an ACPI table. If someone incorrectly set SEA as their notify type in their HEST table on an Intel system, this would catch that error here. We may do this with less code by getting rid of the #else (as you suggest), but we need to add #ifdefs to eliminate the calls to ghes_sea_add and ghes_sea_remove to avoid compiler errors. Does the below change look better? >>@@ -1093,6 +1168,11 @@ static int ghes_probe(struct platform_device >>*ghes_dev) >> list_add_rcu(>list, _sci); >> mutex_unlock(_list_mutex); >> break; >> +#ifdef CONFIG_HAVE_ACPI_APEI_SEA >>+ case ACPI_HEST_NOTIFY_SEA: >>+ rc = ghes_sea_add(ghes); >>+ if (rc) >>+ goto err_edac_unreg; >>+ break; +#endif ... >>@@ -1135,6 +1215,9 @@ static int ghes_remove(struct platform_device >>*ghes_dev) >> >>unregister_acpi_hed_notifier(_notifier_sci); >> mutex_unlock(_list_mutex); >> break; +#ifdef CONFIG_HAVE_ACPI_APEI_SEA >>+ case ACPI_HEST_NOTIFY_SEA: >>+ ghes_sea_remove(ghes); >>+ break; +#endif Harb -- Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
[BUG REPORT] use of unreachable() masks uninitialized variables warnings
Hi, I noticed that the use of the function -- unreachable() -- inside of the BUG() macro in arch/x86/include/asm/bug.h causes compiler output to be suspect based on review of assembly output for quite a few areas. If, as a test, you remove the call to unreachable() in the BUG() macro, it seems to generate a large number of build warnings about the use of uninitialized variables that are apparently masked by the compiler since it believes this code is going to halt, even in the cases where the BUG() macro is used conditionally, as in an if (condition) then BUG() (which the compiler does not seem to understand). This seems to indicate that the use of these built-in macros telling the compiler to create a bunch of infinite jump labels is masking quite a few bugs lurking around in the regular code since gcc apparently just throws out the checks for uninitialized variables in any function if it sees this macro anywhere in the function. Here are the sources of several bugs I have seen recently in ext4 I am pretty sure with a null bh. One good check is to set the BUG() macro NOT TO call unreachable() as a build test since the compiler will ignore uninitialized variables in a function if someone calls BUG() even conditionally, and never report them during build. 
The following are from v4.4.1 with a BUG() macro with the call to unreachable() removed: arch/x86/kernel/apic/x2apic_uv_x.c: In function ‘boot_pnode_to_blade’: arch/x86/kernel/apic/x2apic_uv_x.c:443:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ arch/x86/kernel/apic/x2apic_uv_x.c: In function ‘uv_acpi_madt_oem_check’: arch/x86/kernel/apic/x2apic_uv_x.c:201:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ arch/x86/kvm/x86.c: In function ‘emulator_cmpxchg_emulated’: arch/x86/kvm/x86.c:4562:5: warning: ‘exchanged’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (!exchanged) ^ In file included from kernel/locking/qspinlock.c:470:0: kernel/locking/qspinlock_paravirt.h: In function ‘pv_unhash’: kernel/locking/qspinlock_paravirt.h:144:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ kernel/locking/qspinlock_paravirt.h: In function ‘pv_hash’: kernel/locking/qspinlock_paravirt.h:121:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ kernel/sched/core.c: In function ‘pick_next_task’: kernel/sched/core.c:3064:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ kernel/seccomp.c: In function ‘seccomp_phase1’: kernel/seccomp.c:672:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ mm/mmap.c: In function ‘vma_adjust’: mm/mmap.c:661:15: warning: ‘rb_parent’ may be used uninitialized in this function [-Wmaybe-uninitialized] __vma_link_rb(mm, vma, rb_link, rb_parent); ^ mm/mmap.c:692:29: note: ‘rb_parent’ was declared here struct rb_node **rb_link, *rb_parent; ^ mm/mmap.c:661:15: warning: ‘rb_link’ may be used uninitialized in this function [-Wmaybe-uninitialized] __vma_link_rb(mm, vma, rb_link, rb_parent); ^ mm/mmap.c:692:19: note: ‘rb_link’ was declared here struct rb_node **rb_link, *rb_parent; ^ mm/mmap.c:660:17: warning: ‘prev’ may be used uninitialized in this function [-Wmaybe-uninitialized] __vma_link_list(mm, vma, 
prev, rb_parent); ^ mm/mmap.c:691:25: note: ‘prev’ was declared here struct vm_area_struct *prev; ^ mm/hugetlb.c: In function ‘__vma_reservation_common’: mm/hugetlb.c:1815:3: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized] return ret < 0 ? ret : 0; ^ mm/mempolicy.c: In function ‘mempolicy_slab_node’: mm/mempolicy.c:1731:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ mm/memcontrol.c: In function ‘mem_cgroup_read_u64’: mm/memcontrol.c:2892:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ In file included from mm/memcontrol.c:34:0: mm/memcontrol.c: In function ‘mem_cgroup_reset’: include/linux/page_counter.h:49:21: warning: ‘counter’ may be used uninitialized in this function [-Wmaybe-uninitialized] counter->watermark = page_counter_read(counter); ^ mm/memcontrol.c:3038:23: note: ‘counter’ was declared here struct page_counter *counter; ^ mm/memcontrol.c: In function ‘__mem_cgroup_usage_unregister_event’: mm/memcontrol.c:3511:6: warning: ‘usage’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (new->entries[j].threshold <= usage) { ^ mm/memcontrol.c:3506:35: warning: ‘thresholds’ may be used uninitialized in this function [-Wmaybe-uninitialized] for (i = 0, j = 0; i < thresholds->primary->size; i++) { ^ mm/memcontrol.c: In function ‘__mem_cgroup_usage_register_event’: mm/memcontrol.c:3423:6: warning: ‘usage’ may be used uninitialized in
[PATCH v4 3/3] irqchip: add nps Internal and external irqchips
From: Noam Camus Adding EZchip NPS400 support. NPS internal interrupts are internally handled at Multi Thread Manager (MTM) that is signaled for deactivating an interrupt. External interrupts is handled also at Global Interrupt Controller (GIC) e.g. serial and network devices. Signed-off-by: Noam Camus Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier --- .../interrupt-controller/ezchip,nps400-ic.txt | 17 +++ drivers/irqchip/Kconfig|6 + drivers/irqchip/Makefile |1 + drivers/irqchip/irq-eznps.c| 145 4 files changed, 169 insertions(+), 0 deletions(-) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt create mode 100644 drivers/irqchip/irq-eznps.c diff --git a/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt new file mode 100644 index 000..888b2b9 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt @@ -0,0 +1,17 @@ +EZchip NPS Interrupt Controller + +Required properties: + +- compatible : should be "ezchip,nps400-ic" +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. The value shall be 1. 
+ + +Example: + +intc: interrupt-controller { + compatible = "ezchip,nps400-ic"; + interrupt-controller; + #interrupt-cells = <1>; +}; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 4d7294e..bc5e775 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -193,3 +193,9 @@ config IRQ_MXS def_bool y if MACH_ASM9260 || ARCH_MXS select IRQ_DOMAIN select STMP_DEVICE + +config EZNPS_GIC + bool "NPS400 Global Interrupt Manager (GIM)" + select IRQ_DOMAIN + help + Support the EZchip NPS400 global interrupt controller diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 177f78f..1390142 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -55,3 +55,4 @@ obj-$(CONFIG_RENESAS_H8S_INTC)+= irq-renesas-h8s.o obj-$(CONFIG_ARCH_SA1100) += irq-sa11x0.o obj-$(CONFIG_INGENIC_IRQ) += irq-ingenic.o obj-$(CONFIG_IMX_GPCV2)+= irq-imx-gpcv2.o +obj-$(CONFIG_EZNPS_GIC)+= irq-eznps.o diff --git a/drivers/irqchip/irq-eznps.c b/drivers/irqchip/irq-eznps.c new file mode 100644 index 000..acc55a3 --- /dev/null +++ b/drivers/irqchip/irq-eznps.c @@ -0,0 +1,145 @@ +/* + * Copyright(c) 2015 EZchip Technologies. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + */ + +#include +#include +#include +#include +#include +#include +#include + +#define NPS_NR_CPU_IRQS 8 /* number of interrupt lines of NPS400 CPU */ +#define NPS_TIMER0_IRQ 3 + +/* + * NPS400 core includes an Interrupt Controller (IC) support. 
+ * All cores can deactivate level irqs at first level control + * at cores mesh layer called MTM. + * For devices out side chip e.g. uart, network there is another + * level called Global Interrupt Manager (GIM). + * This second level can control level and edge interrupt. + * + * NOTE: AUX_IENABLE and CTOP_AUX_IACK are auxiliary registers + * with private HW copy per CPU. + */ + +static void nps400_irq_mask(struct irq_data *irqd) +{ + unsigned int ienb; + unsigned int irq = irqd_to_hwirq(irqd); + + ienb = read_aux_reg(AUX_IENABLE); + ienb &= ~(1 << irq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static void nps400_irq_unmask(struct irq_data *irqd) +{ + unsigned int ienb; + unsigned int irq = irqd_to_hwirq(irqd); + + ienb = read_aux_reg(AUX_IENABLE); + ienb |= (1 << irq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static void nps400_irq_eoi_global(struct irq_data *irqd) +{ + unsigned int __maybe_unused irq = irqd_to_hwirq(irqd); + + write_aux_reg(CTOP_AUX_IACK, 1 << irq); + + /* Don't ack GIC before all device access attempts are done */ + mb(); + + nps_ack_gic(); +} + +static void nps400_irq_eoi(struct irq_data *irqd) +{ + unsigned int __maybe_unused irq = irqd_to_hwirq(irqd); + + write_aux_reg(CTOP_AUX_IACK, 1 << irq); +} + +static struct irq_chip
[PATCH v4 2/3] clocksource: Add NPS400 timers driver
From: Noam Camus Add internal tick generator which is shared by all cores. Each cluster of cores view it through dedicated address. This is used for SMP system where all CPUs synced by same clock source. Signed-off-by: Noam Camus Cc: Daniel Lezcano Cc: Rob Herring Cc: Thomas Gleixner Cc: John Stultz Acked-by: Vineet Gupta --- .../bindings/timer/ezchip,nps400-timer.txt | 15 drivers/clocksource/Kconfig| 10 +++ drivers/clocksource/Makefile |1 + drivers/clocksource/timer-nps.c| 80 4 files changed, 106 insertions(+), 0 deletions(-) create mode 100644 Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt create mode 100644 drivers/clocksource/timer-nps.c diff --git a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt new file mode 100644 index 000..c8c03d7 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt @@ -0,0 +1,15 @@ +NPS Network Processor + +Required properties: + +- compatible : should be "ezchip,nps400-timer" + +Clocks required for compatible = "ezchip,nps400-timer": +- clocks : Must contain a single entry describing the clock input + +Example: + +timer { + compatible = "ezchip,nps400-timer"; + clocks = <>; +}; diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 2eb5f0e..fa7be50 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -132,6 +132,16 @@ config CLKSRC_TI_32K This option enables support for Texas Instruments 32.768 Hz clocksource available on many OMAP-like platforms. +config CLKSRC_NPS + bool "NPS400 clocksource driver" if COMPILE_TEST + depends on !PHYS_ADDR_T_64BIT + select CLKSRC_MMIO + select CLKSRC_OF if OF + help + NPS400 clocksource support. + Got 64 bit counter with update rate up to 1000MHz. + This counter is accessed via couple of 32 bit memory mapped registers. 
+ config CLKSRC_STM32 bool "Clocksource for STM32 SoCs" if !ARCH_STM32 depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 56bd16e..056cffd 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_CLKSRC_QCOM) += qcom-timer.o obj-$(CONFIG_MTK_TIMER)+= mtk_timer.o obj-$(CONFIG_CLKSRC_PISTACHIO) += time-pistachio.o obj-$(CONFIG_CLKSRC_TI_32K)+= timer-ti-32k.o +obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c new file mode 100644 index 000..5a15970 --- /dev/null +++ b/drivers/clocksource/timer-nps.c @@ -0,0 +1,80 @@ +/* + * Copyright(c) 2015 EZchip Technologies. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define NPS_MSU_TICK_LOW 0xC8 +#define NPS_CLUSTER_OFFSET 8 +#define NPS_CLUSTER_NUM16 + +/* This array is per cluster of CPUs (Each NPS400 cluster got 256 CPUs) */ +static void *nps_msu_reg_low_addr[NPS_CLUSTER_NUM] __read_mostly; + +static unsigned long nps_timer_rate; + +static cycle_t nps_clksrc_read(struct clocksource *clksrc) +{ + int cluster = raw_smp_processor_id() >> NPS_CLUSTER_OFFSET; + + return (cycle_t)ioread32be(nps_msu_reg_low_addr[cluster]); +} + +static void __init nps_setup_clocksource(struct device_node *node, +struct clk *clk) +{ + int ret, cluster; + + for (cluster = 0; cluster < NPS_CLUSTER_NUM; cluster++) + nps_msu_reg_low_addr[cluster] = + nps_host_reg((cluster << NPS_CLUSTER_OFFSET), +NPS_MSU_BLKID, NPS_MSU_TICK_LOW); + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Couldn't enable parent clock\n"); + return; + } + + nps_timer_rate = clk_get_rate(clk); + + ret = clocksource_mmio_init(nps_msu_reg_low_addr, "EZnps-tick", + nps_timer_rate, 301, 32, nps_clksrc_read); + if
[PATCH v4 1/3] soc: Support for EZchip SoC
From: Noam Camus This header file is for NPS400 SoC. It includes macros for accessing memory mapped registers. These are functional registers that core can use to configure SoC. Signed-off-by: Noam Camus --- include/soc/nps/common.h | 150 ++ 1 files changed, 150 insertions(+), 0 deletions(-) create mode 100644 include/soc/nps/common.h diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h new file mode 100644 index 000..35ebb00 --- /dev/null +++ b/include/soc/nps/common.h @@ -0,0 +1,150 @@ +/* + * Copyright(c) 2015 EZchip Technologies. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ */ + +#ifndef SOC_NPS_COMMON_H +#define SOC_NPS_COMMON_H + +#ifdef CONFIG_SMP +#define IPI_IRQ5 +#endif + +#define NPS_HOST_REG_BASE 0xF600 + +#define NPS_MSU_BLKID 0x018 + +#define CTOP_INST_RSPI_GIC_0_R12 0x3C56117E +#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST 0x5B60 +#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM 0x00010422 + +#ifndef __ASSEMBLY__ + +/* In order to increase compilation test coverage */ +#ifdef CONFIG_ARC +static inline void nps_ack_gic(void) +{ + __asm__ __volatile__ ( + " .word %0\n" + : + : "i"(CTOP_INST_RSPI_GIC_0_R12) + : "memory"); +} +#else +static inline void nps_ack_gic(void) { } +#define write_aux_reg(r, v) +#define read_aux_reg(r) 0 +#endif + +/* CPU global ID */ +struct global_id { + union { + struct { +#ifdef CONFIG_EZNPS_MTM_EXT + u32 __reserved:20, cluster:4, core:4, thread:4; +#else + u32 __reserved:24, cluster:4, core:4; +#endif + }; + u32 value; + }; +}; + +/* + * Convert logical to physical CPU IDs + * + * The conversion swap bits 1 and 2 of cluster id (out of 4 bits) + * Now quad of logical clusters id's are adjacent physically, + * and not like the id's physically came with each cluster. + * Below table is 4x4 mesh of core clusters as it layout on chip. 
+ * Cluster ids are in format: logical (physical) + * + *- -- + * 3 | 5 (3) 7 (7) | | 13 (11) 15 (15)| + * + * 2 | 4 (2) 6 (6) | | 12 (10) 14 (14)| + *- -- + * 1 | 1 (1) 3 (5) | | 9 (9) 11 (13)| + * + * 0 | 0 (0) 2 (4) | | 8 (8) 10 (12)| + *- -- + * 0 123 + */ +static inline int nps_cluster_logic_to_phys(int cluster) +{ +#ifdef __arc__ +__asm__ __volatile__( + " mov r3,%0\n" + " .short %1\n" + " .word %2\n" + " mov %0,r3\n" + : "+r"(cluster) + : "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST), + "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM) + : "r3"); +#endif + + return cluster; +} + +#define NPS_CPU_TO_CLUSTER_NUM(cpu) \ + ({ struct global_id gid; gid.value = cpu; \ + nps_cluster_logic_to_phys(gid.cluster); }) + +struct nps_host_reg_address { + union { + struct { + u32 base:8, cl_x:4, cl_y:4, + blkid:6, reg:8, __reserved:2; + }; + u32 value; + }; +}; + +struct nps_host_reg_address_non_cl { + union { + struct { + u32 base:7, blkid:11, reg:12, __reserved:2; + }; + u32 value; + }; +}; + +static inline void *nps_host_reg_non_cl(u32 blkid, u32 reg) +{ + struct nps_host_reg_address_non_cl reg_address; + + reg_address.value = NPS_HOST_REG_BASE; + reg_address.blkid = blkid; + reg_address.reg = reg; + + return (void *)reg_address.value; +} + +static inline void *nps_host_reg(u32 cpu, u32 blkid, u32 reg) +{ + struct nps_host_reg_address reg_address; + u32 cl = NPS_CPU_TO_CLUSTER_NUM(cpu); + + reg_address.value = NPS_HOST_REG_BASE; + reg_address.cl_x = (cl >> 2) & 0x3; + reg_address.cl_y = cl & 0x3; + reg_address.blkid = blkid; + reg_address.reg = reg; + + return (void *)reg_address.value; +} +#endif /* __ASSEMBLY__ */ + +#endif /* SOC_NPS_COMMON_H */ -- 1.7.1
[PATCH v4 0/3] Adding NPS400 drivers
From: Noam Camus Change Log-- v4: clocksource -- Apply all Daniel comments (Thanks) Handle graceful return and also use the clocksource mmio driver v3: irqchip - Fix ARM build failure by adding missing include of linux/irq.h clocksource -- Avoid 64bit arch's to build driver by adding new dependency !PHYS_ADDR_T_64BIT This is since we use explicit io access of 32 bit. So for test coverage we allow not only build for ARC, but restrict it to 32 bit arch's. irqchip - Apply all Thomas comments (Thank you) v2: Add header file include/soc/nps/common.h. Now to build we do not depend on ARC subtree. General summary: Both drivers are now apart from the previous basic patch set of the new platform for ARC. The rest can now be seen at ARC srctree: https://git.kernel.org/cgit/linux/kernel/git/vgupta/arc.git/ Now ARC is supporting DT for clockevents and the interrupt controller ARC uses irq domain handling. Compared to the last version, the clocksource driver now does not include clockevent registration since NPS400 can use ARC generic driver. Compared to the last version, the irqchip driver now sets domain as default since it is the root domain. Also mapping of IPI is done in this driver. Last thing is that drivers can be built cleanly for i386 (still runs only for ARC) Note: in order to build we need to merge drivers into srctree which includes new header: soc/nps/common.h This header is part of patch set applied to ARC srctree. 
Regards, Noam Camus Noam Camus (3): soc: Support for EZchip SoC clocksource: Add NPS400 timers driver irqchip: add nps Internal and external irqchips .../interrupt-controller/ezchip,nps400-ic.txt | 17 +++ .../bindings/timer/ezchip,nps400-timer.txt | 15 ++ drivers/clocksource/Kconfig| 10 ++ drivers/clocksource/Makefile |1 + drivers/clocksource/timer-nps.c| 80 +++ drivers/irqchip/Kconfig|6 + drivers/irqchip/Makefile |1 + drivers/irqchip/irq-eznps.c| 145 +++ include/soc/nps/common.h | 150 9 files changed, 425 insertions(+), 0 deletions(-) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt create mode 100644 Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt create mode 100644 drivers/clocksource/timer-nps.c create mode 100644 drivers/irqchip/irq-eznps.c create mode 100644 include/soc/nps/common.h
Re: [PATCH V1 5/6] arm64: exception: handle instruction abort at current EL
On 2/10/2016 1:02 PM, Will Deacon wrote: > On Fri, Feb 05, 2016 at 12:13:27PM -0700, Tyler Baicar wrote: >> Add a handler for instruction aborts at the current EL >> (ESR_ELx_EC_IABT_CUR) so they are no longer handled in el1_inv. >> This allows firmware first handling for possible SEA >> (Synchronous External Abort) caused instruction abort at >> current EL. >> >> Signed-off-by: Tyler Baicar >> Signed-off-by: Naveen Kaje >> --- >> arch/arm64/kernel/entry.S | 19 +++ >> 1 file changed, 19 insertions(+) >> >> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S >> index 1f7f5a2..6b7fb14 100644 >> --- a/arch/arm64/kernel/entry.S >> +++ b/arch/arm64/kernel/entry.S >> @@ -336,6 +336,8 @@ el1_sync: >> lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class >> cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 >> b.eqel1_da >> +cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 >> +b.eqel1_ia >> cmp x24, #ESR_ELx_EC_SYS64 // configurable trap >> b.eqel1_undef >> cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception >> @@ -363,6 +365,23 @@ el1_da: >> // disable interrupts before pulling preserved data off the stack >> disable_irq >> kernel_exit 1 >> +el1_ia: >> +/* >> + * Instruction abort handling >> + */ >> +mrs x0, far_el1 >> +enable_dbg >> +// re-enable interrupts if they were enabled in the aborted context >> +tbnzx23, #7, 1f // PSR_I_BIT >> +enable_irq >> +1: >> +orr x1, x1, #1 << 24// use reserved ISS bit for >> instruction aborts > > Do we actually need to set this bit (ESR_LNX_EXEC) for aborts from EL1? > If not, could we just use the same entry code as el1_da? > This is based on what you already do in el0_ia, so the assumption was that it would be necessary for el1_ia. 
Here is an example call flow to help illustrate why I think this would be needed: --> el1_ia --> do_mem_abort(): determines it's a translation fault --> do_page_fault(): sets VM_EXEC in vm_flags based on ESR_LNX_EXEC I admit that I have no idea how the VM_EXEC flag would be used later on in the guts of the kernel page fault handling code, but we assumed there is some need to differentiate between instruction and data faults based on the existence of this flag. Are you suggesting that this flag does not get used, or is it not really needed? If you think this flag adds no value, then we'll do whatever you suggest. Harb -- Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
[PATCH] sched: remove an unnecessary memory access, rq->cpu in __schedule()
Is there any reason for keeping this statement in the code? -8<- >From d8a387efb8199b69b6464970d6f9fc57cbcf0ab0 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 11 Feb 2016 11:50:53 +0900 Subject: [PATCH] sched: remove an unnecessary memory access, rq->cpu in __schedule() Remove an unnecessary assignment of variable not used any more. Signed-off-by: Byungchul Park --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1315cec..501f5d9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3193,7 +3193,6 @@ static void __sched notrace __schedule(bool preempt) trace_sched_switch(preempt, prev, next); rq = context_switch(rq, prev, next); /* unlocks the rq */ - cpu = cpu_of(rq); } else { lockdep_unpin_lock(&rq->lock); raw_spin_unlock_irq(&rq->lock); -- 1.9.1
Re: [PATCH] psmouse: added BYD touchpad driver
Ok I think this about covers it. The line length issues remain, but the script repors them as warnings so I'm not to worried about it. Patch follows: --- Input: BYD: Added proper touch support Implemented absolute position and touch reporting. Now BYD touchpads will use the synaptics/libinput xorg touchpad drivers. Added documenatation for all known gesture packets and initialization commands. Signed-off-by: Richard Pospesel --- >From c0d0ece9ace3939691831eb20c2a5f01343781f1 Mon Sep 17 00:00:00 2001 From: pospeselr Date: Wed, 10 Feb 2016 18:24:00 -0800 Subject: [PATCH] byd changes --- drivers/input/mouse/byd.c | 577 - drivers/input/mouse/psmouse-base.c | 2 +- 2 files changed, 378 insertions(+), 201 deletions(-) diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c index 9425e0f..4c388ed 100644 --- a/drivers/input/mouse/byd.c +++ b/drivers/input/mouse/byd.c @@ -2,20 +2,32 @@ * BYD TouchPad PS/2 mouse driver * * Copyright (C) 2015 Chris Diamand + * Copyright (C) 2015 Richard Pospesel + * Copyright (C) 2015 Tai Chi Minh Ralph Eastwood + * Copyright (C) 2015 Martin Wimpress + * Copyright (C) 2015 Jay Kuri * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by * the Free Software Foundation. + * + * Protocol of BYD Touch Pad reverse-engineered from windows driver: + * filename: "byd touchpad driver - win7, 8, 8.1 - 2.4.1.102.zip" + * md5: 0d5e4660b98fca9587a0df212fca3048 + * sha1: 97a0eca8edc482bf9d08ab9509084a514dad4c4b + * datasheet: http://bydit.com/userfiles/file/BTP10463-XXX.pdf */ #include #include #include #include +#include #include "psmouse.h" #include "byd.h" +/* PS2 Bits */ #define PS2_Y_OVERFLOW BIT_MASK(7) #define PS2_X_OVERFLOW BIT_MASK(6) #define PS2_Y_SIGN BIT_MASK(5) @@ -26,69 +38,246 @@ #define PS2_LEFT BIT_MASK(0) /* - * The touchpad reports gestures in the last byte of each packet. 
It can take - * any of the following values: + * BYD pad constants */ -/* One-finger scrolling in one of the edge scroll zones. */ -#define BYD_SCROLLUP 0xCA -#define BYD_SCROLLDOWN 0x36 -#define BYD_SCROLLLEFT 0xCB -#define BYD_SCROLLRIGHT0x35 -/* Two-finger scrolling. */ -#define BYD_2DOWN 0x2B -#define BYD_2UP0xD5 -#define BYD_2LEFT 0xD6 -#define BYD_2RIGHT 0x2A -/* Pinching in or out. */ -#define BYD_ZOOMOUT0xD8 -#define BYD_ZOOMIN 0x28 -/* Three-finger swipe. */ -#define BYD_3UP0xD3 -#define BYD_3DOWN 0x2D -#define BYD_3LEFT 0xD4 -#define BYD_3RIGHT 0x2C -/* Four-finger swipe. */ -#define BYD_4UP0xCD -#define BYD_4DOWN 0x33 +/* + * True device resolution is unknown, however experiments show the + * resolution is about 111 units/mm. + * Absolute coordinate packets are in the range 0-255 for both X and Y + * we pick ABS_X/ABS_Y dimensions which are multiples of 256 and in + * the right ballpark given the touchpad's physical dimensions and estimate + * resolution per spec sheet, device active area dimensions are + * 101.6 x 60.1 mm. + */ +#define BYD_PAD_WIDTH 11264 +#define BYD_PAD_HEIGHT 6656 +#define BYD_PAD_RESOLUTION 111 -int byd_detect(struct psmouse *psmouse, bool set_properties) -{ - struct ps2dev *ps2dev = >ps2dev; - unsigned char param[4]; +/* + * Given the above dimensions, relative packets velocity is in multiples of + * 1 unit / 11 milliseconds. 
We use this dt to estimate distance traveled + */ +#define BYD_DT 11 +/* Time in milliseconds used to timeout various touch events */ +#define BYD_TOUCH_TIMEOUT 64 - param[0] = 0x03; - param[1] = 0x00; - param[2] = 0x00; - param[3] = 0x00; +/* BYD commands reverse engineered from windows driver */ - if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES)) - return -1; - if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES)) - return -1; - if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES)) - return -1; - if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES)) - return -1; - if (ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO)) - return -1; - - if (param[1] != 0x03 || param[2] != 0x64) - return -ENODEV; +/* + * Swipe gesture from off-pad to on-pad + * 0 : disable + * 1 : enable + */ +#define BYD_CMD_SET_OFFSCREEN_SWIPE0x10cc +/* + * Tap and drag delay time + * 0 : disable + * 1 - 8 : least to most delay + */ +#define BYD_CMD_SET_TAP_DRAG_DELAY_TIME0x10cf +/* + * Physical buttons function mapping + * 0 : enable + * 4 : normal + * 5 : left button custom command + * 6 : right
Re: [PATCH v5] mmc: OCTEON: Add host driver for OCTEON MMC controller
Le 10/02/2016 16:32, David Daney a écrit : > On 02/10/2016 03:49 PM, Aaro Koskinen wrote: >> Hi, >> >> On Wed, Feb 10, 2016 at 10:02:23AM -0800, David Daney wrote: >>> On 02/10/2016 09:36 AM, Matt Redfearn wrote: +pr_warn(FW_WARN "%s: Legacy property '%s'. Please remove\n", +node->full_name, legacy_name); >>> >>> I don't like this warning message. >>> >>> The vast majority of people that see it will not be able to change their >>> firmware. So it will be forever cluttering up their boot logs. >> >> Until they switch to use APPENDED_DTB. :-) >> > > I am philosophically opposed to making the DTB an internal kernel > implementation detail. > > For OCTEON boards, it is an ABI between the boot firmware and the > kernel, and is impractical to change. > > One could argue that many years ago, when the decision was made (by me), > that we should have opted to carry in the kernel source code tree the > DTS files for all OCTEON boards ever made, but we did not do that. Due > to the non-reversibility of time, the decision is hard to reverse. > > In the case of this MMC driver, the only real difference is that two > properties have legacy names that later had differing "official" names. > The overhead of carrying the legacy bindings is very low. Since there is an existing FDT patching infrastructure in arch/mips/cavium-octeon/ would not that be a place where you could put an adaptation layer between your legacy firmware properties and the upstream binding? -- Florian
Re: [PATCH] mm: fix pfn_t vs highmem
On 2/10/2016 10:18 PM, Dan Williams wrote: The pfn_t type uses an unsigned long to store a pfn + flags value. On a 64-bit platform the upper 12 bits of an unsigned long are never used for storing the value of a pfn. However, this is not true on highmem platforms, all 32-bits of a pfn value are used to address a 44-bit physical address space. A pfn_t needs to store a 64-bit value. Reported-by: Stuart Foster Reported-by: Julian Margetson Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=112211 Fixes: 01c8f1c44b83 ("mm, dax, gpu: convert vm_insert_mixed to pfn_t") Signed-off-by: Dan Williams --- include/linux/pfn.h |2 +- include/linux/pfn_t.h | 19 +-- kernel/memremap.c |2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 2d8e49711b63..1132953235c0 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -10,7 +10,7 @@ * backing is indicated by flags in the high bits of the value. */ typedef struct { - unsigned long val; + u64 val; } pfn_t; #endif diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 37448ab5fb5c..94994810c7c0 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -9,14 +9,13 @@ * PFN_DEV - pfn is not covered by system memmap by default * PFN_MAP - pfn has a dynamic page mapping established by a device driver */ -#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \ - << (BITS_PER_LONG - PAGE_SHIFT)) -#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1)) -#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2)) -#define PFN_DEV (1UL << (BITS_PER_LONG - 3)) -#define PFN_MAP (1UL << (BITS_PER_LONG - 4)) - -static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags) +#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) +#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) +#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) +#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) +#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 
4)) + +static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) { pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; @@ -29,7 +28,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn) return __pfn_to_pfn_t(pfn, 0); } -extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags); +extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags); static inline bool pfn_t_has_page(pfn_t pfn) { @@ -87,7 +86,7 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) #ifdef __HAVE_ARCH_PTE_DEVMAP static inline bool pfn_t_devmap(pfn_t pfn) { - const unsigned long flags = PFN_DEV|PFN_MAP; + const u64 flags = PFN_DEV|PFN_MAP; return (pfn.val & flags) == flags; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 3427cca5a2a6..b04ea2f5fbfe 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -152,7 +152,7 @@ void devm_memunmap(struct device *dev, void *addr) } EXPORT_SYMBOL(devm_memunmap); -pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags) +pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) { return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); } Thanks.This fixes my issue. Tested-by: Julian Margetson Julian
Re: [PATCH] ceph: fix a wrong comparison
> On Feb 11, 2016, at 01:38, Anton Protopopov wrote: > > A negative value rc compared to the positive value ENOENT in the > finish_read() function. > > Signed-off-by: Anton Protopopov > --- > fs/ceph/addr.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > index c222137..1b809c9 100644 > --- a/fs/ceph/addr.c > +++ b/fs/ceph/addr.c > @@ -276,7 +276,7 @@ static void finish_read(struct ceph_osd_request *req, > struct ceph_msg *msg) > for (i = 0; i < num_pages; i++) { > struct page *page = osd_data->pages[i]; > > - if (rc < 0 && rc != ENOENT) > + if (rc < 0 && rc != -ENOENT) > goto unlock; > if (bytes < (int)PAGE_CACHE_SIZE) { > /* zero (remainder of) page */ Good catch, thanks Yan, Zheng > -- > 2.6.5 >
Re: [PATCH -v1.1] x86/mm: Fix INVPCID asm constraint
On Wed, Feb 10, 2016 at 6:51 AM, Borislav Petkov wrote: > On Wed, Feb 10, 2016 at 02:48:02PM +0100, Michael Matz wrote: >> Hi, >> >> On Wed, 10 Feb 2016, Borislav Petkov wrote: >> >> > --- a/arch/x86/include/asm/tlbflush.h >> > +++ b/arch/x86/include/asm/tlbflush.h >> > @@ -23,7 +23,7 @@ static inline void __invpcid(unsigned long pcid, >> > unsigned long addr, >> > * invpcid (%rcx), %rax in long mode. >> > */ >> > asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" >> > - : : "m" (desc), "a" (type), "c" (desc) : "memory"); >> > + : : "m" (*desc), "a" (type), "c" (desc) : "memory"); >> >> That still doesn't do what you want. Arrays in C are funny. *desc is >> exactly equivalent to desc[0], _not_ to the whole array, > > Doh! > >> indeed there's no C syntax to name an lvalue of array type in normal >> expressions. You need to jump through hoops for this: >> >> "m" (*(struct {unsigned long x[2];} *)desc) > > Aha! That's why we wrapped the array in clwb() in a struct too, btw: > > static inline void clwb(volatile void *__p) > { > volatile struct { char x[64]; } *p = __p; > ... > >> It'd probably be easier to simply declare the descriptor as a struct, >> rather than an array, then the original syntax would have been mostly >> correct: >> >> struct {u64 d[2];} desc = { pcid, addr }; >> asm ... "m" (desc), "c" () > > Sounds better. Done. How does that below look like? > > Thanks Micha! > > --- > From: Borislav Petkov > Date: Wed, 10 Feb 2016 12:53:48 +0100 > Subject: [PATCH -v1.1] x86/mm: Fix INVPCID asm constraint > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > So we want to specify the dependency on both @pcid and @addr so that the > compiler doesn't reorder accesses to them *before* the TLB flush. But > for that to work, we need to express this properly in the inline asm and > deref the whole desc array, not the pointer to it. See clwb() for an > example. 
> > This fixes the build error on 32-bit: > > arch/x86/include/asm/tlbflush.h: In function ‘__invpcid’: > arch/x86/include/asm/tlbflush.h:26:18: error: memory input 0 is not > directly addressable > Acked-by: Andy Lutomirski
[PATCH] thp, dax: do not try to withdraw pgtable from non-anon VMA
DAX doesn't deposit pgtables when it maps huge pages: nothing to withdraw. It can lead to crash. Signed-off-by: Kirill A. Shutemov --- mm/huge_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5404f7534366..ca7f21516c3a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1706,7 +1706,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); - if (pmd_move_must_withdraw(new_ptl, old_ptl)) { + if (pmd_move_must_withdraw(new_ptl, old_ptl) && + vma_is_anonymous(vma)) { pgtable_t pgtable; pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); pgtable_trans_huge_deposit(mm, new_pmd, pgtable); -- 2.7.0
Re: [PATCH V1 4/6] arm64: exception: handle Synchronous External Abort
On 2/10/2016 1:03 PM, Will Deacon wrote: > On Fri, Feb 05, 2016 at 12:13:26PM -0700, Tyler Baicar wrote: >> +static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs >> *regs) >> +{ >> +struct siginfo info; >> + >> +atomic_notifier_call_chain(_handler_chain, 0, NULL); >> + >> +pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", >> + fault_name(esr), esr, addr); >> + >> +info.si_signo = SIGBUS; >> +info.si_errno = 0; >> +info.si_code = 0; >> +info.si_addr = (void __user *)addr; >> +arm64_notify_die("", regs, , esr); > > Surely we don't want to call this if the notifier chain handled the > exception? You are correct, Ideally you should not die if the notifier chain handled the exception (e.g. via memory fault handling). However, this patch was intended as a first step to provide the user with more useful information about the hardware error (e.g. details of a cache error, bus error, or memory error that led to the SEA). The thought was to do what you're suggesting as a next step (i.e. adding actual recovery mechanisms in the SEA handler). However, there are a couple of questions enumerated below that I think need more discussion. First, you need a way to get information returned from the notifier chain to understand whether or not it recovered from the error. (If this easier than I'm making it out to be, please set me straight here, as it was not clear to me at first glance on how to do that) Second, you need a way to kill/abort the thread that encountered this error, which (I assume) would only be valid/possible thing to do if it was a user thread that encountered the hardware error. For example, let's say we encounter an SEA due to a memory error that was successfully handled by the memory fault handling code (e.g. offline a page owned by some user application). 
Since this is a synchronous error that may have occurred either on a load, store, or instruction fetch, the SEA handler must also know to kill the user thread that encountered that hardware error. It is not clear to me how we do that cleanly, and what the repercussions would be. Would it get handled naturally after the page has become invalid (e.g. it would just result in a translation fault when attempting to continue the thread, existing kernel software error handling takes it from there)? Also, keep in mind that our current assumption is that *all* kernel data and threads should be considered critical, and any corruption/termination of kernel data/threads should always be treated as fatal. Please let us know if you disagree. Harb -- Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Business Partnership
Hello, I am Mr. LAURENT EYADEMA from Republic of Togo.please read the attached proposal. Thanks in anticipation of your urgent response, LAURENT EYADEMA proposal.docx Description: Binary data
[PATCH] ARM: multi_v7_defconfig: enable useful configurations for Vybrid
Enable configuration options useful for Vybrid: - NFC NAND driver - USB dual-role controller (and Chipidea Gadget support) - Built-in EDMA DMA driver (to be available at LPUART probe) - Vybrid ADC driver - IIO hwmon support (used in i.MX 23/28, patch pending for Vybrid) Signed-off-by: Stefan Agner --- arch/arm/configs/multi_v7_defconfig | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 314f6be..98ae019 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -182,6 +182,7 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_ATMEL=y CONFIG_MTD_NAND_BRCMNAND=y +CONFIG_MTD_NAND_VF610_NFC=y CONFIG_MTD_NAND_DAVINCI=y CONFIG_MTD_SPI_NOR=y CONFIG_SPI_FSL_QUADSPI=m @@ -403,6 +404,7 @@ CONFIG_POWER_RESET_KEYSTONE=y CONFIG_POWER_RESET_RMOBILE=y CONFIG_POWER_AVS=y CONFIG_ROCKCHIP_IODOMAIN=y +CONFIG_SENSORS_IIO_HWMON=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y CONFIG_SENSORS_NTC_THERMISTOR=m @@ -594,6 +596,7 @@ CONFIG_USB_STORAGE=y CONFIG_USB_DWC3=y CONFIG_USB_DWC2=m CONFIG_USB_CHIPIDEA=y +CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y CONFIG_AB8500_USB=y CONFIG_KEYSTONE_USB_PHY=y @@ -603,6 +606,7 @@ CONFIG_USB_ISP1301=y CONFIG_USB_MSM_OTG=m CONFIG_USB_MXS_PHY=y CONFIG_USB_GADGET=y +CONFIG_USB_FSL_USB2=y CONFIG_USB_RENESAS_USBHS_UDC=m CONFIG_USB_ETH=m CONFIG_MMC=y @@ -690,7 +694,7 @@ CONFIG_DMADEVICES=y CONFIG_DW_DMAC=y CONFIG_AT_HDMAC=y CONFIG_AT_XDMAC=y -CONFIG_FSL_EDMA=m +CONFIG_FSL_EDMA=y CONFIG_MV_XOR=y CONFIG_TEGRA20_APB_DMA=y CONFIG_SH_DMAE=y @@ -749,6 +753,7 @@ CONFIG_IIO=y CONFIG_AT91_ADC=m CONFIG_BERLIN2_ADC=m CONFIG_EXYNOS_ADC=m +CONFIG_VF610_ADC=m CONFIG_XILINX_XADC=y CONFIG_AK8975=y CONFIG_PWM=y -- 2.7.1
[PATCH] mm: fix pfn_t vs highmem
The pfn_t type uses an unsigned long to store a pfn + flags value. On a 64-bit platform the upper 12 bits of an unsigned long are never used for storing the value of a pfn. However, this is not true on highmem platforms, all 32-bits of a pfn value are used to address a 44-bit physical address space. A pfn_t needs to store a 64-bit value. Reported-by: Stuart Foster Reported-by: Julian Margetson Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=112211 Fixes: 01c8f1c44b83 ("mm, dax, gpu: convert vm_insert_mixed to pfn_t") Signed-off-by: Dan Williams --- include/linux/pfn.h |2 +- include/linux/pfn_t.h | 19 +-- kernel/memremap.c |2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 2d8e49711b63..1132953235c0 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -10,7 +10,7 @@ * backing is indicated by flags in the high bits of the value. */ typedef struct { - unsigned long val; + u64 val; } pfn_t; #endif diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 37448ab5fb5c..94994810c7c0 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -9,14 +9,13 @@ * PFN_DEV - pfn is not covered by system memmap by default * PFN_MAP - pfn has a dynamic page mapping established by a device driver */ -#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \ - << (BITS_PER_LONG - PAGE_SHIFT)) -#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1)) -#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2)) -#define PFN_DEV (1UL << (BITS_PER_LONG - 3)) -#define PFN_MAP (1UL << (BITS_PER_LONG - 4)) - -static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags) +#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) +#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) +#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) +#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) +#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) + +static inline pfn_t 
__pfn_to_pfn_t(unsigned long pfn, u64 flags) { pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; @@ -29,7 +28,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn) return __pfn_to_pfn_t(pfn, 0); } -extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags); +extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags); static inline bool pfn_t_has_page(pfn_t pfn) { @@ -87,7 +86,7 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) #ifdef __HAVE_ARCH_PTE_DEVMAP static inline bool pfn_t_devmap(pfn_t pfn) { - const unsigned long flags = PFN_DEV|PFN_MAP; + const u64 flags = PFN_DEV|PFN_MAP; return (pfn.val & flags) == flags; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 3427cca5a2a6..b04ea2f5fbfe 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -152,7 +152,7 @@ void devm_memunmap(struct device *dev, void *addr) } EXPORT_SYMBOL(devm_memunmap); -pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags) +pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) { return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); }
Re: fs/udf and udftools
On Wed, Feb 10, 2016 at 05:56:16PM -0800, Randy Dunlap wrote: > [add Jan Kara] > > On 02/10/16 13:29, Steve Kenton wrote: > > Is anyone maintaining these or am I about to volunteer for another job? > > Current MAINTAINERS file says: > > UDF FILESYSTEM > M:Jan Kara > S:Maintained > F:Documentation/filesystems/udf.txt > F:fs/udf/ > > and that Doc. file says: > > For the latest version and toolset see: > http://linux-udf.sourceforge.net/ > A bit of googling for udftools suggests that gentoo are maintaining their build, debian have patches for gcc-4 and gcc-5 among others, Fedora have their own patches, and Arch have some patches (which might be the same as some of the others, I did not look). Looks like the normal "possibly abandoned, but still useful to some people" software, where distros keep it building. There may also be others. Links - https://sources.gentoo.org/cgi-bin/viewvc.cgi/gentoo-x86/sys-fs/udftools/ChangeLog?view=markup https://launchpad.net/debian/+source/udftools/+changelog http://pkgs.fedoraproject.org/cgit/rpms/udftools.git/tree/ https://aur.archlinux.org/packages/udftools/ > > > I'm having to dig into fs/udf and udftools/mkudffs as part of a project I'm > > working on. > > It looks like both have been lacking in personal TLC for quite a while. The > > changes to > > fs/udf seem to be tree wide VFS work but not updates to things like write > > support and > > udftools seems to have been frozen for >10 years. Both ~work but I'd like > > to fix an > > oops I'm getting in udftools and work on adding fallocate() support to > > fs/udf and then > > feed it back to the community rather than let the changes bit rot locally. > > > > Where to go from here? I've been reading LKML on marc: for years, mainly to > > see what Linus, > > Al and a variable group of other people say/do but I've never done more > > than tinker with > > the kernel locally. I'm using git for the project mentioned above but again > > am not an > > expert but willing to learn. 
I'm not currently subscribed so please cc me > > if you could. > > > > smk > > > > > -- > ~Randy -- This email was written using 100% recycled letters.
[PATCH] ARM: imx_v6_v7_defconfig: enable useful configurations for Vybrid
Enable configuration options useful for Vybrid: - NFC NAND driver - USB dual-role controller support - FTM PWM driver - DSPI SPI driver - Colibri VF50 Touchscreen support. Beside that, enable useful configurations such as IIO hwmon support (used in i.MX 23/28, patch pending for Vybrid), PWM LED support and CPU idle support. Regenerated config using savedefconfig (which removes some configs which are now enabled by default). Signed-off-by: Stefan Agner --- arch/arm/configs/imx_v6_v7_defconfig | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 25a6066..dc7dc5c 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -57,6 +57,7 @@ CONFIG_KEXEC=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_ARM_IMX6Q_CPUFREQ=y +CONFIG_CPU_IDLE=y CONFIG_VFP=y CONFIG_NEON=y CONFIG_BINFMT_MISC=m @@ -72,7 +73,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set -CONFIG_IPV6=y CONFIG_NETFILTER=y CONFIG_CAN=y CONFIG_CAN_FLEXCAN=y @@ -104,10 +104,13 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SST25L=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_GPMI_NAND=y +CONFIG_MTD_NAND_VF610_NFC=y CONFIG_MTD_NAND_MXC=y CONFIG_MTD_SPI_NOR=y CONFIG_SPI_FSL_QUADSPI=y CONFIG_MTD_UBI=y +CONFIG_MTD_UBI_FASTMAP=y +CONFIG_MTD_UBI_BLOCK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 @@ -145,7 +148,6 @@ CONFIG_USB_RTL8152=m CONFIG_USB_USBNET=m CONFIG_USB_NET_CDC_EEM=m CONFIG_BRCMFMAC=m -CONFIG_WL_TI=y CONFIG_WL12XX=m CONFIG_WLCORE_SDIO=m # CONFIG_WILINK_PLATFORM_DATA is not set @@ -164,6 +166,7 @@ CONFIG_TOUCHSCREEN_MC13783=y CONFIG_TOUCHSCREEN_TSC2007=y CONFIG_TOUCHSCREEN_STMPE=y CONFIG_TOUCHSCREEN_SX8654=y +CONFIG_TOUCHSCREEN_COLIBRI_VF50=y CONFIG_INPUT_MISC=y CONFIG_INPUT_MMA8450=y CONFIG_SERIO_SERPORT=m @@ -173,7 +176,6 @@ CONFIG_SERIAL_IMX=y 
CONFIG_SERIAL_IMX_CONSOLE=y CONFIG_SERIAL_FSL_LPUART=y CONFIG_SERIAL_FSL_LPUART_CONSOLE=y -CONFIG_HW_RANDOM=y # CONFIG_I2C_COMPAT is not set CONFIG_I2C_CHARDEV=y # CONFIG_I2C_HELPER_AUTO is not set @@ -182,6 +184,7 @@ CONFIG_I2C_ALGOPCA=m CONFIG_I2C_IMX=y CONFIG_SPI=y CONFIG_SPI_IMX=y +CONFIG_SPI_FSL_DSPI=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_MC9S08DZ60=y CONFIG_GPIO_PCA953X=y @@ -192,6 +195,7 @@ CONFIG_POWER_RESET_IMX=y CONFIG_POWER_RESET_SYSCON=y CONFIG_POWER_RESET_SYSCON_POWEROFF=y CONFIG_SENSORS_GPIO_FAN=y +CONFIG_SENSORS_IIO_HWMON=y CONFIG_THERMAL=y CONFIG_CPU_THERMAL=y CONFIG_IMX_THERMAL=y @@ -227,7 +231,6 @@ CONFIG_DRM=y CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_DW_HDMI_AHB_AUDIO=m CONFIG_DRM_IMX=y -CONFIG_DRM_IMX_FB_HELPER=y CONFIG_DRM_IMX_PARALLEL_DISPLAY=y CONFIG_DRM_IMX_TVE=y CONFIG_DRM_IMX_LDB=y @@ -271,6 +274,7 @@ CONFIG_USB_EHSET_TEST_FIXTURE=m CONFIG_NOP_USB_XCEIV=y CONFIG_USB_MXS_PHY=y CONFIG_USB_GADGET=y +CONFIG_USB_FSL_USB2=y CONFIG_USB_CONFIGFS=m CONFIG_USB_CONFIGFS_SERIAL=y CONFIG_USB_CONFIGFS_ACM=y @@ -296,6 +300,7 @@ CONFIG_MMC_SDHCI_ESDHC_IMX=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y +CONFIG_LEDS_PWM=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y @@ -312,14 +317,15 @@ CONFIG_RTC_DRV_MC13XXX=y CONFIG_RTC_DRV_MXC=y CONFIG_RTC_DRV_SNVS=y CONFIG_DMADEVICES=y +CONFIG_FSL_EDMA=y CONFIG_IMX_SDMA=y CONFIG_MXS_DMA=y -CONFIG_FSL_EDMA=y CONFIG_STAGING=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_IIO=y CONFIG_VF610_ADC=y CONFIG_PWM=y +CONFIG_PWM_FSL_FTM=y CONFIG_PWM_IMX=y CONFIG_NVMEM=y CONFIG_NVMEM_IMX_OCOTP=y @@ -330,9 +336,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set -- 2.7.1
Re: [PATCH v2 3/3] mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous
2016-02-11 3:58 GMT+09:00 Andrew Morton : > On Wed, 10 Feb 2016 14:42:57 +0100 Vlastimil Babka wrote: > >> > --- a/mm/memory_hotplug.c >> > +++ b/mm/memory_hotplug.c >> > @@ -509,6 +509,8 @@ int __ref __add_pages(int nid, struct zone *zone, >> > unsigned long phys_start_pfn, >> > int start_sec, end_sec; >> > struct vmem_altmap *altmap; >> > >> > + clear_zone_contiguous(zone); >> > + >> > /* during initialize mem_map, align hot-added range to section */ >> > start_sec = pfn_to_section_nr(phys_start_pfn); >> > end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); >> > @@ -540,6 +542,8 @@ int __ref __add_pages(int nid, struct zone *zone, >> > unsigned long phys_start_pfn, >> > } >> > vmemmap_populate_print_last(); >> > >> > + set_zone_contiguous(zone); >> > + >> > return err; >> > } >> > EXPORT_SYMBOL_GPL(__add_pages); >> >> Between the clear and set, __add_pages() might return with -EINVAL, >> leaving the flag cleared potentially forever. Not critical, probably >> rare, but it should be possible to avoid this by moving the clear below >> the altmap check? > > um, yes. return-in-the-middle-of-a-function strikes again. > > --- > a/mm/memory_hotplug.c~mm-compaction-speed-up-pageblock_pfn_to_page-when-zone-is-contiguous-fix > +++ a/mm/memory_hotplug.c > @@ -526,7 +526,8 @@ int __ref __add_pages(int nid, struct zo > if (altmap->base_pfn != phys_start_pfn > || vmem_altmap_offset(altmap) > nr_pages) { > pr_warn_once("memory add fail, invalid altmap\n"); > - return -EINVAL; > + err = -EINVAL; > + goto out; > } > altmap->alloc = 0; > } > @@ -544,9 +545,8 @@ int __ref __add_pages(int nid, struct zo > err = 0; > } > vmemmap_populate_print_last(); > - > +out: > set_zone_contiguous(zone); > - > return err; > } > EXPORT_SYMBOL_GPL(__add_pages); Sorry for late response. I was on biggest holiday in Korea until now. It seems that there is no issue left. Andrew, Vlastimil, thanks for fixes and review. Thanks.
Re: fs/udf and udftools
[add Jan Kara] On 02/10/16 13:29, Steve Kenton wrote: > Is anyone maintaining these or am I about to volunteer for another job? Current MAINTAINERS file says: UDF FILESYSTEM M: Jan Kara S: Maintained F: Documentation/filesystems/udf.txt F: fs/udf/ and that Doc. file says: For the latest version and toolset see: http://linux-udf.sourceforge.net/ > I'm having to dig into fs/udf and udftools/mkudffs as part of a project I'm > working on. > It looks like both have been lacking in personal TLC for quite a while. The > changes to > fs/udf seem to be tree wide VFS work but not updates to things like write > support and > udftools seems to have been frozen for >10 years. Both ~work but I'd like to > fix an > oops I'm getting in udftools and work on adding fallocate() support to fs/udf > and then > feed it back to the community rather than let the changes bit rot locally. > > Where to go from here? I've been reading LKML on marc: for years, mainly to > see what Linus, > Al and a variable group of other people say/do but I've never done more than > tinker with > the kernel locally. I'm using git for the project mentioned above but again > am not an > expert but willing to learn. I'm not currently subscribed so please cc me if > you could. > > smk > -- ~Randy
Re: [PATCH v7 00/14] Big fixes, retries, handle a race condition
> "Yaniv" == Yaniv Gardi writes: Yaniv> V7: updated patch 0001 according to a comment also, removed patch Yaniv> 07/15 from V6, so now there are only 14 patches Applied to 4.6/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Business Partnership
Hello, I am Mr. LAURENT EYADEMA from Republic of Togo.please read the attached proposal. Thanks in anticipation of your urgent response, LAURENT EYADEMA proposal.docx Description: Binary data
[PATCH v2 1/7] ibmvscsi: Correct values for several viosrp_crq_format enums
The enum values for VIOSRP_LINUX_FORMAT and VIOSRP_INLINE_FORMAT are off by one. They are currently defined as 0x06 and 0x07 respectively. These values are defined in PAPR correctly as 0x05 and 0x06. This inconsistency has gone unnoticed as neither enum is currently used. The possible future support of PING messages between the VIOS and client adapter relies on VIOSRP_INLINE_FORMAT crq messages. Corrected these enum values to match PAPR definitions. Signed-off-by: Tyrel Datwyler Reviewed-by: Johannes Thumshirn --- drivers/scsi/ibmvscsi/viosrp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h index 1162430..d1044e9 100644 --- a/drivers/scsi/ibmvscsi/viosrp.h +++ b/drivers/scsi/ibmvscsi/viosrp.h @@ -56,8 +56,8 @@ enum viosrp_crq_formats { VIOSRP_MAD_FORMAT = 0x02, VIOSRP_OS400_FORMAT = 0x03, VIOSRP_AIX_FORMAT = 0x04, - VIOSRP_LINUX_FORMAT = 0x06, - VIOSRP_INLINE_FORMAT = 0x07 + VIOSRP_LINUX_FORMAT = 0x05, + VIOSRP_INLINE_FORMAT = 0x06 }; enum viosrp_crq_status { -- 2.5.0
[PATCH v2 5/7] ibmvscsi: Remove unsupported host config MAD
A VIOSRP_HOST_CONFIG_TYPE management datagram (MAD) has existed in the code for some time. From what information I've gathered from Brian King this was likely implemented on the host side in a SLES 9 based VIOS, which is no longer supported anywhere. Further, it is not defined in PAPR or supported by any AIX based VIOS. Treating as bit rot and removing the associated host config code. The config attribute and its show function are left as not to break userspace. The behavior remains the same returning nothing. Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvscsi.c | 71 +++- drivers/scsi/ibmvscsi/viosrp.h | 7 2 files changed, 4 insertions(+), 74 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index c208295..e8d4af5 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1853,62 +1853,6 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, } /** - * ibmvscsi_get_host_config: Send the command to the server to get host - * configuration data. The data is opaque to us. 
- */ -static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, - unsigned char *buffer, int length) -{ - struct viosrp_host_config *host_config; - struct srp_event_struct *evt_struct; - unsigned long flags; - dma_addr_t addr; - int rc; - - evt_struct = get_event_struct(>pool); - if (!evt_struct) { - dev_err(hostdata->dev, "couldn't allocate event for HOST_CONFIG!\n"); - return -1; - } - - init_event_struct(evt_struct, - sync_completion, - VIOSRP_MAD_FORMAT, - info_timeout); - - host_config = _struct->iu.mad.host_config; - - /* The transport length field is only 16-bit */ - length = min(0x, length); - - /* Set up a lun reset SRP command */ - memset(host_config, 0x00, sizeof(*host_config)); - host_config->common.type = cpu_to_be32(VIOSRP_HOST_CONFIG_TYPE); - host_config->common.length = cpu_to_be16(length); - addr = dma_map_single(hostdata->dev, buffer, length, DMA_BIDIRECTIONAL); - - if (dma_mapping_error(hostdata->dev, addr)) { - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(hostdata->dev, - "dma_mapping error getting host config\n"); - free_event_struct(>pool, evt_struct); - return -1; - } - - host_config->buffer = cpu_to_be64(addr); - - init_completion(_struct->comp); - spin_lock_irqsave(hostdata->host->host_lock, flags); - rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2); - spin_unlock_irqrestore(hostdata->host->host_lock, flags); - if (rc == 0) - wait_for_completion(_struct->comp); - dma_unmap_single(hostdata->dev, addr, length, DMA_BIDIRECTIONAL); - - return rc; -} - -/** * ibmvscsi_slave_configure: Set the "allow_restart" flag for each disk. 
* @sdev: struct scsi_device device to configure * @@ -2093,21 +2037,14 @@ static struct device_attribute ibmvscsi_host_os_type = { static ssize_t show_host_config(struct device *dev, struct device_attribute *attr, char *buf) { - struct Scsi_Host *shost = class_to_shost(dev); - struct ibmvscsi_host_data *hostdata = shost_priv(shost); - - /* returns null-terminated host config data */ - if (ibmvscsi_do_host_config(hostdata, buf, PAGE_SIZE) == 0) - return strlen(buf); - else - return 0; + return 0; } static struct device_attribute ibmvscsi_host_config = { .attr = { -.name = "config", -.mode = S_IRUGO, -}, + .name = "config", + .mode = S_IRUGO, + }, .show = show_host_config, }; diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h index d0f689b..c1ab8a4 100644 --- a/drivers/scsi/ibmvscsi/viosrp.h +++ b/drivers/scsi/ibmvscsi/viosrp.h @@ -99,7 +99,6 @@ enum viosrp_mad_types { VIOSRP_EMPTY_IU_TYPE = 0x01, VIOSRP_ERROR_LOG_TYPE = 0x02, VIOSRP_ADAPTER_INFO_TYPE = 0x03, - VIOSRP_HOST_CONFIG_TYPE = 0x04, VIOSRP_CAPABILITIES_TYPE = 0x05, VIOSRP_ENABLE_FAST_FAIL = 0x08, }; @@ -165,11 +164,6 @@ struct viosrp_adapter_info { __be64 buffer; }; -struct viosrp_host_config { - struct mad_common common; - __be64 buffer; -}; - struct viosrp_fast_fail { struct mad_common common; }; @@ -207,7 +201,6 @@ union mad_iu { struct viosrp_empty_iu empty_iu; struct viosrp_error_log error_log; struct viosrp_adapter_info adapter_info; - struct viosrp_host_config host_config; struct
[PATCH v2 4/7] ibmvscsi: Use of_root to access OF device tree root node
The root node of the OF device tree is exported as of_root. No need to look up the root by path name. Instead just get a reference directly via of_root. Signed-off-by: Tyrel Datwyler Reviewed-by: Johannes Thumshirn --- drivers/scsi/ibmvscsi/ibmvscsi.c | 14 ++ 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 4b09a9b..c208295 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -248,25 +248,23 @@ static void ibmvscsi_task(void *data) static void gather_partition_info(void) { - struct device_node *rootdn; - const char *ppartition_name; const __be32 *p_number_ptr; /* Retrieve information about this partition */ - rootdn = of_find_node_by_path("/"); - if (!rootdn) { + if (!of_root) return; - } - ppartition_name = of_get_property(rootdn, "ibm,partition-name", NULL); + of_node_get(of_root); + + ppartition_name = of_get_property(of_root, "ibm,partition-name", NULL); if (ppartition_name) strncpy(partition_name, ppartition_name, sizeof(partition_name)); - p_number_ptr = of_get_property(rootdn, "ibm,partition-no", NULL); + p_number_ptr = of_get_property(of_root, "ibm,partition-no", NULL); if (p_number_ptr) partition_number = of_read_number(p_number_ptr, 1); - of_node_put(rootdn); + of_node_put(of_root); } static void set_adapter_info(struct ibmvscsi_host_data *hostdata) -- 2.5.0
[PATCH v2 3/7] ibmvscsi: Replace magic values in set_adpater_info() with defines
Add defines for mad version and mad os_type, and replace the magic numbers in set_adapter_info() accordingly. Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvscsi.c | 8 drivers/scsi/ibmvscsi/viosrp.h | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index c888ea1..4b09a9b 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -283,8 +283,8 @@ static void set_adapter_info(struct ibmvscsi_host_data *hostdata) hostdata->madapter_info.partition_number = cpu_to_be32(partition_number); - hostdata->madapter_info.mad_version = cpu_to_be32(1); - hostdata->madapter_info.os_type = cpu_to_be32(2); + hostdata->madapter_info.mad_version = cpu_to_be32(SRP_MAD_VERSION_1); + hostdata->madapter_info.os_type = cpu_to_be32(SRP_MAD_OS_LINUX); } /** @@ -1398,7 +1398,7 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) hostdata->host->max_sectors = be32_to_cpu(hostdata->madapter_info.port_max_txu[0]) >> 9; - if (be32_to_cpu(hostdata->madapter_info.os_type) == 3 && + if (be32_to_cpu(hostdata->madapter_info.os_type) == SRP_MAD_OS_AIX && strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) { dev_err(hostdata->dev, "host (Ver. 
%s) doesn't support large transfers\n", hostdata->madapter_info.srp_version); @@ -1407,7 +1407,7 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; } - if (be32_to_cpu(hostdata->madapter_info.os_type) == 3) { + if (be32_to_cpu(hostdata->madapter_info.os_type) == SRP_MAD_OS_AIX) { enable_fast_fail(hostdata); return; } diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h index 3d20851..d0f689b 100644 --- a/drivers/scsi/ibmvscsi/viosrp.h +++ b/drivers/scsi/ibmvscsi/viosrp.h @@ -221,7 +221,10 @@ struct mad_adapter_info_data { char srp_version[8]; char partition_name[96]; __be32 partition_number; +#define SRP_MAD_VERSION_1 1 __be32 mad_version; +#define SRP_MAD_OS_LINUX 2 +#define SRP_MAD_OS_AIX 3 __be32 os_type; __be32 port_max_txu[8]; /* per-port maximum transfer */ }; -- 2.5.0
[PATCH v2 7/7] ibmvscsi: use H_CLOSED instead of magic number
In a couple places the magic value of 2 is used to check the return code of hypercalls. This translates to H_CLOSED. Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 6025481..d9534ee 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -314,7 +314,7 @@ static int ibmvscsi_reset_crq_queue(struct crq_queue *queue, rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, queue->msg_token, PAGE_SIZE); - if (rc == 2) { + if (rc == H_CLOSED) { /* Adapter is good, but other end is not ready */ dev_warn(hostdata->dev, "Partner adapter not ready\n"); } else if (rc != 0) { @@ -364,7 +364,7 @@ static int ibmvscsi_init_crq_queue(struct crq_queue *queue, rc = ibmvscsi_reset_crq_queue(queue, hostdata); - if (rc == 2) { + if (rc == H_CLOSED) { /* Adapter is good, but other end is not ready */ dev_warn(hostdata->dev, "Partner adapter not ready\n"); retrc = 0; -- 2.5.0
[PATCH v2 2/7] ibmvscsi: Add and use enums for valid CRQ header values
The PAPR defines four valid header values for the first byte of a CRQ message. Namely, an unused/empty message (0x00), a valid command/response entry (0x80), a valid initialization entry (0xC0), and a valid transport event (0xFF). Further, initialization responses have two formats namely initialize (0x01) and initialize complete (0x02). Define these values as enums and use them in the code in place of their magic number equivalents. Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvscsi.c | 18 +- drivers/scsi/ibmvscsi/viosrp.h | 12 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index adfef9d..c888ea1 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -182,7 +182,7 @@ static struct viosrp_crq *crq_queue_next_crq(struct crq_queue *queue) spin_lock_irqsave(>lock, flags); crq = >msgs[queue->cur]; - if (crq->valid & 0x80) { + if (crq->valid != VIOSRP_CRQ_FREE) { if (++queue->cur == queue->size) queue->cur = 0; @@ -231,7 +231,7 @@ static void ibmvscsi_task(void *data) /* Pull all the valid messages off the CRQ */ while ((crq = crq_queue_next_crq(>queue)) != NULL) { ibmvscsi_handle_crq(crq, hostdata); - crq->valid = 0x00; + crq->valid = VIOSRP_CRQ_FREE; } vio_enable_interrupts(vdev); @@ -239,7 +239,7 @@ static void ibmvscsi_task(void *data) if (crq != NULL) { vio_disable_interrupts(vdev); ibmvscsi_handle_crq(crq, hostdata); - crq->valid = 0x00; + crq->valid = VIOSRP_CRQ_FREE; } else { done = 1; } @@ -474,7 +474,7 @@ static int initialize_event_pool(struct event_pool *pool, struct srp_event_struct *evt = >events[i]; memset(>crq, 0x00, sizeof(evt->crq)); atomic_set(>free, 1); - evt->crq.valid = 0x80; + evt->crq.valid = VIOSRP_CRQ_CMD_RSP; evt->crq.IU_length = cpu_to_be16(sizeof(*evt->xfer_iu)); evt->crq.IU_data_ptr = cpu_to_be64(pool->iu_token + sizeof(*evt->xfer_iu) * i); @@ -1767,9 +1767,9 @@ static void ibmvscsi_handle_crq(struct viosrp_crq 
*crq, struct srp_event_struct *evt_struct = (__force struct srp_event_struct *)crq->IU_data_ptr; switch (crq->valid) { - case 0xC0: /* initialization */ + case VIOSRP_CRQ_INIT_RSP: /* initialization */ switch (crq->format) { - case 0x01: /* Initialization message */ + case VIOSRP_CRQ_INIT: /* Initialization message */ dev_info(hostdata->dev, "partner initialized\n"); /* Send back a response */ rc = ibmvscsi_send_crq(hostdata, 0xC002LL, 0); @@ -1781,7 +1781,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, } break; - case 0x02: /* Initialization response */ + case VIOSRP_CRQ_INIT_COMPLETE: /* Initialization response */ dev_info(hostdata->dev, "partner initialization complete\n"); /* Now login */ @@ -1791,7 +1791,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format); } return; - case 0xFF: /* Hypervisor telling us the connection is closed */ + case VIOSRP_CRQ_XPORT_EVENT:/* Hypervisor telling us the connection is closed */ scsi_block_requests(hostdata->host); atomic_set(>request_limit, 0); if (crq->format == 0x06) { @@ -1807,7 +1807,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, ibmvscsi_reset_host(hostdata); } return; - case 0x80: /* real payload */ + case VIOSRP_CRQ_CMD_RSP:/* real payload */ break; default: dev_err(hostdata->dev, "got an invalid message type 0x%02x\n", diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h index d1044e9..3d20851 100644 --- a/drivers/scsi/ibmvscsi/viosrp.h +++ b/drivers/scsi/ibmvscsi/viosrp.h @@ -51,6 +51,18 @@ union srp_iu { u8 reserved[SRP_MAX_IU_LEN]; }; +enum viosrp_crq_headers { + VIOSRP_CRQ_FREE = 0x00, + VIOSRP_CRQ_CMD_RSP = 0x80, + VIOSRP_CRQ_INIT_RSP = 0xC0, + VIOSRP_CRQ_XPORT_EVENT = 0xFF +}; + +enum
[PATCH v2 0/7] ibmvscsi code cleanup
Fixed up a couple spots that were out of line with the PAPR in regards to its defined VSCSI protocol. Did away with some magic numbers directly in the code. Fixed a minor endian issue. -- v2 changes: -Renamed CRQ header enums and added enums for INIT formats -Check that crq->valid != VIOSRP_CRQ_FREE before handling in place of hacky bitwise & to check for first bit being set. -Added define for AIX os_type -Left sysfs config attribute to prevent breaking userspace Tyrel Datwyler (7): ibmvscsi: Correct values for several viosrp_crq_format enums ibmvscsi: Add and use enums for valid CRQ header values ibmvscsi: Replace magic values in set_adpater_info() with defines ibmvscsi: Use of_root to access OF device tree root node ibmvscsi: Remove unsupported host config MAD ibmvscsi: Add endian conversions to sysfs attribute show functions ibmvscsi: use H_CLOSED instead of magic number drivers/scsi/ibmvscsi/ibmvscsi.c | 128 ++- drivers/scsi/ibmvscsi/viosrp.h | 26 +--- 2 files changed, 49 insertions(+), 105 deletions(-) -- 2.5.0
[PATCH v2 6/7] ibmvscsi: Add endian conversions to sysfs attribute show functions
The values returned by the show functions for the host os_type, mad_version, and partition_number attributes get their values directly from the madapter_info struct whose associated fields are __be32 typed. Added endian conversion to ensure these values are sane on LE platforms. Signed-off-by: Tyrel Datwyler Reviewed-by: Johannes Thumshirn --- drivers/scsi/ibmvscsi/ibmvscsi.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index e8d4af5..6025481 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1983,7 +1983,7 @@ static ssize_t show_host_partition_number(struct device *dev, int len; len = snprintf(buf, PAGE_SIZE, "%d\n", - hostdata->madapter_info.partition_number); + be32_to_cpu(hostdata->madapter_info.partition_number)); return len; } @@ -2003,7 +2003,7 @@ static ssize_t show_host_mad_version(struct device *dev, int len; len = snprintf(buf, PAGE_SIZE, "%d\n", - hostdata->madapter_info.mad_version); + be32_to_cpu(hostdata->madapter_info.mad_version)); return len; } @@ -2022,7 +2022,8 @@ static ssize_t show_host_os_type(struct device *dev, struct ibmvscsi_host_data *hostdata = shost_priv(shost); int len; - len = snprintf(buf, PAGE_SIZE, "%d\n", hostdata->madapter_info.os_type); + len = snprintf(buf, PAGE_SIZE, "%d\n", + be32_to_cpu(hostdata->madapter_info.os_type)); return len; } -- 2.5.0
Re: [Resend PATCH 1/3] scsi:stex.c Support to Pegasus series.
> "Charles" == Charles Chiou writes: Charles, Charles> Pegasus is a high performance hardware RAID solution designed to Charles> unleash the raw power of Thunderbolt technology. Please address Oliver's concerns about power management. Also, I was going to merge the first two patches in the series but your mailer mangled them and they did not apply. Please use git send-email to submit patches. Thank you! -- Martin K. Petersen Oracle Linux Engineering
Re: possible patch
On Wed, 10 Feb 2016 19:49:11 -0500 Justin Keller wrote: > I am new to submitting patches, so sorry if I am doing it wrong. My idea was > to change line 147 in drivers/isdn/pcbit/callbacks.c from > if (cbdata->data.setup.CallingPN == NULL) { > > to > > if (cbdata->data.setup.CallingPN == NULL || > sizeof(cbdata->data.setup.CallingPN)>sizeof(ictl.parm.setup.phone) { If you want to patch the kernel, the best way to describe what you want to do is to ... send a patch. Before you do so, though: - Please read the available documentation, including SubmittingPatches and development-process/* - Prepare a proper changelog saying *why* you want to make this change. You are trying to change the ISDN subsystem, which is pretty static these days; what current problem are you running into that makes you think this change needs to be applied to the kernel? Thanks, jon
[PATCH] cpufreq: Avoid unnecessary locking in show() and store()
From: Rafael J. Wysocki The show() and store() routines in the cpufreq core don't need to acquire all of the locks to check if the struct freq_attr they want to use really provides the callbacks they need as expected, so change them to avoid doing that. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 27 +++ 1 file changed, 11 insertions(+), 16 deletions(-) Index: linux-pm/drivers/cpufreq/cpufreq.c === --- linux-pm.orig/drivers/cpufreq/cpufreq.c +++ linux-pm/drivers/cpufreq/cpufreq.c @@ -862,13 +862,11 @@ static ssize_t show(struct kobject *kobj struct freq_attr *fattr = to_attr(attr); ssize_t ret; - down_read(>rwsem); - - if (fattr->show) - ret = fattr->show(policy, buf); - else - ret = -EIO; + if (!fattr->show) + return -EIO; + down_read(>rwsem); + ret = fattr->show(policy, buf); up_read(>rwsem); return ret; @@ -881,20 +879,17 @@ static ssize_t store(struct kobject *kob struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; - get_online_cpus(); - - if (!cpu_online(policy->cpu)) - goto unlock; + if (!fattr->store) + return -EIO; - down_write(>rwsem); + get_online_cpus(); - if (fattr->store) + if (cpu_online(policy->cpu)) { + down_write(>rwsem); ret = fattr->store(policy, buf, count); - else - ret = -EIO; + up_write(>rwsem); + } - up_write(>rwsem); -unlock: put_online_cpus(); return ret;
Re: [PATCH 2/4] acct,time: change indentation in __acct_update_integrals
On Wed, 2016-02-10 at 20:08 -0500, r...@redhat.com wrote: > Change the indentation in __acct_update_integrals to make the function > a little easier to read. trivia: > diff --git a/kernel/tsacct.c b/kernel/tsacct.c [] > @@ -125,31 +125,32 @@ void xacct_add_tsk(struct taskstats *stats, struct > task_struct *p) [] > + if (!likely(tsk->mm)) > + return; Using if (unlikely(!tsk->mm)) return; would be a lot more common. (~150:1 in the kernel sources)
Re: [PATCH V4 1/7] cpufreq: Merge cpufreq_offline_prepare/finish routines
On Thu, Feb 11, 2016 at 1:59 AM, Rafael J. Wysocki wrote: > On Tue, Feb 9, 2016 at 4:46 AM, Viresh Kumar wrote: >> The offline routine was separated into two halves earlier by >> 'commit 1aee40ac9c86 ("cpufreq: Invoke __cpufreq_remove_dev_finish() >> after releasing cpu_hotplug.lock");. >> >> And the reasons cited were, race issues between accessing policy's sysfs >> files and policy kobject's cleanup. >> >> That race isn't valid anymore, as we don't remove the policy & its >> kobject completely on hotplugs, but do that from ->remove() callback of >> subsys framework. > > Governor sysfs attributes are still removed in > __cpufreq_governor(_EXIT), though, so had store() been used for them, > the deadlock described in the changelog of commit 1aee40ac9c86 would > have been possible. > > Fortunately, we don't use store() (which still does get_online_cpus()) > for those attributes now. We use governor_store() for them and that > doesn't call get_online_cpus(). So in fact this patch is only correct > after the recent rework of the governor attributes handling. > > Please modify the changelog to explain that more thoroughly. And one question tangentially related to this patch: Would it be possible to avoid calling __cpufreq_governor(_EXIT) for CPU offline? The fact that we still carry out the whole governor teardown at that point is slightly disturbing, as in theory it should be possible to keep the governor attributes in place across offline/online. Thanks, Rafael
[PATCH 1/5] x86,perf/cqm : Fix cqm handling of grouping events into a cache_group
Currently cqm(cache quality of service monitoring) is grouping all events belonging to same PID to use one RMID. However its not counting all of these different events. Hence we end up with a count of zero for all events other than the group leader. The patch tries to address the issue by keeping a flag in the perf_event.hw which has other cqm related fields. The field is updated at event creation and during grouping. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 13 ++--- include/linux/perf_event.h | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index a316ca9..e6be335 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -281,9 +281,13 @@ static bool __match_event(struct perf_event *a, struct perf_event *b) /* * Events that target same task are placed into the same cache group. +* Mark it as a multi event group, so that we update ->count +* for every event rather than just the group leader later. */ - if (a->hw.target == b->hw.target) + if (a->hw.target == b->hw.target) { + b->hw.is_group_event = true; return true; + } /* * Are we an inherited event? @@ -849,6 +853,7 @@ static void intel_cqm_setup_event(struct perf_event *event, bool conflict = false; u32 rmid; + event->hw.is_group_event = false; list_for_each_entry(iter, _groups, hw.cqm_groups_entry) { rmid = iter->hw.cqm_rmid; @@ -940,7 +945,9 @@ static u64 intel_cqm_event_count(struct perf_event *event) return __perf_event_count(event); /* -* Only the group leader gets to report values. This stops us +* Only the group leader gets to report values except in case of +* multiple events in the same group, we still need to read the +* other events.This stops us * reporting duplicate values to userspace, and gives us a clear * rule for which task gets to report the values. 
* @@ -948,7 +955,7 @@ static u64 intel_cqm_event_count(struct perf_event *event) * specific packages - we forfeit that ability when we create * task events. */ - if (!cqm_group_leader(event)) + if (!cqm_group_leader(event) && !event->hw.is_group_event) return 0; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f9828a4..f11c732 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -121,6 +121,7 @@ struct hw_perf_event { struct { /* intel_cqm */ int cqm_state; u32 cqm_rmid; + boolis_group_event; struct list_headcqm_events_entry; struct list_headcqm_groups_entry; struct list_headcqm_group_entry; -- 1.9.1
[PATCH 4/5] x86/mbm: RMID Recycling MBM changes
RMID could be allocated or deallocated as part of RMID recycling. When an RMID is allocated for mbm event, the mbm counter needs to be initialized because next time we read the counter we need the previous value to account for total bytes that went to the memory controller. Similarly, when RMID is deallocated we need to update the ->count variable. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 27 +-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index b1c9663..fea22c8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -454,6 +454,7 @@ struct rmid_read { static void __intel_cqm_event_count(void *info); static void init_mbm_sample(u32 rmid, enum mbm_evt_type evt_type); +static void __intel_mbm_event_count(void *info); static bool is_mbm_event(int e) { @@ -480,8 +481,14 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) .rmid = old_rmid, }; - on_each_cpu_mask(_cpumask, __intel_cqm_event_count, -, 1); + if (is_mbm_event(group->attr.config)) { + rr.evt_type = group->attr.config; + on_each_cpu_mask(_cpumask, __intel_mbm_event_count, +, 1); + } else { + on_each_cpu_mask(_cpumask, __intel_cqm_event_count, +, 1); + } local64_set(>count, atomic64_read()); } @@ -493,6 +500,22 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) raw_spin_unlock_irq(_lock); + /* +* If the allocation is for mbm, init the mbm stats. +* Need to check if each event in the group is mbm event +* because there could be multiple type of events in the same group. +*/ + if (__rmid_valid(rmid)) { + event = group; + if (is_mbm_event(event->attr.config)) + init_mbm_sample(rmid, event->attr.config); + + list_for_each_entry(event, head, hw.cqm_group_entry) { + if (is_mbm_event(event->attr.config)) + init_mbm_sample(rmid, event->attr.config); + } + } + return old_rmid; } -- 1.9.1
[PATCH 5/5] x86/mbm: Add support for MBM counter overflow handling
This patch adds a per package timer which periodically updates the Memory bandwidth counters for the events that are currently active. Current patch has a periodic timer every 1s since the SDM guarantees that the counter will not overflow in 1s but this time can be definitely improved by calibrating on the system. The overflow is really a function of the max memory b/w that the socket can support, max counter value and scaling factor. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 111 - 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index fea22c8..8245dbd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -18,6 +18,11 @@ * value */ #define MBM_CNTR_MAX 0xff +/* + * Guaranteed time in ms as per SDM where MBM counters will not overflow. + */ +#define MBM_CTR_OVERFLOW_TIME 1000 + static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ static bool cqm_enabled, mbm_enabled; @@ -49,6 +54,7 @@ struct intel_pqr_state { * interrupts disabled, which is sufficient for the protection. */ static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); +static struct hrtimer *mbm_timers; /** * struct sample - mbm event's (local or total) data * @interval_start Time this interval began @@ -1123,6 +1129,84 @@ static void __intel_mbm_event_count(void *info) atomic64_add(val, >value); } +static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer) +{ + struct perf_event *iter, *iter1; + struct list_head *head; + unsigned long flags; + u32 grp_rmid; + + /* +* Need to cache_lock as the timer Event Select MSR reads +* can race with the mbm/cqm count() and mbm_init() reads. 
+*/ + mutex_lock(_mutex); + raw_spin_lock_irqsave(_lock, flags); + + if (list_empty(_groups)) + goto out; + + list_for_each_entry(iter, _groups, hw.cqm_groups_entry) { + grp_rmid = iter->hw.cqm_rmid; + if (!__rmid_valid(grp_rmid)) + continue; + if (is_mbm_event(iter->attr.config)) + update_sample(grp_rmid, iter->attr.config, 0); + + head = >hw.cqm_group_entry; + if (list_empty(head)) + continue; + list_for_each_entry(iter1, head, hw.cqm_group_entry) { + if (!iter1->hw.is_group_event) + break; + if (is_mbm_event(iter1->attr.config)) + update_sample(iter1->hw.cqm_rmid, + iter1->attr.config, 0); + } + } + +out: + raw_spin_unlock_irqrestore(_lock, flags); + mutex_unlock(_mutex); + + hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME)); + + return HRTIMER_RESTART; +} + +static void __mbm_start_timer(void *info) +{ + hrtimer_start(_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME), +HRTIMER_MODE_REL_PINNED); +} + +static void __mbm_stop_timer(void *info) +{ + hrtimer_cancel(_timers[pkg_id]); +} + +static void mbm_start_timers(void) +{ + on_each_cpu_mask(_cpumask, __mbm_start_timer, NULL, 1); +} + +static void mbm_stop_timers(void) +{ + on_each_cpu_mask(_cpumask, __mbm_stop_timer, NULL, 1); +} + +static void mbm_hrtimer_init(void) +{ + struct hrtimer *hr; + int i; + + for (i = 0; i < mbm_socket_max; i++) { + hr = _timers[i]; + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hr->function = mbm_hrtimer_handle; + } +} + static u64 intel_cqm_event_count(struct perf_event *event) { unsigned long flags; @@ -1285,6 +1369,12 @@ static void intel_cqm_event_destroy(struct perf_event *event) } } + /* +* Stop the mbm overflow timers when the last event is destroyed. + */ + if (list_empty(_groups)) + mbm_stop_timers(); + mutex_unlock(_mutex); } @@ -1317,6 +1407,12 @@ static int intel_cqm_event_init(struct perf_event *event) mutex_lock(_mutex); + /* +* Start the mbm overflow timers when the first event is created. 
+ */ + if (list_empty(_groups)) + mbm_start_timers(); + /* Will also set rmid */ intel_cqm_setup_event(event, ); @@ -1615,10 +1711,23 @@ static int intel_mbm_init(void) mbm_total = kmalloc(array_size, GFP_KERNEL); if (!mbm_total) { - kfree(mbm_local); ret = -ENOMEM; + goto out; + } + + array_size = sizeof(struct hrtimer) * mbm_socket_max; + mbm_timers =
[PATCH 2/5] x86/mbm: Intel Memory B/W Monitoring enumeration and init
The MBM init patch enumerates Intel Memory Bandwidth Monitoring (MBM) and initializes the perf events and data structures for monitoring the memory b/w. It is based on the original patch series by Kanaka Juvva. Memory bandwidth monitoring (MBM) provides the OS/VMM a way to monitor bandwidth from one level of cache to another. The current patches support L3 external bandwidth monitoring. It supports both 'local bandwidth' and 'total bandwidth' monitoring for the socket. Local bandwidth measures the amount of data sent through the memory controller on the socket and total b/w measures the total system bandwidth. Extending the cache quality of service monitoring (CQM) we add four more events to the perf infrastructure: intel_cqm_llc/local_bytes - bytes sent through local socket memory controller intel_cqm_llc/total_bytes - total L3 external bytes sent intel_cqm_llc/local_bw - current local b/w intel_cqm_llc/total_bw - current total b/w The tasks are associated with a Resource Monitoring ID (RMID) just like in cqm and the OS uses an MSR write to indicate the RMID of the task during scheduling. 
Signed-off-by: Vikas Shivappa --- arch/x86/include/asm/cpufeature.h | 2 + arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 172 +++-- 3 files changed, 169 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ad8c94..fd80ef6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -241,6 +241,8 @@ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x000F:1 (edx), word 12 */ #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ /* AMD-defined CPU features, CPUID level 0x8008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37830de..f770221 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -635,7 +635,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x000F, 1, , , , ); c->x86_capability[CPUID_F_1_EDX] = edx; - if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { + if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || + ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || + (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL { c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e6be335..e45f5aa 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -15,6 +15,8 @@ static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ +static bool cqm_enabled, mbm_enabled; +static u16 mbm_socket_max; /** * struct intel_pqr_state - State cache for the PQR MSR @@ -42,6 +44,30 @@ struct intel_pqr_state { * interrupts disabled, which is sufficient for the protection. 
*/ static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); +/** + * struct sample - mbm event's (local or total) data + * @interval_start Time this interval began + * @interval_bytes #bytes in this interval + * @total_bytes#bytes since we began monitoring + * @prev_msr previous value of MSR + * @bandwidth bytes/sec in previous completed interval + */ +struct sample { + ktime_t interval_start; + u64 interval_bytes; + u64 total_bytes; + u64 prev_msr; + u64 bandwidth; +}; + +/* + * samples profiled for total memory bandwidth type events + */ +static struct sample *mbm_total; +/* + * samples profiled for local memory bandwidth type events + */ +static struct sample *mbm_local; /* * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. @@ -1152,6 +1178,28 @@ EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes"); EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL); EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1"); +EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02"); +EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1"); +EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB"); +EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6"); + +EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03"); +EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1"); +EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB"); +EVENT_ATTR_STR(local_bytes.scale,
[PATCH 2/4] acct,time: change indentation in __acct_update_integrals
From: Rik van Riel Change the indentation in __acct_update_integrals to make the function a little easier to read. Suggested-by: Peter Zijlstra Signed-off-by: Rik van Riel Acked-by: Frederic Weisbecker --- kernel/tsacct.c | 51 ++- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 460ee2bbfef3..d12e815b7bcd 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -125,31 +125,32 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) static void __acct_update_integrals(struct task_struct *tsk, cputime_t utime, cputime_t stime) { - if (likely(tsk->mm)) { - cputime_t time, dtime; - unsigned long flags; - u64 delta; - - local_irq_save(flags); - time = stime + utime; - dtime = time - tsk->acct_timexpd; - /* Avoid division: cputime_t is often in nanoseconds already. */ - delta = cputime_to_nsecs(dtime); - - if (delta < TICK_NSEC) - goto out; - - tsk->acct_timexpd = time; - /* -* Divide by 1024 to avoid overflow, and to avoid division. -* The final unit reported to userspace is Mbyte-usecs, -* the rest of the math is done in xacct_add_tsk. -*/ - tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10; - out: - local_irq_restore(flags); - } + cputime_t time, dtime; + unsigned long flags; + u64 delta; + + if (!likely(tsk->mm)) + return; + + local_irq_save(flags); + time = stime + utime; + dtime = time - tsk->acct_timexpd; + /* Avoid division: cputime_t is often in nanoseconds already. */ + delta = cputime_to_nsecs(dtime); + + if (delta < TICK_NSEC) + goto out; + + tsk->acct_timexpd = time; + /* +* Divide by 1024 to avoid overflow, and to avoid division. +* The final unit reported to userspace is Mbyte-usecs, +* the rest of the math is done in xacct_add_tsk. +*/ + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10; +out: + local_irq_restore(flags); } /** -- 2.5.0
Re: N900 sleep mode (in 4.5-rc0, if that matters)
* Tony Lindgren [160209 09:26]: > * Pavel Machek [160207 13:24]: > > > ffdffebd 48004a20 (fa004a20) cm_idlest1_core blocking bits: 00200042 > > 000d 48004a28 (fa004a28) cm_idlest3_core > > Bit 21 in cm_idlest1_core is for MCSPI4 so WLAN. Does that go > down if do sleep 5; cat /sys/kernel/debug/pm_debug/count ? > > If not, the're PM runtime missing or broken somewhere. Just tested n900 wlan here and it's working for me with Emil's patch from the "" thread and the PM runtime regression fix for hsmmc I posted earlier today. Looks like n900 is hitting off mode just fine with wlan0 configured. It does not wake up to pings from outside from off mode though until the system wakes up. I'll take a look and send a patch for that separately. Regards, Tony
[PATCH 4/4] sched,time: switch VIRT_CPU_ACCOUNTING_GEN to jiffy granularity
From: Rik van Riel After removing __acct_update_integrals from the profile, native_sched_clock remains as the top CPU user. This can be reduced by moving VIRT_CPU_ACCOUNTING_GEN to jiffy granularity. This will reduce timing accuracy on nohz_full CPUs to jiffy based sampling, just like on normal CPUs. It results in totally removing native_sched_clock from the profile, and significantly speeding up the syscall entry and exit path, as well as irq entry and exit, and kvm guest entry & exit. Additionally, only call the more expensive functions (and advance the seqlock) when jiffies actually changed. This code relies on another CPU advancing jiffies when the system is busy. On a nohz_full system, this is done by a housekeeping CPU. A microbenchmark calling an invalid syscall number 10 million times in a row speeds up an additional 30% over the numbers with just the previous patches, for a total speedup of about 40% over 4.4 and 4.5-rc1. Run times for the microbenchmark: 4.4 3.8 seconds 4.5-rc1 3.7 seconds 4.5-rc1 + first patch 3.3 seconds 4.5-rc1 + first 3 patches 3.1 seconds 4.5-rc1 + all patches 2.3 seconds A non-NOHZ_FULL cpu (not the housekeeping CPU) all kernels 1.86 seconds Signed-off-by: Rik van Riel --- kernel/sched/cputime.c | 39 +++ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index b2ab2ffb1adc..01d9898bc9a2 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -668,26 +668,25 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -static unsigned long long vtime_delta(struct task_struct *tsk) +static cputime_t vtime_delta(struct task_struct *tsk) { - unsigned long long clock; + unsigned long now = READ_ONCE(jiffies); - clock = local_clock(); - if (clock < tsk->vtime_snap) + if (time_before(now, (unsigned long)tsk->vtime_snap)) return 0; - return clock - 
tsk->vtime_snap; + return jiffies_to_cputime(now - tsk->vtime_snap); } static cputime_t get_vtime_delta(struct task_struct *tsk) { - unsigned long long delta = vtime_delta(tsk); + unsigned long now = READ_ONCE(jiffies); + unsigned long delta = now - tsk->vtime_snap; WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); - tsk->vtime_snap += delta; + tsk->vtime_snap = now; - /* CHECKME: always safe to convert nsecs to cputime? */ - return nsecs_to_cputime(delta); + return jiffies_to_cputime(delta); } static void __vtime_account_system(struct task_struct *tsk) @@ -699,6 +698,9 @@ static void __vtime_account_system(struct task_struct *tsk) void vtime_account_system(struct task_struct *tsk) { + if (!vtime_delta(tsk)) + return; + write_seqcount_begin(>vtime_seqcount); __vtime_account_system(tsk); write_seqcount_end(>vtime_seqcount); @@ -707,7 +709,8 @@ void vtime_account_system(struct task_struct *tsk) void vtime_gen_account_irq_exit(struct task_struct *tsk) { write_seqcount_begin(>vtime_seqcount); - __vtime_account_system(tsk); + if (vtime_delta(tsk)) + __vtime_account_system(tsk); if (context_tracking_in_user()) tsk->vtime_snap_whence = VTIME_USER; write_seqcount_end(>vtime_seqcount); @@ -718,16 +721,19 @@ void vtime_account_user(struct task_struct *tsk) cputime_t delta_cpu; write_seqcount_begin(>vtime_seqcount); - delta_cpu = get_vtime_delta(tsk); tsk->vtime_snap_whence = VTIME_SYS; - account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); + if (vtime_delta(tsk)) { + delta_cpu = get_vtime_delta(tsk); + account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); + } write_seqcount_end(>vtime_seqcount); } void vtime_user_enter(struct task_struct *tsk) { write_seqcount_begin(>vtime_seqcount); - __vtime_account_system(tsk); + if (vtime_delta(tsk)) + __vtime_account_system(tsk); tsk->vtime_snap_whence = VTIME_USER; write_seqcount_end(>vtime_seqcount); } @@ -742,7 +748,8 @@ void vtime_guest_enter(struct task_struct *tsk) * that can thus safely catch up with 
a tickless delta. */ write_seqcount_begin(>vtime_seqcount); - __vtime_account_system(tsk); + if (vtime_delta(tsk)) + __vtime_account_system(tsk); current->flags |= PF_VCPU; write_seqcount_end(>vtime_seqcount); } @@ -772,7 +779,7 @@ void arch_vtime_task_switch(struct task_struct *prev) write_seqcount_begin(>vtime_seqcount); current->vtime_snap_whence = VTIME_SYS; -
[PATCH 1/4] sched,time: remove non-power-of-two divides from __acct_update_integrals
From: Rik van Riel When running a microbenchmark calling an invalid syscall number in a loop, on a nohz_full CPU, we spend a full 9% of our CPU time in __acct_update_integrals. This function converts cputime_t to jiffies, to a timeval, only to convert the timeval back to microseconds before discarding it. This patch leaves __acct_update_integrals functionally equivalent, but speeds things up by about 12%, with 10 million calls to an invalid syscall number dropping from 3.7 to 3.25 seconds. Signed-off-by: Rik van Riel --- kernel/tsacct.c | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 975cb49e32bf..460ee2bbfef3 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -93,9 +93,11 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { struct mm_struct *mm; - /* convert pages-usec to Mbyte-usec */ - stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB; - stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB; + /* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */ + stats->coremem = p->acct_rss_mem1 * PAGE_SIZE; + do_div(stats->coremem, 1000 * KB); + stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE; + do_div(stats->virtmem, 1000 * KB); mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ @@ -125,22 +127,26 @@ static void __acct_update_integrals(struct task_struct *tsk, { if (likely(tsk->mm)) { cputime_t time, dtime; - struct timeval value; unsigned long flags; u64 delta; local_irq_save(flags); time = stime + utime; dtime = time - tsk->acct_timexpd; - jiffies_to_timeval(cputime_to_jiffies(dtime), ); - delta = value.tv_sec; - delta = delta * USEC_PER_SEC + value.tv_usec; + /* Avoid division: cputime_t is often in nanoseconds already. 
*/ + delta = cputime_to_nsecs(dtime); - if (delta == 0) + if (delta < TICK_NSEC) goto out; + tsk->acct_timexpd = time; - tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + /* +* Divide by 1024 to avoid overflow, and to avoid division. +* The final unit reported to userspace is Mbyte-usecs, +* the rest of the math is done in xacct_add_tsk. +*/ + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10; out: local_irq_restore(flags); } -- 2.5.0
[PATCH V4 0/5] x86/mbm : Intel Memory Bandwidth monitoring support
The V4 version of MBM is almost a complete rewrite of the prior versions. It tries to address all of Thomas's earlier comments. The patch series has one preparatory patch for cqm and then 4 MBM patches. *Patches apply on 4.5-rc1*. Memory bandwidth monitoring (MBM) provides the OS/VMM a way to monitor bandwidth from one level of cache to another. The current patches support L3 external bandwidth monitoring. It supports both 'local bandwidth' and 'total bandwidth' monitoring for the socket. Local bandwidth measures the amount of data sent through the memory controller on the socket and total b/w measures the total system bandwidth. The tasks are associated with a Resource Monitoring ID (RMID) just like in cqm and the OS uses an MSR write to indicate the RMID of the task during scheduling. [PATCH 1/5] x86,perf/cqm : Fix cqm handling of grouping events into a [PATCH 2/5] x86/mbm: Intel Memory B/W Monitoring enumeration and init [PATCH 3/5] x86/mbm: Memory bandwidth monitoring event management [PATCH 4/5] x86/mbm: RMID Recycling MBM changes [PATCH 5/5] x86/mbm: Add support for MBM counter overflow handling
[PATCH 3/5] x86/mbm: Memory bandwidth monitoring event management
From: Tony Luck Includes all the core infrastructure to measure the total_bytes and bandwidth. We have per socket counters for both total system wide L3 external bytes and local socket memory-controller bytes. The current b/w is calculated for a minimum diff time(time since it was last counted) of 100ms. The OS does MSR writes to MSR_IA32_QM_EVTSEL and MSR_IA32_QM_CTR to read the counters and uses the IA32_PQR_ASSOC_MSR to associate the RMID with the task. The tasks have a common RMID for cqm(cache quality of service monitoring) and MBM. Hence most of the scheduling code is reused from cqm. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 159 - 1 file changed, 155 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e45f5aa..b1c9663 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -13,6 +13,11 @@ #define MSR_IA32_QM_CTR0x0c8e #define MSR_IA32_QM_EVTSEL 0x0c8d +/* + * MBM Counter is 24bits wide. MBM_CNTR_MAX defines max counter + * value + */ +#define MBM_CNTR_MAX 0xff static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ static bool cqm_enabled, mbm_enabled; @@ -69,6 +74,16 @@ static struct sample *mbm_total; */ static struct sample *mbm_local; +#define pkg_id topology_physical_package_id(smp_processor_id()) +/* + * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array. + * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of + * rmids per socket, an example is given below + * RMID1 of Socket0: vrmid = 1 + * RMID1 of Socket1: vrmid = 1 * (cqm_max_rmid + 1) + 1 + * RMID1 of Socket2: vrmid = 2 * (cqm_max_rmid + 1) + 1 + */ +#define rmid_2_index(rmid) ((pkg_id * (cqm_max_rmid + 1)) + rmid) /* * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. 
* Also protects event->hw.cqm_rmid @@ -92,8 +107,19 @@ static cpumask_t cqm_cpumask; #define RMID_VAL_UNAVAIL (1ULL << 62) #define QOS_L3_OCCUP_EVENT_ID (1 << 0) +/* + * MBM Event IDs as defined in SDM section 17.15.5 + * Event IDs are used to program EVTSEL MSRs before reading mbm event counters + */ +enum mbm_evt_type { + QOS_MBM_TOTAL_EVENT_ID = 0x02, + QOS_MBM_LOCAL_EVENT_ID, + QOS_MBM_TOTAL_BW_EVENT_ID, + QOS_MBM_LOCAL_BW_EVENT_ID, +}; -#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID +#define QOS_MBM_BW_EVENT_MASK 0x04 +#define QOS_MBM_LOCAL_EVENT_MASK 0x01 /* * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). @@ -423,9 +449,16 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b) struct rmid_read { u32 rmid; atomic64_t value; + enum mbm_evt_type evt_type; }; static void __intel_cqm_event_count(void *info); +static void init_mbm_sample(u32 rmid, enum mbm_evt_type evt_type); + +static bool is_mbm_event(int e) +{ + return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_BW_EVENT_ID); +} /* * Exchange the RMID of a group of events. 
@@ -867,6 +900,98 @@ static void intel_cqm_rmid_rotate(struct work_struct *work) schedule_delayed_work(_cqm_rmid_work, delay); } +static struct sample *update_sample(unsigned int rmid, + enum mbm_evt_type evt_type, int first) +{ + ktime_t cur_time; + struct sample *mbm_current; + u32 vrmid = rmid_2_index(rmid); + u64 val, bytes, diff_time; + u32 eventid; + + if (evt_type & QOS_MBM_LOCAL_EVENT_MASK) { + mbm_current = _local[vrmid]; + eventid = QOS_MBM_LOCAL_EVENT_ID; + } else { + mbm_current = _total[vrmid]; + eventid = QOS_MBM_TOTAL_EVENT_ID; + } + + cur_time = ktime_get(); + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, val); + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return mbm_current; + val &= MBM_CNTR_MAX; + + if (first) { + mbm_current->interval_start = cur_time; + mbm_current->prev_msr = val; + mbm_current->total_bytes = 0; + mbm_current->interval_bytes = 0; + mbm_current->bandwidth = 0; + return mbm_current; + } + + if (val < mbm_current->prev_msr) + bytes = MBM_CNTR_MAX - mbm_current->prev_msr + val + 1; + else + bytes = val - mbm_current->prev_msr; + bytes *= cqm_l3_scale; + + mbm_current->total_bytes += bytes; + mbm_current->interval_bytes += bytes; + mbm_current->prev_msr = val; + diff_time = ktime_ms_delta(cur_time, mbm_current->interval_start); + + /* +* The b/w measured is really the most recent/current b/w. +* We wait till
[PATCH 0/4 v6] sched,time: reduce nohz_full syscall overhead 40%
(v6: make VIRT_CPU_ACCOUNTING_GEN jiffy granularity) Running with nohz_full introduces a fair amount of overhead. Specifically, various things that are usually done from the timer interrupt are now done at syscall, irq, and guest entry and exit times. However, some of the code that is called every single time has only ever worked at jiffy resolution. The code in __acct_update_integrals was also doing some unnecessary calculations. Getting rid of the unnecessary calculations, without changing any of the functionality in __acct_update_integrals gets us about an 11% win. Not calling the time statistics updating code more than once per jiffy, like is done on housekeeping CPUs and on all the CPUs of a non-nohz_full system, shaves off a further 30%. I tested this series with a microbenchmark calling an invalid syscall number ten million times in a row, on a nohz_full cpu. Run times for the microbenchmark: 4.4 3.8 seconds 4.5-rc1 3.7 seconds 4.5-rc1 + first patch 3.3 seconds 4.5-rc1 + first 3 patches 3.1 seconds 4.5-rc1 + all patches 2.3 seconds Same test on a non-NOHZ_FULL, non-housekeeping CPU: all kernels 1.86 seconds
[PATCH 3/4] time,acct: drop irq save & restore from __acct_update_integrals
From: Rik van Riel It looks like all the call paths that lead to __acct_update_integrals already have irqs disabled, and __acct_update_integrals does not need to disable irqs itself. This is very convenient since about half the CPU time left in this function was spent in local_irq_save alone. Performance of a microbenchmark that calls an invalid syscall ten million times in a row on a nohz_full CPU improves 21% vs. 4.5-rc1 with both the removal of divisions from __acct_update_integrals and this patch, with runtime dropping from 3.7 to 2.9 seconds. With these patches applied, the highest remaining cpu user in the trace is native_sched_clock, which is addressed in the next patch. For testing purposes I stuck a WARN_ON(!irqs_disabled()) test in __acct_update_integrals. It did not trigger. Suggested-by: Peter Zijlstra Signed-off-by: Rik van Riel --- kernel/tsacct.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index d12e815b7bcd..f8e26ab963ed 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -126,20 +126,18 @@ static void __acct_update_integrals(struct task_struct *tsk, cputime_t utime, cputime_t stime) { cputime_t time, dtime; - unsigned long flags; u64 delta; if (!likely(tsk->mm)) return; - local_irq_save(flags); time = stime + utime; dtime = time - tsk->acct_timexpd; /* Avoid division: cputime_t is often in nanoseconds already. 
*/ delta = cputime_to_nsecs(dtime); if (delta < TICK_NSEC) - goto out; + return; tsk->acct_timexpd = time; /* @@ -149,8 +147,6 @@ static void __acct_update_integrals(struct task_struct *tsk, */ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10; -out: - local_irq_restore(flags); } /** @@ -160,9 +156,12 @@ static void __acct_update_integrals(struct task_struct *tsk, void acct_update_integrals(struct task_struct *tsk) { cputime_t utime, stime; + unsigned long flags; + local_irq_save(flags); task_cputime(tsk, , ); __acct_update_integrals(tsk, utime, stime); + local_irq_restore(flags); } /** -- 2.5.0