[PATCH v3 14/14] lib/test_printf.c: test dentry printing

2015-12-03 Thread Rasmus Villemoes
Cc: Al Viro 
Signed-off-by: Rasmus Villemoes 
---
 lib/test_printf.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/lib/test_printf.c b/lib/test_printf.c
index 60740c10c3e8..0234356c6698 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -13,6 +13,7 @@
 #include <linux/string.h>
 
 #include <linux/bitmap.h>
+#include <linux/dcache.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 
@@ -326,9 +327,35 @@ uuid(void)
test("03020100-0504-0706-0809-0A0B0C0D0E0F", "%pUL", uuid);
 }
 
+static struct dentry test_dentry[4] __initdata = {
+   { .d_parent = &test_dentry[0],
+ .d_name = { .len = 3, .name = test_dentry[0].d_iname },
+ .d_iname = "foo" },
+   { .d_parent = &test_dentry[0],
+ .d_name = { .len = 5, .name = test_dentry[1].d_iname },
+ .d_iname = "bravo" },
+   { .d_parent = &test_dentry[1],
+ .d_name = { .len = 4, .name = test_dentry[2].d_iname },
+ .d_iname = "alfa" },
+   { .d_parent = &test_dentry[2],
+ .d_name = { .len = 5, .name = test_dentry[3].d_iname },
+ .d_iname = "romeo" },
+};
+
 static void __init
 dentry(void)
 {
+   test("foo", "%pd", &test_dentry[0]);
+   test("foo", "%pd2", &test_dentry[0]);
+
+   test("romeo", "%pd", &test_dentry[3]);
+   test("alfa/romeo", "%pd2", &test_dentry[3]);
+   test("bravo/alfa/romeo", "%pd3", &test_dentry[3]);
+   test("/bravo/alfa/romeo", "%pd4", &test_dentry[3]);
+   test("/bravo/alfa", "%pd4", &test_dentry[2]);
+
+   test("bravo/alfa  |bravo/alfa  ", "%-12pd2|%*pd2", &test_dentry[2], -12, &test_dentry[2]);
+   test("  bravo/alfa|  bravo/alfa", "%12pd2|%*pd2", &test_dentry[2], 12, &test_dentry[2]);
 }
 
 static void __init
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 12/14] lib/test_printf.c: account for kvasprintf tests

2015-12-03 Thread Rasmus Villemoes
These should also count as performed tests.

Acked-by: Kees Cook 
Signed-off-by: Rasmus Villemoes 
---
 lib/test_printf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/test_printf.c b/lib/test_printf.c
index b23ce824766f..3e21170d327d 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -127,6 +127,7 @@ __test(const char *expect, int elen, const char *fmt, ...)
 
p = kvasprintf(GFP_KERNEL, fmt, ap);
if (p) {
+   total_tests++;
if (memcmp(p, expect, elen+1)) {
pr_warn("kvasprintf(..., \"%s\", ...) returned '%s', 
expected '%s'\n",
fmt, p, expect);
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 10/14] lib/test_printf.c: test precision quirks

2015-12-03 Thread Rasmus Villemoes
The kernel's printf doesn't follow the standards in a few corner cases
(which are probably mostly irrelevant). Add tests that document the
current behaviour.

Cc: Kees Cook 
Signed-off-by: Rasmus Villemoes 
---
 lib/test_printf.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/lib/test_printf.c b/lib/test_printf.c
index 1ce1a1dd8faf..3393d667c6b8 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -166,14 +166,23 @@ test_string(void)
test("", "%s%.0s", "", "123");
test("ABCD|abc|123", "%s|%.3s|%.*s", "ABCD", "abcdef", 3, "123456");
test("1  |  2|3  |  4|5  ", "%-3s|%3s|%-*s|%*s|%*s", "1", "2", 3, "3", 
3, "4", -3, "5");
+   test("1234  ", "%-10.4s", "123456");
+   test("  1234", "%10.4s", "123456");
/*
-* POSIX and C99 say that a missing precision should be
-* treated as a precision of 0. However, the kernel's printf
-* implementation treats this case as if the . wasn't
-* present. Let's add a test case documenting the current
-* behaviour; should anyone ever feel the need to follow the
-* standards more closely, this can be revisited.
+* POSIX and C99 say that a negative precision (which is only
+* possible to pass via a * argument) should be treated as if
+* the precision wasn't present, and that if the precision is
+* omitted (as in %.s), the precision should be taken to be
+* 0. However, the kernel's printf behave exactly opposite,
+* treating a negative precision as 0 and treating an omitted
+* precision specifier as if no precision was given.
+*
+* These test cases document the current behaviour; should
+* anyone ever feel the need to follow the standards more
+* closely, this can be revisited.
 */
+   test("", "%4.*s", -5, "123456");
+   test("123456", "%.s", "123456");
test("a||", "%.s|%.0s|%.*s", "a", "b", 0, "c");
test("a  |   |   ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c");
 }
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 13/14] lib/test_printf.c: add test for large bitmaps

2015-12-03 Thread Rasmus Villemoes
Following "lib/vsprintf.c: expand field_width to 24 bits", let's add a
test to see that we now actually support bitmaps with 65536 bits.

Cc: Maurizio Lombardi 
Cc: Tejun Heo 
Acked-by: Kees Cook 
Signed-off-by: Rasmus Villemoes 
---
 lib/test_printf.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/lib/test_printf.c b/lib/test_printf.c
index 3e21170d327d..60740c10c3e8 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 
+#include <linux/bitmap.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 
@@ -341,6 +342,20 @@ struct_clk(void)
 }
 
 static void __init
+large_bitmap(void)
+{
+   const int nbits = 1 << 16;
+   unsigned long *bits = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), 
GFP_KERNEL);
+   if (!bits)
+   return;
+
+   bitmap_set(bits, 1, 20);
+   bitmap_set(bits, 6, 15);
+   test("1-20,6-60014", "%*pbl", nbits, bits);
+   kfree(bits);
+}
+
+static void __init
 bitmap(void)
 {
DECLARE_BITMAP(bits, 20);
@@ -359,6 +374,8 @@ bitmap(void)
bitmap_fill(bits, 20);
test("f|f", "%20pb|%*pb", bits, 20, bits);
test("0-19|0-19", "%20pbl|%*pbl", bits, 20, bits);
+
+   large_bitmap();
 }
 
 static void __init
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 07/14] lib/kasprintf.c: add sanity check to kvasprintf

2015-12-03 Thread Rasmus Villemoes
kasprintf relies on being able to replay the formatting and getting
the same result (in particular, the same length). This will almost
always work, but it is possible that the object pointed to by a %s or
%p argument changed under us (so we might get truncated output). Add a
somewhat paranoid sanity check and let's see if it ever triggers.

Signed-off-by: Rasmus Villemoes 
---
 lib/kasprintf.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index f194e6e593e1..7f6c506a4942 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -13,19 +13,21 @@
 /* Simplified asprintf. */
 char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
 {
-   unsigned int len;
+   unsigned int first, second;
char *p;
va_list aq;
 
va_copy(aq, ap);
-   len = vsnprintf(NULL, 0, fmt, aq);
+   first = vsnprintf(NULL, 0, fmt, aq);
va_end(aq);
 
-   p = kmalloc_track_caller(len+1, gfp);
+   p = kmalloc_track_caller(first+1, gfp);
if (!p)
return NULL;
 
-   vsnprintf(p, len+1, fmt, ap);
+   second = vsnprintf(p, first+1, fmt, ap);
+   WARN(first != second, "different return values (%u and %u) from 
vsnprintf(\"%s\", ...)",
+first, second, fmt);
 
return p;
 }
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: SoCFPGA ethernet broken

2015-12-03 Thread Pavel Machek
On Thu 2015-10-15 13:25:59, Florian Fainelli wrote:
> On 15/10/15 12:59, Dinh Nguyen wrote:
> > On 10/15/2015 03:03 PM, Florian Fainelli wrote:
> >> On 15/10/15 12:09, Dinh Nguyen wrote:
> >>> Hi,
> >>>
> >>> commit "8b63ec1837fa phylib: Make PHYs children of their MDIO bus, not
> >>> the bus' parent." seems to have broken ethernet support for the SoCFPGA
> >>> platform which is using the stmmac ethernet driver.
> >>
> >> It is not clear to me how this relates to what you are seeing yet.
> >>
> >>>
> >>> It appears that during DHCP, it cannot get an IP address. This only
> >>> happens if ethernet was not used by the bootloader to tftp an kernel
> >>> image. If I use the bootloader to tftp an image then ethernet is working
> >>> fine. So I think the PHY is not getting enabled properly.
> >>>
> >>> If I revert this patch, then ethernet is back to working on the platform.
> >>
> >> Is the Device Tree source for this platform available somewhere to look at?
> >>
> > 
> > Yes, I'm using the DTS that is in the mainline:
> > 
> > arch/arm/boot/dts/socfpga.dtsi
> > arch/arm/boot/dts/socfpga_cyclone5.dtsi
> > arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
> 
> There are no PHY devices in any of these DTS files, instead there is the
> non-standard "phy-addr" property which is set to 0xffffffff supposedly
> to indicate that the MDIO bus should be scanned. This is likely part of
> your problem. The stmmac driver seems to be looking for "snps,phy-addr"
> and not "phy-addr", so I am not even clear how this is supposed to work,
> and the driver mentions this custom property is deprecated anyway.
> 
> The core problem is in
> drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c::stmmac_mdio_register
> which manually detects the PHY, that is mostly fine, except that it does
> not really seem to work here for a reason that is still unclear to me.
> 
> Your Ethernet PHYs need to be declared in Device Tree, see
> Documentation/devicetree/bindings/net/phy.txt

While updating the DTS might be a good idea, I don't think you can simply
blame this on the DTS. If it worked before the change, it is supposed to
work after the change; otherwise we call that change a "regression"
and revert it.

Plus, DTS is supposed to be ABI. Old DTS should still work on new
kernels in an ideal world.

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ntp: verify offset doesn't overflow in ntp_update_offset

2015-12-03 Thread Sasha Levin
We need to make sure that the offset is valid before manipulating it,
otherwise it might overflow on the multiplication.

Signed-off-by: Sasha Levin 
---
 kernel/time/ntp.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 149cc80..36616c3 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -297,6 +297,9 @@ static void ntp_update_offset(long offset)
if (!(time_status & STA_PLL))
return;
 
+   /* Make sure the multiplication below won't overflow */
+   offset = clamp(offset, -MAXPHASE, MAXPHASE);
+
if (!(time_status & STA_NANO))
offset *= NSEC_PER_USEC;
 
@@ -304,8 +307,7 @@ static void ntp_update_offset(long offset)
 * Scale the phase adjustment and
 * clamp to the operating range.
 */
-   offset = min(offset, MAXPHASE);
-   offset = max(offset, -MAXPHASE);
+   offset = clamp(offset, -MAXPHASE, MAXPHASE);
 
/*
 * Select how the frequency is to be controlled
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/4] ARM: at91/dt: sama5d4: update i2c compatible string

2015-12-03 Thread Wolfram Sang
On Thu, Dec 03, 2015 at 11:33:24AM +0100, Nicolas Ferre wrote:
> Le 03/12/2015 10:53, Ludovic Desroches a écrit :
> > A new compatible string has been introduced: atmel,sama5d4-i2c. It
> > allows to use the i2c-sda-hold-time-ns property if needed.
> > 
> > Signed-off-by: Ludovic Desroches 
> 
> Wolfram, we'll take this one with us in the at91 branches that will go
> into arm-soc. It'll be queued in at91-4.5-dt branch soon.
> 
> Acked-by: Nicolas Ferre 

I squashed patch 1+2 now and applied them to for-next, thanks!



signature.asc
Description: Digital signature


Re: [PATCH v6 14/19] arm64:ilp32: add sys_ilp32.c and a separate table (in entry.S) to use it

2015-12-03 Thread Arnd Bergmann
On Thursday 03 December 2015 21:14:41 Yury Norov wrote:
> 
> > I'm not sure there is much value in
> > keeping 4*PAGE_SIZE for larger page sizes but I agree that the current
> > 16K value doesn't work well with 64K pages.
> 
> Arnd told there will be a workaround for arm v6 caches. Than this
> header will not be needed at all. Until that, this is simpliest
> fix as it doesn't affect userspace.

I think we should do whatever matches user space: There is no harm
in going to 256KB instead of 64KB if current glibc already uses
4*getpagesize() for a kernel with native 64K pages.

Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue

2015-12-03 Thread Peter Zijlstra
On Thu, Dec 03, 2015 at 02:26:16PM -0500, Tejun Heo wrote:
> + WARN_ONCE(current->flags & PF_MEMALLOC,

I'm not sure about using PF_MEMALLOC for detecting reclaim. There appear
to be more sites setting this than reclaim. See:

drivers/block/nbd.c:current->flags |= PF_MEMALLOC;
drivers/mmc/card/queue.c:   current->flags |= PF_MEMALLOC;
drivers/mtd/nand/nandsim.c: current->flags |= PF_MEMALLOC;
drivers/scsi/iscsi_tcp.c:   current->flags |= PF_MEMALLOC;
drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h:#define 
memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
fs/cifs/connect.c:  current->flags |= PF_MEMALLOC;
fs/xfs/libxfs/xfs_btree.c:  new_pflags |= PF_MEMALLOC | 
PF_SWAPWRITE | PF_KSWAPD;
fs/xfs/xfs_trans_ail.c: current->flags |= PF_MEMALLOC;
include/linux/sched.h:  current->flags |= PF_MEMALLOC_NOIO;
mm/page_alloc.c:current->flags |= PF_MEMALLOC;
mm/page_alloc.c:current->flags |= PF_MEMALLOC;
mm/vmscan.c:tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
mm/vmscan.c:p->flags |= PF_MEMALLOC;
mm/vmscan.c:p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
net/core/dev.c: current->flags |= PF_MEMALLOC;
net/core/sock.c:current->flags |= PF_MEMALLOC;


The actual reclaim sites in page_alloc and vmscan set
current->reclaim_state. So testing against that might be more accurate.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 2/5] DT: PCI: qcom: Document PCIe devicetree bindings

2015-12-03 Thread Rob Herring
On Thu, Dec 03, 2015 at 03:35:21PM +0200, Stanimir Varbanov wrote:
> From: Stanimir Varbanov 
> 
> Document Qualcomm PCIe driver devicetree bindings.
> 
> Signed-off-by: Stanimir Varbanov 
> Signed-off-by: Stanimir Varbanov 

Acked-by: Rob Herring 

> ---
>  .../devicetree/bindings/pci/qcom,pcie.txt  |  233 
> 
>  1 file changed, 233 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/pci/qcom,pcie.txt
> 
> diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.txt 
> b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
> new file mode 100644
> index 000000000000..6d71ee2e335d
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
> @@ -0,0 +1,233 @@
> +* Qualcomm PCI express root complex
> +
> +- compatible:
> + Usage: required
> + Value type: 
> + Definition: Value should contain
> + - "qcom,pcie-ipq8064" for ipq8064
> + - "qcom,pcie-apq8064" for apq8064
> + - "qcom,pcie-apq8084" for apq8084
> +
> +- reg:
> + Usage: required
> + Value type: 
> + Definition: Register ranges as listed in the reg-names property
> +
> +- reg-names:
> + Usage: required
> + Value type: 
> + Definition: Must include the following entries
> + - "parf"   Qualcomm specific registers
> + - "dbi"Designware PCIe registers
> + - "elbi"   External local bus interface registers
> + - "config" PCIe configuration space
> +
> +- device_type:
> + Usage: required
> + Value type: 
> + Definition: Should be "pci". As specified in designware-pcie.txt
> +
> +- #address-cells:
> + Usage: required
> + Value type: 
> + Definition: Should be set to 3. As specified in designware-pcie.txt
> +
> +- #size-cells:
> + Usage: required
> + Value type: 
> + Definition: Should be set 2. As specified in designware-pcie.txt
> +
> +- ranges:
> + Usage: required
> + Value type: 
> + Definition: As specified in designware-pcie.txt
> +
> +- interrupts:
> + Usage: required
> + Value type: 
> + Definition: MSI interrupt
> +
> +- interrupt-names:
> + Usage: required
> + Value type: 
> + Definition: Should contain "msi"
> +
> +- #interrupt-cells:
> + Usage: required
> + Value type: 
> + Definition: Should be 1. As specified in designware-pcie.txt
> +
> +- interrupt-map-mask:
> + Usage: required
> + Value type: 
> + Definition: As specified in designware-pcie.txt
> +
> +- interrupt-map:
> + Usage: required
> + Value type: 
> + Definition: As specified in designware-pcie.txt
> +
> +- clocks:
> + Usage: required
> + Value type: 
> + Definition: List of phandle and clock specifier pairs as listed
> + in clock-names property
> +
> +- clock-names:
> + Usage: required
> + Value type: 
> + Definition: Should contain the following entries
> + - "iface"   Configuration AHB clock
> +
> +- clock-names:
> + Usage: required for ipq/apq8064
> + Value type: 
> + Definition: Should contain the following entries
> + - "core"Clocks the pcie hw block
> + - "phy" Clocks the pcie PHY block
> +- clock-names:
> + Usage: required for apq8084
> + Value type: 
> + Definition: Should contain the following entries
> + - "aux" Auxiliary (AUX) clock
> + - "bus_master"  Master AXI clock
> + - "bus_slave"   Slave AXI clock
> +- resets:
> + Usage: required
> + Value type: 
> + Definition: List of phandle and reset specifier pairs as listed
> + in reset-names property
> +
> +- reset-names:
> + Usage: required for ipq/apq8064
> + Value type: 
> + Definition: Should contain the following entries
> + - "axi"  AXI reset
> + - "ahb"  AHB reset
> + - "por"  POR reset
> + - "pci"  PCI reset
> + - "phy"  PHY reset
> +
> +- reset-names:
> + Usage: required for apq8084
> + Value type: 
> + Definition: Should contain the following entries
> + - "core" Core reset
> +
> +- power-domains:
> + Usage: required for apq8084
> + Value type: 
> + Definition: A phandle and power domain specifier pair to the
> + power domain which is responsible for collapsing
> + and restoring power to the peripheral
> +
> +- vdda-supply:
> + Usage: required
> + Value type: 
> + Definition: A phandle to the core analog power supply
> +
> +- vdda_phy-supply:
> + Usage: required for ipq/apq8064
> + Value type: 
> + Definition: A phandle to the analog power supply for PHY
> +
> +- vdda_refclk-supply:
> + Usage: required for ipq/apq8064
> +  

Re: [PATCH v3 2/5] thermal: rockchip: fix a impossible condition caused by the warning

2015-12-03 Thread Dmitry Torokhov
On Thu, Dec 03, 2015 at 12:33:57PM -0800, Brian Norris wrote:
> On Thu, Dec 03, 2015 at 12:19:08PM -0800, Dmitry Torokhov wrote:
> > On Thu, Dec 03, 2015 at 04:48:40PM +0800, Caesar Wang wrote:
> > > As the Dan report the smatch check the thermal driver warning:
> > > drivers/thermal/rockchip_thermal.c:551 rockchip_configure_from_dt()
> > > warn: impossible condition '(thermal->tshut_temp > ((~0 >> 1))) =>
> > > (s32min-s32max > s32max)'
> > > 
> > > Let's we remove the imposssible condition Since the Temperature is
> > > currently represented as int not long in the thermal driver.
> > > 
> > > Fixes: commit 437df2172e8d
> > > ("thermal: rockchip: consistently use int for temperatures")
> > > 
> > > Reported-by: Dan Carpenter 
> > > Signed-off-by: Caesar Wang 
> > > 
> > > ---
> > > 
> > > Changes in v3:
> > > - As Brian comments on https://patchwork.kernel.org/patch/7580661/,
> > >   let's remove the impossible condition.
> > > 
> > > Changes in v2: None
> > > Changes in v1: None
> > > 
> > >  drivers/thermal/rockchip_thermal.c | 6 --
> > >  1 file changed, 6 deletions(-)
> > > 
> > > diff --git a/drivers/thermal/rockchip_thermal.c 
> > > b/drivers/thermal/rockchip_thermal.c
> > > index ae796ec..611de00 100644
> > > --- a/drivers/thermal/rockchip_thermal.c
> > > +++ b/drivers/thermal/rockchip_thermal.c
> > > @@ -549,12 +549,6 @@ static int rockchip_configure_from_dt(struct device 
> > > *dev,
> > >   thermal->tshut_temp = shut_temp;
> > >   }
> > >  
> > > - if (thermal->tshut_temp > INT_MAX) {
> > > - dev_err(dev, "Invalid tshut temperature specified: %d\n",
> > > - thermal->tshut_temp);
> > > - return -ERANGE;
> > > - }
> > 
> > Well, that is not entirely correct. The value that we read from DT is
> > u32, but we convert it down to int. I believe you want to move the check
> 
> Do we really account for the possibility of sizeof(int) < sizeof(u32)?
> 
> EDIT: A bit after writing the above line, I notice my error, but in case
> anyone else is thinking the same thing... I guess you're referring to
> the sign bit, since we're casting unsigned to signed.

Yes, exactly. Sorry I was not clear.

-- 
Dmitry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [V2 PATCH] sparc64/gup: check address scope legitimacy

2015-12-03 Thread Sam Ravnborg
Hi Yang.

On Wed, Nov 25, 2015 at 02:45:43PM -0800, Yang Shi wrote:
> Check if user address is accessible in atomic version __get_user_pages_fast()
> before walking the page table.
> And, check if end > start in get_user_pages_fast(), otherwise fallback to slow
> path.

Two different but related things in one patch is often a bad thing.
It would have been better to split it up.


> 
> Signed-off-by: Yang Shi 
> ---
> Just found slow_irqon label is not defined, added it to avoid compile error.
> 
>  arch/sparc/mm/gup.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
> index 2e5c4fc..cf4fb47 100644
> --- a/arch/sparc/mm/gup.c
> +++ b/arch/sparc/mm/gup.c
> @@ -173,6 +173,9 @@ int __get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>   addr = start;
>   len = (unsigned long) nr_pages << PAGE_SHIFT;
>   end = start + len;
> + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
> + (void __user *)start, len)))
> + return 0;
This change is not justified.
Why would we take the time to first do the access_ok() stuff?
If this were an expensive operation, we would have made this function
slower in the normal case (assuming there are no access violations in the
normal case).
When I look at the implementation of access_ok() I get the impression that
this is not really a check we need.

access_ok() always returns 1.


>  
>   local_irq_save(flags);
>   pgdp = pgd_offset(mm, addr);
> @@ -203,6 +206,8 @@ int get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>   addr = start;
>   len = (unsigned long) nr_pages << PAGE_SHIFT;
>   end = start + len;
> + if (end < start)
> + goto slow_irqon;

end can only be smaller than start if there is some overflow.
See how end is calculated just the line above.

This looks like a highly suspicious change.

Sam
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/7] perf: Document aux api usage

2015-12-03 Thread Mathieu Poirier
On 3 December 2015 at 03:32, Alexander Shishkin
 wrote:
> In order to ensure safe aux buffer management, we rely on the assumption
> that pmu::stop() stops its ongoing aux transaction and not just the hw.
>
> This patch documents this requirement for perf_aux_output_{begin,end}()
> apis.
>
> Signed-off-by: Alexander Shishkin 
> Cc: Mathieu Poirier 
> ---
>  kernel/events/ring_buffer.c | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 6865ac95ca..1aed2617e8 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -252,6 +252,10 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, 
> int flags)
>   * The ordering is similar to that of perf_output_{begin,end}, with
>   * the exception of (B), which should be taken care of by the pmu
>   * driver, since ordering rules will differ depending on hardware.
> + *
> + * Call this from pmu::start(); see the comment in perf_aux_output_end()
> + * about its use in pmu callbacks. Both can also be called from the PMI
> + * handler if needed.
>   */
>  void *perf_aux_output_begin(struct perf_output_handle *handle,
> struct perf_event *event)
> @@ -323,6 +327,7 @@ void *perf_aux_output_begin(struct perf_output_handle 
> *handle,
> return handle->rb->aux_priv;
>
>  err_put:
> +   /* can't be last */
> rb_free_aux(rb);
>
>  err:
> @@ -337,6 +342,10 @@ err:
>   * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
>   * pmu driver's responsibility to observe ordering rules of the hardware,
>   * so that all the data is externally visible before this is called.
> + *
> + * Note: this has to be called from pmu::stop() callback, as the assumption
> + * of the aux buffer management code is that after pmu::stop(), the aux
> + * transaction must be stopped and therefore drop the aux reference count.
>   */
>  void perf_aux_output_end(struct perf_output_handle *handle, unsigned long 
> size,
>  bool truncated)
> @@ -376,6 +385,7 @@ void perf_aux_output_end(struct perf_output_handle 
> *handle, unsigned long size,
> handle->event = NULL;
>
> local_set(&rb->aux_nest, 0);
> +   /* can't be last */
> rb_free_aux(rb);
> ring_buffer_put(rb);
>  }
> --
> 2.6.2
>

Thanks for the heads-up.  My next version (V7) will follow the same scheme.

Mathieu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: BUG: KASAN: slab-out-of-bounds in ses_enclosure_data_process+0x900/0xe50

2015-12-03 Thread Andrea Gelmini
On Wed, Dec 02, 2015 at 02:58:21PM -0800, James Bottomley wrote:
> On Tue, 2015-12-01 at 21:20 +0100, Andrea Gelmini wrote:
> OK, this looks like some type of problem with a USB enclosure.  It's
> probably misreporting something in the mode pages.  can you run sg_ses
> on whatever /dev/sg the enclosure turns up as?


root@glen:/tmp/report# cat sg_ses_usb_hd.txt 
  WDMy Passport 0820  1007
disk device (not an enclosure)
Supported diagnostic pages:
  Supported Diagnostic Pages [sdp] [0x0]
  Short Enclosure Status (SES) [ses] [0x8]
   [0x80]
   [0x83]
   [0x84]
   [0x85]


By the way, same issue with kernel 4.3 
(6a13feb9c82803e2b815eca72fa7a9f5561d7861).
Attached the dmesg output.

Thanks a lot,
Andrea


dmesg.txt.gz
Description: application/gzip


signature.asc
Description: Digital signature


Re: [PATCH v3 2/5] thermal: rockchip: fix a impossible condition caused by the warning

2015-12-03 Thread Brian Norris
On Thu, Dec 03, 2015 at 12:19:08PM -0800, Dmitry Torokhov wrote:
> On Thu, Dec 03, 2015 at 04:48:40PM +0800, Caesar Wang wrote:
> > As the Dan report the smatch check the thermal driver warning:
> > drivers/thermal/rockchip_thermal.c:551 rockchip_configure_from_dt()
> > warn: impossible condition '(thermal->tshut_temp > ((~0 >> 1))) =>
> > (s32min-s32max > s32max)'
> > 
> > Let's we remove the imposssible condition Since the Temperature is
> > currently represented as int not long in the thermal driver.
> > 
> > Fixes: commit 437df2172e8d
> > ("thermal: rockchip: consistently use int for temperatures")
> > 
> > Reported-by: Dan Carpenter 
> > Signed-off-by: Caesar Wang 
> > 
> > ---
> > 
> > Changes in v3:
> > - As Brian comments on https://patchwork.kernel.org/patch/7580661/,
> >   let's remove the impossible condition.
> > 
> > Changes in v2: None
> > Changes in v1: None
> > 
> >  drivers/thermal/rockchip_thermal.c | 6 --
> >  1 file changed, 6 deletions(-)
> > 
> > diff --git a/drivers/thermal/rockchip_thermal.c 
> > b/drivers/thermal/rockchip_thermal.c
> > index ae796ec..611de00 100644
> > --- a/drivers/thermal/rockchip_thermal.c
> > +++ b/drivers/thermal/rockchip_thermal.c
> > @@ -549,12 +549,6 @@ static int rockchip_configure_from_dt(struct device 
> > *dev,
> > thermal->tshut_temp = shut_temp;
> > }
> >  
> > -   if (thermal->tshut_temp > INT_MAX) {
> > -   dev_err(dev, "Invalid tshut temperature specified: %d\n",
> > -   thermal->tshut_temp);
> > -   return -ERANGE;
> > -   }
> 
> Well, that is not entirely correct. The value that we read from DT is
> u32, but we convert it down to int. I believe you want to move the check

Do we really account for the possibility of sizeof(int) < sizeof(u32)?

EDIT: A bit after writing the above line, I notice my error, but in case
anyone else is thinking the same thing... I guess you're referring to
the sign bit, since we're casting unsigned to signed.

Brian

> up so that you do:
> 
>   } else if (tshut_temp > INT_MAX) {
>   dev_err(dev, "Invalid tshut temperature specified: %d\n",
>   thermal->tshut_temp);
>   return -ERANGE;
>   } else {
>   thermal->tshut_temp = shut_temp;
>   }
> 
> Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/4] locking: Introduce smp_cond_acquire()

2015-12-03 Thread Peter Zijlstra
On Thu, Dec 03, 2015 at 11:41:39AM -0800, Davidlohr Bueso wrote:

> >+#define smp_cond_acquire(cond)  do {\
> >+while (!(cond)) \
> >+cpu_relax();\
> >+smp_rmb(); /* ctrl + rmb := acquire */  \
> >+} while (0)
> 
> So this hides the fact that we actually are waiting on the cond, as opposed
> to conditional acquiring. Could it be renamed to something like 
> smp_waitcond_acquire()?

Right, I'm conflicted about that. On the one hand you're right, on the
other hand we spin-wait so the next person will want it called
smp_spin_wait_cond_acquire(), also it gets terribly long either way :/

bike-shed away I imagine.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Erratic fan speed and little control kernel 4.2

2015-12-03 Thread Peter Saunderson

[1.] One line summary of the problem:
Erratic fan speed and little control kernel 4.2

[2.] Full description of the problem/report:
After upgrading to kernel 4.2 my CPU fan started speeding and slowing 
without good reason even when the PC is idle.


sensors shows:
it8721-isa-0a10
Adapter: ISA adapter
fan1: 1071 RPM (min = 13 RPM)
fan2: 788 RPM (min = 46 RPM)

...
dell_smm-virtual-0
Adapter: Virtual device
Processor Fan: 1079 RPM
Motherboard Fan: 794 RPM

To make the erratic behaviour start either wait for a few hours or:
echo 200 > /sys/class/hwmon/hwmon3/pwm1
echo 70 > /sys/class/hwmon/hwmon3/pwm1

To stop erratic fan speed - reboot sometimes works.

This bug has been bisected to commit 
f989e55452c74b4f7b22c889b8ec9f1192aaeec4 :

i8k: Add support for fan labels

I noticed that in the good kernel prior to commit 
f989e55452c74b4f7b22c889b8ec9f1192aaeec4 the order of the fans in 
the it8721-isa-0a10 section of the sensor output is reversed.

it8721-isa-0a10
Adapter: ISA adapter
fan1: 965 RPM (min = 13 RPM)   * good kernel has the Motherboard Fan 
first
fan2: 1112 RPM (min = 21 RPM)  * good kernel lists the Processor Fan 
second.


i8k-virtual-0
Adapter: Virtual device
fan1: 1120 RPM   good kernel has different fan numbering for the 
i8k-virtual-0 device!
fan2: 972 RPM    good kernel has different fan numbering for the 
i8k-virtual-0 device!


commit f989e55452c74b4f7b22c889b8ec9f1192aaeec4 :
adds labels support for fans if SMM function with EAX register
0x03a3 reports it. This information was taken from DOS binary NBSVC.MDM.

Additionally this patch change detection of fan presence. Instead 
reading fan

status now detection is based on new label SMM function. Dell DOS binary
NBSVC.MDM is doing similar checks, so we should do that too.

This patch also remove I8K_FAN_LEFT and I8K_FAN_RIGHT usage from hwmon 
driver
part because that names does not make sense anymore. So numeric 
constants are
used instead. Original /proc/i8k ioctl part was not changed for 
compatibility

reasons.

[3.] Keywords

[4.] Kernel version (from /proc/version):
$ cat /proc/version
Linux version 4.4.0-040400rc3-generic (kernel@gloin) (gcc version 5.2.1 
20151010 (Ubuntu 5.2.1-22ubuntu2) ) #201511300321 SMP Mon Nov 30 
03:23:36 UTC 2015


[5.] Output of Oops.. message (if applicable) with symbolic information 
resolved (see Documentation/oops-tracing.txt)

None

[6.] A small shell script or example program which triggers the problem 
(if possible)

echo 200 > /sys/class/hwmon/hwmon3/pwm1
echo 70 > /sys/class/hwmon/hwmon3/pwm1

[7.] Environment
$ lsb_release -rd
Description:Ubuntu 15.10
Release:15.10


[7.1.] Software (add the output of the ver_linux script here)
$ /usr/src/linux-headers-4.4.0-040400rc3/scripts/ver_linux
If some fields are empty or look unusual you may have an old version.
Compare to the current minimal requirements in Documentation/Changes.

Linux HomeMegaUbuntu 4.4.0-040400rc3-generic #201511300321 SMP Mon Nov 
30 03:23:36 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux


GNU C5.2.1
GNU Make4.0
Binutils2.25.1
Util-linux2.26.2
Mount2.26.2
Module-init-tools21
E2fsprogs1.42.12
Xfsprogs3.2.1
Pcmciautils018
Linux C Library2.21
Dynamic linker (ldd)2.21
Linux C++ Library6.0.21
Procps3.3.9
Net-tools1.60
Kbd1.15.5
Console-tools1.15.5
Sh-utils8.23
Udev225
Wireless-tools30
Modules Loadedamd_iommu_v2 amdkfd autofs4 bcm_phy_lib 
binfmt_misc broadcom btrfs coretemp cuse dcdbas dell_smm_hwmon drm 
drm_kms_helper edac_core fb_sys_fops fjes gpio_ich hfs hfsplus hid 
hid_generic hwmon_vid i2c_algo_bit i7core_edac input_leds irqbypass it87 
jfs kvm kvm_intel libcrc32c lp lpc_ich mac_hid mei mei_me minix msdos 
nls_utf8 ntfs parport parport_pc pata_acpi pci_stub ppdev pps_core 
psmouse ptp qnx4 radeon raid6_pq serio_raw shpchp snd snd_hda_codec 
snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec_realtek 
snd_hda_core snd_hda_intel snd_hrtimer snd_hwdep snd_pcm snd_rawmidi 
snd_seq snd_seq_device snd_seq_midi snd_seq_midi_event snd_timer 
soundcore syscopyarea sysfillrect sysimgblt tg3 ttm uas ufs usbhid 
usb_storage vboxdrv vboxnetadp vboxnetflt vboxpci xfs xor


[7.2.] Processor information (from /proc/cpuinfo):
$ cat /proc/cpuinfo
processor: 0
vendor_id: GenuineIntel
cpu family: 6
model: 30
model name: Intel(R) Core(TM) i5 CPU 760  @ 2.80GHz
stepping: 5
microcode: 0x4
cpu MHz: 1200.000
cache size: 8192 KB
physical id: 0
siblings: 4
core id: 0
cpu cores: 4
apicid: 0
initial apicid: 0
fpu: yes
fpu_exception: yes
cpuid level: 11
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca 
cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall 
nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good 

Re: [PATCH] stmmac: ipq806x: Return error values instead of pointers

2015-12-03 Thread David Miller
From: Stephen Boyd 
Date: Wed,  2 Dec 2015 23:55:15 -0800

> Typically we return error pointers when we want to use those
> pointers in the non-error case, but this function is just
> returning error pointers or NULL for success. Change the style to
> plain int to follow normal kernel coding styles.
> 
> Cc: Joachim Eastwood 
> Signed-off-by: Stephen Boyd 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/4] locking: Introduce smp_cond_acquire()

2015-12-03 Thread Peter Zijlstra
On Thu, Dec 03, 2015 at 04:37:26PM +, Will Deacon wrote:
> > +#define smp_cond_acquire(cond) do {\
> > +   while (!(cond)) \
> > +   cpu_relax();\
> > +   smp_rmb(); /* ctrl + rmb := acquire */  \
> > +} while (0)

> > +   smp_cond_acquire(!((val = atomic_read(>val)) & 
> > _Q_LOCKED_PENDING_MASK));
> 
> I think we spoke about this before, but what would work really well for
> arm64 here is if we could override smp_cond_acquire in such a way that
> the atomic_read could be performed explicitly in the macro. That would
> allow us to use an LDXR to set the exclusive monitor, which in turn
> means we can issue a WFE and get a cheap wakeup when lock->val is
> actually modified.
> 
> With the current scheme, there's not enough information expressed in the
> "cond" parameter to perform this optimisation.

Right, but I'm having a hard time constructing something pretty that can
do that. Lambda functions would be lovely, but we don't have those :/

While we can easily pass a pointer to an arbitrary type, we need
an expression to evaluate the result of the pointer load to act as our
condition.

  smp_cond_acquire(&lock->val.counter,
   [](int val){ return !(val & _Q_LOCKED_PENDING_MASK); });

Would be nice, but alas.

The best we can do is hardcode a variable name; maybe something like:

#define smp_cond_acquire(ptr, expr) do {\
typeof(*ptr) val;   \
while ((val = READ_ONCE(*ptr)), expr)   \
cpu_relax();\
smp_rmb(); /* ctrl + rmb := acquire */  \
} while (0)

Which would let us write:

  smp_cond_acquire(&lock->val.counter, !(val & _Q_LOCKED_PENDING_MASK));


Thoughts?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4] pci: Limit VPD length for megaraid_sas adapter

2015-12-03 Thread Babu Moger
Reading or Writing of PCI VPD data causes system panic.
We saw this problem by running "lspci -vvv" in the beginning.
However this can be easily reproduced by running
 cat /sys/bus/devices/XX../vpd

VPD length has been set as 32768 by default. Accessing vpd
will trigger read/write of 32k. This causes problem as we
could read data beyond the VPD end tag. Behaviour is un-
predictable when this happens. I see some other adapter doing
similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk
for Broadcom 5708S"))

I see there is an attempt to fix this the right way.
https://patchwork.ozlabs.org/patch/534843/ or
https://lkml.org/lkml/2015/10/23/97

Tried to fix it this way, but the problem is I don't see the proper
start/end TAGs (at least for this adapter) at all. The data is
mostly junk or zeros. This patch fixes the issue by setting the
vpd length to 0.

Signed-off-by: Babu Moger 
Reviewed-by: Khalid Aziz 
Tested-by: Dmitry Klochkov 

Orabug: 22104511

Changes since v3 -> v4
We found that some lspci options do not work very well if lspci
cannot find a valid vpd tag (example command: "lspci -s 10:00.0 -vv").
It displays the error message and exits right away. Setting the length
back to 0 fixes the problem.

Changes since v2 -> v3
Changed the vpd length from 0 to 0x80 which leaves the
option open for someone to read first few bytes.

Changes since v1 -> v2
Removed the changes in pci_id.h. Kept all the vendor
ids in quirks.c
---
 drivers/pci/quirks.c |   38 ++
 1 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b03373f..f739e47 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct 
pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, 
quirk_via_cx700_pci_parking_caching);
 
 /*
+ * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd')
+ * will dump 32k of data. The default length is set as 32768.
+ * Reading a full 32k will cause an access beyond the VPD end tag.
+ * The system behaviour at that point is mostly unpredictable.
+ * Also I dont believe vendors have implemented this VPD headers properly.
+ * Atleast I dont see it in following megaraid sas controller.
+ * That is why adding the quirk here.
+ */
+static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev)
+{
+   if (dev->vpd)
+   dev->vpd->len = 0;
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d,
+   quirk_megaraid_sas_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f,
+   quirk_megaraid_sas_limit_vpd);
+
+/*
  * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the
  * VPD end tag will hang the device.  This problem was initially
  * observed when a vpd entry was created in sysfs
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [v9, 3/6] fsl/fman: Add FMan MAC support

2015-12-03 Thread David Miller
From: 
Date: Thu, 3 Dec 2015 09:19:14 +0200

> +static u32 crc_table[256] = {

No way.

We have every conceivable implementation of CRC calculations in the
kernel already.  Use them.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 2/5] thermal: rockchip: fix a impossible condition caused by the warning

2015-12-03 Thread Dmitry Torokhov
On Thu, Dec 03, 2015 at 04:48:40PM +0800, Caesar Wang wrote:
> As the Dan report the smatch check the thermal driver warning:
> drivers/thermal/rockchip_thermal.c:551 rockchip_configure_from_dt()
> warn: impossible condition '(thermal->tshut_temp > ((~0 >> 1))) =>
> (s32min-s32max > s32max)'
> 
> Let's we remove the imposssible condition Since the Temperature is
> currently represented as int not long in the thermal driver.
> 
> Fixes: commit 437df2172e8d
> ("thermal: rockchip: consistently use int for temperatures")
> 
> Reported-by: Dan Carpenter 
> Signed-off-by: Caesar Wang 
> 
> ---
> 
> Changes in v3:
> - As Brian comments on https://patchwork.kernel.org/patch/7580661/,
>   let's remove the impossible condition.
> 
> Changes in v2: None
> Changes in v1: None
> 
>  drivers/thermal/rockchip_thermal.c | 6 --
>  1 file changed, 6 deletions(-)
> 
> diff --git a/drivers/thermal/rockchip_thermal.c 
> b/drivers/thermal/rockchip_thermal.c
> index ae796ec..611de00 100644
> --- a/drivers/thermal/rockchip_thermal.c
> +++ b/drivers/thermal/rockchip_thermal.c
> @@ -549,12 +549,6 @@ static int rockchip_configure_from_dt(struct device *dev,
>   thermal->tshut_temp = shut_temp;
>   }
>  
> - if (thermal->tshut_temp > INT_MAX) {
> - dev_err(dev, "Invalid tshut temperature specified: %d\n",
> - thermal->tshut_temp);
> - return -ERANGE;
> - }

Well, that is not entirely correct. The value that we read from DT is
u32, but we convert it down to int. I believe you want to move the check
up so that you do:

} else if (tshut_temp > INT_MAX) {
dev_err(dev, "Invalid tshut temperature specified: %d\n",
thermal->tshut_temp);
return -ERANGE;
} else {
thermal->tshut_temp = shut_temp;
}

Thanks.

-- 
Dmitry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] mvneta: implement ethtool autonegotiation control

2015-12-03 Thread David Miller
From: Stas Sergeev 
Date: Wed, 2 Dec 2015 20:32:24 +0300

> These 2 patches add an ability to control the
> autonegotiation via ethtool. For example:
> 
> ethtool -s eth0 autoneg off
> ethtool -s eth0 autoneg on
> 
> This is needed if you want to connect the mvneta's MII
> to different switches or PHYs: the ones the do support
> the in-band status, and the ones that do not.

Series applied to net-next, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/2] perf/x86: enable cycles:pp for Intel Atom

2015-12-03 Thread Andi Kleen
>   /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
>   INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),

I don't think this is really needed (no extra PEBS events), but ok it shouldn't
hurt either.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/4] net: bfin_mac: Use platform_register/unregister_drivers()

2015-12-03 Thread David Miller
From: Thierry Reding 
Date: Wed,  2 Dec 2015 17:30:26 +0100

> From: Thierry Reding 
> 
> These new helpers simplify implementing multi-driver modules and
> properly handle failure to register one driver by unregistering all
> previously registered drivers.
> 
> Signed-off-by: Thierry Reding 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/4] net: bcm63xx: Use platform_register/unregister_drivers()

2015-12-03 Thread David Miller
From: Thierry Reding 
Date: Wed,  2 Dec 2015 17:30:27 +0100

> From: Thierry Reding 
> 
> These new helpers simplify implementing multi-driver modules and
> properly handle failure to register one driver by unregistering all
> previously registered drivers.
> 
> Signed-off-by: Thierry Reding 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/4] net: mpc52xx: Use platform_register/unregister_drivers()

2015-12-03 Thread David Miller
From: Thierry Reding 
Date: Wed,  2 Dec 2015 17:30:28 +0100

> From: Thierry Reding 
> 
> These new helpers simplify implementing multi-driver modules and
> properly handle failure to register one driver by unregistering all
> previously registered drivers.
> 
> Signed-off-by: Thierry Reding 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] net: mv643xx: Use platform_register/unregister_drivers()

2015-12-03 Thread David Miller
From: Thierry Reding 
Date: Wed,  2 Dec 2015 17:30:29 +0100

> From: Thierry Reding 
> 
> These new helpers simplify implementing multi-driver modules and
> properly handle failure to register one driver by unregistering all
> previously registered drivers.
> 
> Signed-off-by: Thierry Reding 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] sfc: check warm_boot_count after other functions have been reset

2015-12-03 Thread David Miller

Patches not properly CC:'d to netdev will not be queued up in patchwork, and
therefore will also be ignored.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] workqueue: implement lockup detector

2015-12-03 Thread Ulrich Obergfell

Tejun,

I share Don's concern about connecting the soft lockup detector and the
workqueue watchdog to the same kernel parameter in /proc. I would feel
more comfortable if the workqueue watchdog had its dedicated parameter.


I also see a scenario that the proposed patch does not handle well: The
watchdog_thresh parameter can be changed 'on the fly' - i.e. it is not
necessary to disable and re-enable the watchdog. The flow of execution
looks like this.

  proc_watchdog_thresh
proc_watchdog_update
  if (watchdog_enabled && watchdog_thresh)
  watchdog_enable_all_cpus
if (!watchdog_running) {
...
} else {
//
// update 'on the fly'
//
update_watchdog_all_cpus()
}

The patched watchdog_enable_all_cpus() function disables the workqueue watchdog
unconditionally at [1]. However, the workqueue watchdog remains disabled if the
code path [2] is executed (and wq_watchdog_thresh is not updated as well).

static int watchdog_enable_all_cpus(void)
{
int err = 0;

[1] --> disable_workqueue_watchdog();

if (!watchdog_running) {
...
} else {
 .- /*
 |   * Enable/disable the lockup detectors or
 |   * change the sample period 'on the fly'.
 |   */
[2]  This sort of looks like the hung task detector..
> 
> I am a little concerned because we just made a big effort to properly
> separate the hardlockup and softlockup paths and yet retain the flexibility
> to enable/disable them separately.  Now it seems the workqueue detector is
> permanently entwined with the softlockup detector.  I am not entirely sure
> that is correct thing to do.

The only area they get entwined is how it's controlled from userland.
While it isn't quite the same as softlockup detection, I think what it
monitors is close enough that it makes sense to put them under the
same interface.

> It also seems awkward for the lockup code to have to jump to the workqueue
> code to function properly. :-/  Though we have made exceptions for the virt
> stuff and the workqueue code is simple..

Softlockup code doesn't depend on workqueue in any way.  Workqueue
tags on touch_softlockup to detect cases which shouldn't be warned and
its enabledness is controlled together with softlockup and that's it.

> Actually, I am curious, it seems if you just added a
> /proc/sys/kernel/wq_watchdog entry, you could elminiate the entire need for
> modifying the watchdog code to begin with.  As you really aren't using any
> of it other than piggybacking on the touch_softlockup_watchdog stuff, which
> could probably be easily added without all the extra enable/disable changes
> in watchdog.c.

Yeah, except for touch signal, it's purely interface thing.  I don't
feel too strong about this but it seems a bit silly to introduce a
whole different set of interface for this.  e.g. if the user wanted to
disable softlockup detection, it'd be weird to leave wq lockup
detection running.  The same goes for threshold.

> Again, this looks like what the hung task detector is doing, which I
> struggled with years ago to integrate with the lockup code because in the
> end I had trouble re-using much of it.

So, it's a stall detector and there are inherent similarities but the
conditions tested are pretty different and it's a lot lighter.  I'm
not really sure what you're meaning to say.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ARM64: Clear out any singlestep state on a ptrace detach operation

2015-12-03 Thread John Blackwood

Hello Will,

I have a patch for a ptrace(2) issue that we encountered on arm64 kernels.
If a debugger singlesteps a ptraced task, and then does a ptrace(2)
PTRACE_DETACH command, the task will not resume successfully. It seems
that clearing out the singlestep state, as something like a ptrace(2)
PTRACE_CONT does, gets this working.

Thank you for your time and considerations.

- -

arm64: Clear out any singlestep state on a ptrace detach operation.

Make sure to clear out any ptrace singlestep state when a
ptrace(2) PTRACE_DETACH call is made on arm64 systems.

Otherwise, the previously ptraced task will die off with a SIGTRAP signal
if the debugger just previously singlestepped the ptraced task.

Signed-off-by: John Blackwood 

Index: b/arch/arm64/kernel/ptrace.c
===
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -58,6 +58,7 @@
  */
 void ptrace_disable(struct task_struct *child)
 {
+   user_disable_single_step(child);
 }
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 0/2] perf/x86: fixes and improvement for Intel Atom PEBS support

2015-12-03 Thread Stephane Eranian
This short series fixes total breakage of Intel Atom PEBS support in recent 
kernels.
The problems were introduced with the changes in the PEBS logic to handle 
deeper buffer.

The first patch fixes PEBS and LBR problems, including NULL pointers, wrong 
pointer
arithmetic, and wrong pebs record layout assumption.

The second patch adds an alias for cycles:pp to Intel Atom given that perf 
record/top
uses cycles:pp nowadays.

In V2, we removed the alias function specific to Atom and use the one from Core2
because it is identical, as suggested by Kan Liang.

Stephane Eranian (2):
  perf/x86: fix PEBS and LBR issues on Intel Atom
  perf/x86: enable cycles:pp for Intel Atom

 arch/x86/kernel/cpu/perf_event_intel.c |  1 +
 arch/x86/kernel/cpu/perf_event_intel_ds.c  | 11 ++-
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 11 +++
 3 files changed, 18 insertions(+), 5 deletions(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/3] serial: amba-pl011: add ACPI support to AMBA probe

2015-12-03 Thread Timur Tabi
On Wed, Sep 30, 2015 at 5:38 AM, Graeme Gregory
 wrote:

> @@ -2368,18 +2368,28 @@ static int pl011_probe(struct amba_device *dev, const 
> struct amba_id *id)
> if (!uap)
> return -ENOMEM;
>
> -   uap->clk = devm_clk_get(>dev, NULL);
> -   if (IS_ERR(uap->clk))
> -   return PTR_ERR(uap->clk);
> -
> -   uap->vendor = vendor;
> -   uap->lcrh_rx = vendor->lcrh_rx;
> -   uap->lcrh_tx = vendor->lcrh_tx;
> -   uap->fifosize = vendor->get_fifosize(dev);
> +   /* ACPI only defines SBSA variant */
> +   if (!ACPI_COMPANION(>dev)) {
> +   uap->clk = devm_clk_get(>dev, NULL);
> +   if (IS_ERR(uap->clk))
> +   return PTR_ERR(uap->clk);
> +
> +   uap->vendor = vendor;
> +   uap->lcrh_rx = vendor->lcrh_rx;
> +   uap->lcrh_tx = vendor->lcrh_tx;
> +   uap->fifosize = vendor->get_fifosize(dev);
> +   uap->port.ops = _pl011_pops;
> +   snprintf(uap->type, sizeof(uap->type), "PL011 rev%u",
> +   amba_rev(dev));
> +} else {
> +   uap->vendor = _sbsa;
> +   uap->fifosize   = 32;
> +   uap->port.ops   = _uart_pops;
> +   uap->fixed_baud = 115200;
> +
> +   snprintf(uap->type, sizeof(uap->type), "SBSA");
> +   }
> uap->port.irq = dev->irq[0];
> -   uap->port.ops = _pl011_pops;
> -
> -   snprintf(uap->type, sizeof(uap->type), "PL011 rev%u", amba_rev(dev));

I'm confused.  We already have ACPI support in amba-pl011 driver, and
pl011_probe() is never called on an SBSA system.  That's what
sbsa_uart_probe() is for.  You even added this patch:

drivers: PL011: add ACPI probing for SBSA UART

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/2] perf/x86: fix PEBS and LBR issues on Intel Atom

2015-12-03 Thread Stephane Eranian
This patch fixes a number of problems in the PEBS
and LBR support of Intel Atom. Those bugs were introduced
by the recent changes to the PEBS code to handle multiple
entries.

The kernel was assuming that if the CPU supports 64-bit format
LBR, then it has an LBR_SELECT MSR. Atom uses 64-bit LBR format
but does not have LBR_SELECT. That was causing NULL pointer
dereferences in a couple of places.

The kernel had a pointer arithmetic error in intel_pmu_drain_pebs_core()
when calculating the number of records present in the PEBS buffer.

The get_next_pebs_record_by_bit() was called on PEBS fm0 which does
not use the pebs_record_nhm layout.

This patch fixes all those problems and has PEBS and LBR working again.

Signed-off-by: Stephane Eranian 
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |  9 -
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 11 +++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5db1c77..dae5f93 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1101,6 +1101,13 @@ get_next_pebs_record_by_bit(void *base, void *top, int 
bit)
void *at;
u64 pebs_status;
 
+   /*
+* fmt0 does not have a status bitfield (does not use
+* perf_record_nhm format)
+*/
+   if (x86_pmu.intel_cap.pebs_format < 1)
+   return base;
+
if (base == NULL)
return NULL;
 
@@ -1186,7 +1193,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs 
*iregs)
if (!event->attr.precise_ip)
return;
 
-   n = (top - at) / x86_pmu.pebs_record_size;
+   n = top - at;
if (n <= 0)
return;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c 
b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index e2fad0c..1390148 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -161,7 +161,7 @@ static void __intel_pmu_lbr_enable(bool pmi)
 */
if (cpuc->lbr_sel)
lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
-   if (!pmi)
+   if (!pmi && cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, lbr_select);
 
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
@@ -430,7 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events 
*cpuc)
  */
 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
-   bool need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+   bool need_info = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
@@ -438,8 +438,11 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events 
*cpuc)
int out = 0;
int num = x86_pmu.lbr_nr;
 
-   if (cpuc->lbr_sel->config & LBR_CALL_STACK)
-   num = tos;
+   if (cpuc->lbr_sel) {
+   need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+   if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+   num = tos;
+   }
 
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/2] perf/x86: enable cycles:pp for Intel Atom

2015-12-03 Thread Stephane Eranian
This patch updates the PEBS support for Intel Atom to provide
an alias for the cycles:pp event used by perf record/top by default
nowadays.

On Atom,  only INST_RETIRED:ANY supports PEBS, so we use this event
instead with a large cmask to count cycles. Given that Core2 has
the same issue, we use the intel_pebs_aliases_core2() function for Atom
as well.

Signed-off-by: Stephane Eranian 
---
 arch/x86/kernel/cpu/perf_event_intel.c| 1 +
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c 
b/arch/x86/kernel/cpu/perf_event_intel.c
index 61f2577..cef4d2f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -3332,6 +3332,7 @@ __init int intel_pmu_init(void)
 
x86_pmu.event_constraints = intel_gen_event_constraints;
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
+   x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
pr_cont("Atom events, ");
break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index dae5f93..1b748ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -620,6 +620,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] 
= {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),/* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+   /* Allow all events as PEBS with no flags */
+   INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
 };
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread Peter Zijlstra
On Thu, Dec 03, 2015 at 02:56:37PM -0500, Waiman Long wrote:
> >  #ifdef CONFIG_CGROUP_SCHED
> >+task_group_cache = KMEM_CACHE(task_group, 0);
> >+
> Thanks for making that change.
> 
> Do we need to add the flag SLAB_HWCACHE_ALIGN? Or we could make a helper
> flag that define SLAB_HWCACHE_ALIGN if CONFIG_FAIR_GROUP_SCHED is defined.
> Other than that, I am fine with the change.

I don't think we need that, see my reply earlier to Ben.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH linux-next 3/3] mtd: brcmnand: Add support for the BCM6368

2015-12-03 Thread Simon Arlott
The BCM6368 has a NAND interrupt register with combined status and enable
registers.

As the BCM6328, BCM6362 and BCM6368 all use v2.1 controllers, the first
variant that will work with this driver is the BCM63268 using a v4.0
controller.

Set up the device by disabling and acking all interrupts, then handle
the CTRL_READY interrupt.

Signed-off-by: Simon Arlott 
---
 drivers/mtd/nand/brcmnand/Makefile   |   1 +
 drivers/mtd/nand/brcmnand/bcm6368_nand.c | 145 +++
 2 files changed, 146 insertions(+)
 create mode 100644 drivers/mtd/nand/brcmnand/bcm6368_nand.c

diff --git a/drivers/mtd/nand/brcmnand/Makefile 
b/drivers/mtd/nand/brcmnand/Makefile
index 3b1fbfd..b28ffb59 100644
--- a/drivers/mtd/nand/brcmnand/Makefile
+++ b/drivers/mtd/nand/brcmnand/Makefile
@@ -2,5 +2,6 @@
 # more specific iproc_nand.o, for instance
 obj-$(CONFIG_MTD_NAND_BRCMNAND)+= iproc_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)+= bcm63138_nand.o
+obj-$(CONFIG_MTD_NAND_BRCMNAND)+= bcm6368_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)+= brcmstb_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)+= brcmnand.o
diff --git a/drivers/mtd/nand/brcmnand/bcm6368_nand.c 
b/drivers/mtd/nand/brcmnand/bcm6368_nand.c
new file mode 100644
index 000..c347ea5
--- /dev/null
+++ b/drivers/mtd/nand/brcmnand/bcm6368_nand.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2015 Simon Arlott
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Derived from bcm63138_nand.c:
+ * Copyright © 2015 Broadcom Corporation
+ *
+ * Derived from 
bcm963xx_4.12L.06B_consumer/shared/opensource/include/bcm963xx/63268_map_part.h:
+ * Copyright 2000-2010 Broadcom Corporation
+ *
+ * Derived from 
bcm963xx_4.12L.06B_consumer/shared/opensource/flash/nandflash.c:
+ * Copyright 2000-2010 Broadcom Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "brcmnand.h"
+
+struct bcm6368_nand_soc {
+   struct brcmnand_soc soc;
+   void __iomem *base;
+};
+
+#define BCM6368_NAND_INT   0x00
+#define  BCM6368_NAND_STATUS_SHIFT 0
+#define  BCM6368_NAND_STATUS_MASK  (0xfff << BCM6368_NAND_STATUS_SHIFT)
+#define  BCM6368_NAND_ENABLE_SHIFT 16
+#define  BCM6368_NAND_ENABLE_MASK  (0x << BCM6368_NAND_ENABLE_SHIFT)
+#define BCM6368_NAND_BASE_ADDR00x04
+#define BCM6368_NAND_BASE_ADDR10x0c
+
+enum {
+   BCM6368_NP_READ = BIT(0),
+   BCM6368_BLOCK_ERASE = BIT(1),
+   BCM6368_COPY_BACK   = BIT(2),
+   BCM6368_PAGE_PGM= BIT(3),
+   BCM6368_CTRL_READY  = BIT(4),
+   BCM6368_DEV_RBPIN   = BIT(5),
+   BCM6368_ECC_ERR_UNC = BIT(6),
+   BCM6368_ECC_ERR_CORR= BIT(7),
+};
+
+static bool bcm6368_nand_intc_ack(struct brcmnand_soc *soc)
+{
+   struct bcm6368_nand_soc *priv =
+   container_of(soc, struct bcm6368_nand_soc, soc);
+   void __iomem *mmio = priv->base + BCM6368_NAND_INT;
+   u32 val = brcmnand_readl(mmio);
+
+   if (val & (BCM6368_CTRL_READY << BCM6368_NAND_STATUS_SHIFT)) {
+   /* Ack interrupt */
+   val &= ~BCM6368_NAND_STATUS_MASK;
+   val |= BCM6368_CTRL_READY << BCM6368_NAND_STATUS_SHIFT;
+   brcmnand_writel(val, mmio);
+   return true;
+   }
+
+   return false;
+}
+
+static void bcm6368_nand_intc_set(struct brcmnand_soc *soc, bool en)
+{
+   struct bcm6368_nand_soc *priv =
+   container_of(soc, struct bcm6368_nand_soc, soc);
+   void __iomem *mmio = priv->base + BCM6368_NAND_INT;
+   u32 val = brcmnand_readl(mmio);
+
+   /* Don't ack any interrupts */
+   val &= ~BCM6368_NAND_STATUS_MASK;
+
+   if (en)
+   val |= BCM6368_CTRL_READY << BCM6368_NAND_ENABLE_SHIFT;
+   else
+   val &= ~(BCM6368_CTRL_READY << BCM6368_NAND_ENABLE_SHIFT);
+
+   brcmnand_writel(val, mmio);
+}
+
+static int bcm6368_nand_probe(struct platform_device *pdev)
+{
+   struct device *dev = >dev;
+   struct bcm6368_nand_soc *priv;
+   struct brcmnand_soc *soc;
+   struct resource *res;
+
+   priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+   if (!priv)
+   return -ENOMEM;
+   soc = >soc;
+
+   res = platform_get_resource_byname(pdev,
+   IORESOURCE_MEM, "nand-intr-base");
+   if (!res)
+   return -EINVAL;
+
+   priv->base = devm_ioremap_resource(dev, res);
+   if (IS_ERR(priv->base))
+   return 

[PATCH linux-next 2/3] mtd: brcmnand: Request and enable the clock if present

2015-12-03 Thread Simon Arlott
Attempt to enable a clock named "nand" as some SoCs have a clock for the
controller that needs to be enabled.

Signed-off-by: Simon Arlott 
---
Removed ctrl->clk not NULL check.

 drivers/mtd/nand/brcmnand/brcmnand.c | 64 
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c 
b/drivers/mtd/nand/brcmnand/brcmnand.c
index 35d78f7..cf0374e 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -11,6 +11,7 @@
  * GNU General Public License for more details.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -122,6 +123,9 @@ struct brcmnand_controller {
/* Some SoCs provide custom interrupt status register(s) */
struct brcmnand_soc *soc;
 
+   /* Some SoCs have a gateable clock for the controller */
+   struct clk  *clk;
+
int cmd_pending;
booldma_pending;
struct completion   done;
@@ -2127,10 +2131,24 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
if (IS_ERR(ctrl->nand_base))
return PTR_ERR(ctrl->nand_base);
 
+   /* Enable clock before using NAND registers */
+   ctrl->clk = devm_clk_get(dev, "nand");
+   if (!IS_ERR(ctrl->clk)) {
+   ret = clk_prepare_enable(ctrl->clk);
+   if (ret)
+   return ret;
+   } else {
+   ret = PTR_ERR(ctrl->clk);
+   if (ret == -EPROBE_DEFER)
+   return ret;
+
+   ctrl->clk = NULL;
+   }
+
/* Initialize NAND revision */
ret = brcmnand_revision_init(ctrl);
if (ret)
-   return ret;
+   goto err;
 
/*
 * Most chips have this cache at a fixed offset within 'nand' block.
@@ -2139,8 +2157,10 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand-cache");
if (res) {
ctrl->nand_fc = devm_ioremap_resource(dev, res);
-   if (IS_ERR(ctrl->nand_fc))
-   return PTR_ERR(ctrl->nand_fc);
+   if (IS_ERR(ctrl->nand_fc)) {
+   ret = PTR_ERR(ctrl->nand_fc);
+   goto err;
+   }
} else {
ctrl->nand_fc = ctrl->nand_base +
ctrl->reg_offsets[BRCMNAND_FC_BASE];
@@ -2150,8 +2170,10 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "flash-dma");
if (res) {
ctrl->flash_dma_base = devm_ioremap_resource(dev, res);
-   if (IS_ERR(ctrl->flash_dma_base))
-   return PTR_ERR(ctrl->flash_dma_base);
+   if (IS_ERR(ctrl->flash_dma_base)) {
+   ret = PTR_ERR(ctrl->flash_dma_base);
+   goto err;
+   }
 
flash_dma_writel(ctrl, FLASH_DMA_MODE, 1); /* linked-list */
flash_dma_writel(ctrl, FLASH_DMA_ERROR_STATUS, 0);
@@ -2160,13 +2182,16 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
ctrl->dma_desc = dmam_alloc_coherent(dev,
 sizeof(*ctrl->dma_desc),
 >dma_pa, GFP_KERNEL);
-   if (!ctrl->dma_desc)
-   return -ENOMEM;
+   if (!ctrl->dma_desc) {
+   ret = -ENOMEM;
+   goto err;
+   }
 
ctrl->dma_irq = platform_get_irq(pdev, 1);
if ((int)ctrl->dma_irq < 0) {
dev_err(dev, "missing FLASH_DMA IRQ\n");
-   return -ENODEV;
+   ret = -ENODEV;
+   goto err;
}
 
ret = devm_request_irq(dev, ctrl->dma_irq,
@@ -2175,7 +2200,7 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
if (ret < 0) {
dev_err(dev, "can't allocate IRQ %d: error %d\n",
ctrl->dma_irq, ret);
-   return ret;
+   goto err;
}
 
dev_info(dev, "enabling FLASH_DMA\n");
@@ -2199,7 +2224,8 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
ctrl->irq = platform_get_irq(pdev, 0);
if ((int)ctrl->irq < 0) {
dev_err(dev, "no IRQ defined\n");
-   return -ENODEV;
+   ret = -ENODEV;
+   goto err;
}
 
/*
@@ -2223,7 +2249,7 @@ int brcmnand_probe(struct platform_device *pdev, struct 
brcmnand_soc *soc)
if (ret < 0) {
  

[PATCH] crypto: fix kernel-doc warnings in crypto/aead.h

2015-12-03 Thread Randy Dunlap
From: Randy Dunlap 

Fix 21 occurrences of this kernel-doc warning in <crypto/aead.h>:

..//include/crypto/aead.h:149: warning: No description found for parameter 
'base'

Signed-off-by: Randy Dunlap 
---
 include/crypto/aead.h |1 +
 1 file changed, 1 insertion(+)

--- lnx-44-rc3.orig/include/crypto/aead.h
+++ lnx-44-rc3/include/crypto/aead.h
@@ -128,6 +128,7 @@ struct aead_request {
  * @exit: Deinitialize the cryptographic transformation object. This is a
  *   counterpart to @init, used to remove various changes set in
  *   @init.
+ * @base: Definition of a generic crypto cipher algorithm.
  *
  * All fields except @ivsize is mandatory and must be filled.
  */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH linux-next 1/3] mtd: brcmnand: Add brcm,bcm6368-nand device tree binding

2015-12-03 Thread Simon Arlott
Add device tree binding for NAND on the BCM6368.

The BCM6368 has a NAND interrupt register with combined status and enable
registers. It also requires a clock, so add an optional clock to the
common brcmnand binding.

Signed-off-by: Simon Arlott 
---
 .../devicetree/bindings/mtd/brcm,brcmnand.txt  | 32 ++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt 
b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
index 4ff7128..16d7835 100644
--- a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
+++ b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt
@@ -45,6 +45,8 @@ Required properties:
 - #size-cells  : <0>
 
 Optional properties:
+- clock : reference to the clock for the NAND controller
+- clock-names   : "nand" (required for the above clock)
 - brcm,nand-has-wp  : Some versions of this IP include a write-protect
   (WP) control bit. It is always available on >=
   v7.0. Use this property to describe the rare
@@ -72,6 +74,12 @@ we define additional 'compatible' properties and associated 
register resources w
and enable registers
  - reg-names: (required) "nand-int-base"
 
+   * "brcm,nand-bcm6368"
+ - compatible: should contain "brcm,nand-bcm<number>", "brcm,nand-bcm6368"
+ - reg: (required) the 'NAND_INTR_BASE' register range, with combined 
status
+   and enable registers, and boot address registers
+ - reg-names: (required) "nand-intr-base"
+
* "brcm,nand-iproc"
  - reg: (required) the "IDM" register range, for interrupt enable and APB
bus access endianness configuration, and the "EXT" register range,
@@ -148,3 +156,27 @@ nand@f0442800 {
};
};
 };
+
+nand@1200 {
+   compatible = "brcm,nand-bcm63168", "brcm,nand-bcm6368",
+   "brcm,brcmnand-v4.0", "brcm,brcmnand";
+   reg = <0x1200 0x180>,
+ <0x1600 0x200>,
+ <0x10b0 0x10>;
+   reg-names = "nand", "nand-cache", "nand-intr-base";
+   interrupt-parent = <_intc>;
+   interrupts = <50>;
+   clocks = <_clk 20>;
+   clock-names = "nand";
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   nand0: nandcs@0 {
+   compatible = "brcm,nandcs";
+   reg = <0>;
+   nand-on-flash-bbt;
+   nand-ecc-strength = <1>;
+   nand-ecc-step-size = <512>;
+   };
+};
-- 
2.1.4

-- 
Simon Arlott
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread Waiman Long

On 12/03/2015 06:12 AM, Peter Zijlstra wrote:


I made this:

---
Subject: sched/fair: Move hot load_avg into its own cacheline
From: Waiman Long
Date: Wed, 2 Dec 2015 13:41:49 -0500

If a system with a large number of sockets was driven to full
utilization, it was found that the clock tick handling occupied a
rather significant proportion of CPU time when fair group scheduling
and autogroup were enabled.

Running a java benchmark on a 16-socket IvyBridge-EX system, the perf
profile looked like:

   10.52%   0.00%  java   [kernel.vmlinux]  [k] smp_apic_timer_interrupt
9.66%   0.05%  java   [kernel.vmlinux]  [k] hrtimer_interrupt
8.65%   0.03%  java   [kernel.vmlinux]  [k] tick_sched_timer
8.56%   0.00%  java   [kernel.vmlinux]  [k] update_process_times
8.07%   0.03%  java   [kernel.vmlinux]  [k] scheduler_tick
6.91%   1.78%  java   [kernel.vmlinux]  [k] task_tick_fair
5.24%   5.04%  java   [kernel.vmlinux]  [k] update_cfs_shares

In particular, the high CPU time consumed by update_cfs_shares()
was mostly due to contention on the cacheline that contained the
task_group's load_avg statistical counter. This cacheline may also
contain variables like shares, cfs_rq & se which are accessed rather
frequently during clock tick processing.

This patch moves the load_avg variable into another cacheline
separated from the other frequently accessed variables. It also
creates a cacheline aligned kmemcache for task_group to make sure
that all the allocated task_group's are cacheline aligned.

By doing so, the perf profile became:

9.44%   0.00%  java   [kernel.vmlinux]  [k] smp_apic_timer_interrupt
8.74%   0.01%  java   [kernel.vmlinux]  [k] hrtimer_interrupt
7.83%   0.03%  java   [kernel.vmlinux]  [k] tick_sched_timer
7.74%   0.00%  java   [kernel.vmlinux]  [k] update_process_times
7.27%   0.03%  java   [kernel.vmlinux]  [k] scheduler_tick
5.94%   1.74%  java   [kernel.vmlinux]  [k] task_tick_fair
4.15%   3.92%  java   [kernel.vmlinux]  [k] update_cfs_shares

The %cpu time is still pretty high, but it is better than before. The
benchmark results before and after the patch were as follows:

   Before patch - Max-jOPs: 907533Critical-jOps: 134877
   After patch  - Max-jOPs: 916011Critical-jOps: 142366

Cc: Scott J Norton
Cc: Douglas Hatch
Cc: Ingo Molnar
Cc: Yuyang Du
Cc: Paul Turner
Cc: Ben Segall
Cc: Morten Rasmussen
Signed-off-by: Waiman Long
Signed-off-by: Peter Zijlstra (Intel)
Link: 
http://lkml.kernel.org/r/1449081710-20185-3-git-send-email-waiman.l...@hpe.com
---
  kernel/sched/core.c  |   10 +++---
  kernel/sched/sched.h |7 ++-
  2 files changed, 13 insertions(+), 4 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7345,6 +7345,9 @@ int in_sched_functions(unsigned long add
   */
  struct task_group root_task_group;
  LIST_HEAD(task_groups);
+
+/* Cacheline aligned slab cache for task_group */
+static struct kmem_cache *task_group_cache __read_mostly;
  #endif

  DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
@@ -7402,11 +7405,12 @@ void __init sched_init(void)
  #endif /* CONFIG_RT_GROUP_SCHED */

  #ifdef CONFIG_CGROUP_SCHED
+   task_group_cache = KMEM_CACHE(task_group, 0);
+

Thanks for making that change.

Do we need to add the flag SLAB_HWCACHE_ALIGN? Or we could make a helper 
flag that defines SLAB_HWCACHE_ALIGN if CONFIG_FAIR_GROUP_SCHED is 
defined. Other than that, I am fine with the change.


Cheers,
Longman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/2] dm verity: add support for error correction

2015-12-03 Thread Mike Snitzer
On Thu, Dec 03 2015 at  9:26am -0500,
Sami Tolvanen  wrote:

> Changes since v1:
> 
>   - Added CONFIG_DM_VERITY_FEC and split error correction into
> dm-verity-fec.[ch] to further separate the functionality from the
> rest of dm-verity. Follows the same pattern as dm-uevent.
> 
>   - Added missing dependencies for REED_SOLOMON to Kconfig.
> 
>   - Renamed dm-verity.c to dm-verity-target.c to allow an optional
> object to be added. Follows the naming convention of dm-cache and
> dm-era.
> 
>   - Changed the algorithm to work with one or more small buffers (~4k)
> instead of a single large one. The more buffers we can allocate,
> the faster it will work, but we don't have to preallocate a large
> amount of memory anymore.
> 
>   - Changed memory allocation to use mempools. v2 preallocates all the
> memory required for each worker thread to guarantee forward
> progress in case of memory pressure. The code attempts to allocate
> more buffers (using GFP_NOIO) and uses them if available.
> 
>   - Added graceful handling of IO errors, which are now treated as any
> other corruption.
> 
>   - Rebased against linux-dm/for-next.

Thanks a lot for these advances, at a high-level it sounds like you've
handled the issues raised as part of v1 review very well.

I'll review closer now.  Goal is to get these changes staged in
linux-next for upstream inclusion during the 4.5 merge window.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] rtc: Add Epson RX8010SJ RTC driver

2015-12-03 Thread Akshay Bhat
Thanks for the detailed feedback and mentioning --strict option for 
checkpatch :) I have fixed all the issues in the v2 version of the 
patch: https://lkml.org/lkml/2015/12/3/606


On 12/02/2015 06:40 PM, Alexandre Belloni wrote:

On 11/11/2015 at 17:31:58 -0500, Akshay Bhat wrote :

diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c
new file mode 100644
index 000..9b8bd76
--- /dev/null
+++ b/drivers/rtc/rtc-rx8010.c
@@ -0,0 +1,570 @@
+/*
+ * Driver for the Epson RTC module RX-8010 SJ
+ *
+ * Copyright(C) Timesys Corporation 2015
+ * Copyright(C) General Electric Company 2015
+ * Copyright(C) SEIKO EPSON CORPORATION 2013. All rights reserved.
+ *
+ * Derived from RX-8025 driver:
+ * Copyright (C) 2009 Wolfgang Grandegger 
+ *
+ * Copyright (C) 2005 by Digi International Inc.
+ * All rights reserved.
+ *
+ * Modified by fengjh at rising.com.cn
+ * 
+ * 2006.11
+ *
+ * Code cleanup by Sergei Poselenov, 
+ * Converted to new style by Wolfgang Grandegger 
+ * Alarm and periodic interrupt added by Dmitry Rakhchev 
+ *


Please remove all those unnecessary copyrights, the original
rx-8025 has been heavily rewritten anyway.


+static int rx8010_read_reg(struct i2c_client *client, int number, u8 *value)
+{
+   int ret = i2c_smbus_read_byte_data(client, number);
+
+   if (ret < 0)
+   return ret;
+
+   *value = ret;
+   return 0;
+}


I don't see the benefit of that function, calling
i2c_smbus_read_byte_data directly is more efficient.


+
+static int rx8010_read_regs(struct i2c_client *client, int number, u8 length,
+   u8 *values)
+{
+   int ret = i2c_smbus_read_i2c_block_data(client, number, length, values);
+
+   if (ret != length)
+   return ret < 0 ? ret : -EIO;
+
+   return 0;
+}


Apart from the error handling, I'd say the same for that function.


+
+static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id)
+{
+   struct i2c_client *client = dev_id;
+   struct rx8010_data *rx8010 = i2c_get_clientdata(client);
+   u8 flagreg;
+
+   spin_lock(>flags_lock);
+
+   if (rx8010_read_reg(client, RX8010_FLAG, )) {
+   spin_unlock(>flags_lock);
+   return IRQ_NONE;
+   }
+
+   if (flagreg & RX8010_FLAG_VLF)
+   dev_warn(>dev, "Frequency stop detected\n");
+
+   if (flagreg & RX8010_FLAG_TF) {
+   flagreg &= ~RX8010_FLAG_TF;
+   rtc_update_irq(rx8010->rtc, 1, RTC_PF | RTC_IRQF);
+   }
+
+   if (flagreg & RX8010_FLAG_AF) {
+   flagreg &= ~RX8010_FLAG_AF;
+   rtc_update_irq(rx8010->rtc, 1, RTC_AF | RTC_IRQF);
+   }
+
+   if (flagreg & RX8010_FLAG_UF) {
+   flagreg &= ~RX8010_FLAG_UF;
+   rtc_update_irq(rx8010->rtc, 1, RTC_UF | RTC_IRQF);
+   }
+
+   i2c_smbus_write_byte_data(client, RX8010_FLAG, flagreg);
+
+   spin_unlock(>flags_lock);
+   return IRQ_HANDLED;
+}
+
+static int rx8010_get_time(struct device *dev, struct rtc_time *dt)
+{
+   struct rx8010_data *rx8010 = dev_get_drvdata(dev);
+   u8 date[7];
+   u8 flagreg;
+   int err;
+
+   err = rx8010_read_reg(rx8010->client, RX8010_FLAG, );
+   if (err)
+   return err;
+
+   if (flagreg & RX8010_FLAG_VLF) {
+   dev_warn(dev, "Frequency stop detected\n");
+   return -EINVAL;
+   }
+
+   err = rx8010_read_regs(rx8010->client, RX8010_SEC, 7, date);
+   if (err)
+   return err;
+
+   dt->tm_sec = bcd2bin(date[RX8010_SEC-RX8010_SEC] & 0x7f);
+   dt->tm_min = bcd2bin(date[RX8010_MIN-RX8010_SEC] & 0x7f);
+   dt->tm_hour = bcd2bin(date[RX8010_HOUR-RX8010_SEC] & 0x3f);
+   dt->tm_mday = bcd2bin(date[RX8010_MDAY-RX8010_SEC] & 0x3f);
+   dt->tm_mon = bcd2bin(date[RX8010_MONTH-RX8010_SEC] & 0x1f) - 1;
+   dt->tm_year = bcd2bin(date[RX8010_YEAR-RX8010_SEC]);
+   dt->tm_wday = bcd2bin(date[RX8010_WDAY-RX8010_SEC] & 0x7f);
+


This is not the correct value for tm_wday, you should use ffs(), not
that anybody actually cares.

Also, checkpatch --strict complains about missing spaces around those '-'
and a few alignments are not correct, can you fix those?



+   if (dt->tm_year < 70)
+   dt->tm_year += 100;
+


I'd say that we don't care about handling dates before 2000 and that the
range should be 2000-2100 as this is actually the range where the leap
year calculation is correct. Also you are not respecting that in
rx8010_set_time() so setting a date in 2072 will end up reading 1972.


+   return rtc_valid_tm(dt);
+}
+
+static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
+{
+   struct rx8010_data *rx8010 = dev_get_drvdata(dev);
+   u8 date[7];
+   u8 ctrl, flagreg;
+   int ret;
+   unsigned long irqflags;
+
+   /* BUG: The HW assumes every year that is a multiple of 4 to be a leap
+* 

Re: [PATCH] parisc: Remove unused pcibios_init_bus()

2015-12-03 Thread Helge Deller
On 01.12.2015 23:02, Grant Grundler wrote:
> On Tue, Dec 1, 2015 at 12:44 PM, Helge Deller  wrote:
>> On 01.12.2015 17:41, Bjorn Helgaas wrote:
>>> There are no callers of pcibios_init_bus(), so remove it.
>>
>> True, pcibios_init_bus() isn't called anywhere, so it should be removed.
>>
>> But I wonder if we might need to initialize latency and parity for PCI-PCI
>> bridges somewhere else then?
> 
> pcibios_fixup_bus() appears to do that...but I don't know the call chain.
> I used cscope to look for PCI_BRIDGE_CTL_PARITY.
> 
>> In one of my machines I have a i960 based RAID controller which isn't working
>> yet (I think it's internally based on a PCI-PCI bridge), and maybe this
>> is the reason it doesn't work? I will need to test it (e.g. firmware doesn't
>> fully initialize PCI-PCI bridges, which is why this code was added once).

machine model : 9000/800/L3000-8x
model name: Marcato W+ (rp5470)

> If lspci doesn't show the PCI-PCI bridge, the i960 RAID card is using
> that internally.

lspci does see the PCI-PCI bridge:

50:00.0 PCI bridge: Intel Corporation 80960RP (i960RP) Microprocessor/Bridge 
(rev 02) (prog-if 00 [Normal decode])
Flags: medium devsel
Bus: primary=50, secondary=51, subordinate=51, sec-latency=0
I/O behind bridge: -0fff
Memory behind bridge: 9440-944f
Prefetchable memory behind bridge: 9450-945f
Capabilities: [68] Power Management version 2

50:00.1 I2O: Intel Corporation 80960RP (i960RP) Microprocessor (rev 02) 
(prog-if 01)
Subsystem: Hewlett-Packard Company Device 1228
Flags: medium devsel, IRQ 71
Memory at 9400 (32-bit, prefetchable) [disabled] [size=4M]
[virtual] Expansion ROM at 9460 [disabled] [size=32K]
Capabilities: [80] Power Management version 2


sec-latency is listed in the lspci output above as "sec-latency=0", so do we 
maybe need to add this part somewhere ?
-   /* PCI-PCI bridge - set the cache line and default latency
-  (32) for primary and secondary buses. */
-   pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER, 32);

> parisc (and any other architecture) should discover and configure all
> PCI-PCI bridges already.
> I know it did for the "multifunction" cards I had available 10 years ago.

Boot log shows:
[9.408000] Elroy version TR4.0 (0x5) found at 0x9fff4000
[9.564000] LBA: Not a C8000 system - not extending LMMIO range.
[9.568000] LBA: Not registering GMMIO space [mem 
0xfff50400-0xfff57fff]
[9.684000] LBA 0:10: PCI host bridge to bus :50
[9.82] pci_bus :50: root bus resource [io  0x8-0x8] (bus 
address [0x-0x])
[9.956000] pci_bus :50: root bus resource [mem 
0x9400-0x95ff] (bus address [0x9400-0x95ff])
[   10.092000] pci_bus :50: root bus resource [bus 50-57]
[   10.096000] pci :50:00.0: [8086:0964] type 01 class 0x060400
[   10.096000] pci :50:00.1: [8086:1960] type 00 class 0x0e0001
[   10.096000] pci :50:00.1: reg 0x10: [mem 0x-0x003f pref]
[   10.096000] pci :50:00.1: reg 0x30: [mem 0x-0x7fff pref]
[   10.096000] pci_bus :51: busn_res: can not insert [bus 51-ff] under [bus 
50-57] (conflicts with (null) [bus 50-57])
[   10.096000] pci :50:00.0: PCI bridge to [bus 51-ff]
[   10.232000] pci :50:00.0:   bridge window [io  0x8-0x80fff]
[   10.232000] pci :50:00.0:   bridge window [mem 0x-0x000f]
[   10.232000] pci :50:00.0:   bridge window [mem 0x-0x000f 
pref]
[   10.232000] pci :50:00.0: can't claim BAR 14 [mem 
0x-0x000f]: no compatible bridge window
[   10.372000] pci :50:00.0: can't claim BAR 15 [mem 0x-0x000f 
pref]: no compatible bridge window
[   10.508000] pci :50:00.0: can't claim BAR 16 [??? 0x flags 0x0]: 
no compatible bridge window
[   10.648000] pci_bus :51: busn_res: [bus 51-ff] end is updated to 51
[   10.648000] pci :50:00.0: BAR 16: [??? 0x flags 0x2000] has 
bogus alignment
[   10.784000] pci :50:00.1: BAR 0: assigned [mem 
0x9400-0x943f pref]
[   10.92] pci :50:00.0: BAR 14: assigned [mem 
0x9440-0x944f]
[   11.06] pci :50:00.0: BAR 15: assigned [mem 
0x9450-0x945f pref]
[   11.06] pci :50:00.1: BAR 6: assigned [mem 
0x9460-0x94607fff pref]
[   11.20] pci :50:00.0: PCI bridge to [bus 51]
[   11.336000] pci :50:00.0:   bridge window [io  0x8-0x80fff]
[   11.336000] pci :50:00.0:   bridge window [mem 
0x9440-0x944f]
[   11.472000] pci :50:00.0:   bridge window [mem 
0x9450-0x945f pref]

Helge

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More 

Re: [PATCH v3 2/5] tty: Introduce SER_RS485_SOFTWARE read-only flag for struct serial_rs485

2015-12-03 Thread Peter Hurley
On 12/03/2015 12:29 PM, Matwey V. Kornilov wrote:
> 2015-12-03 17:41 GMT+03:00 Peter Hurley :
>> Hi Matwey,
>>
>> On 12/03/2015 12:50 AM, Matwey V. Kornilov wrote:
>>> I am working on v4, where I completely redesigned implementation. And
>>> now I think that it is considerably better than v3.
>>> It looks like the following:
>>> https://github.com/matwey/linux/commits/8520_rs485_v4
>>> But it is not ready yet, there is a bug somewhere.
>>>
>>> In the v4, each subdriver decides separately if it needs rs485
>>> emulation support. Then it enables it like the following:
>>> https://github.com/matwey/linux/commit/4455e425fc045713fb921ccec695fe183f1558f0
>>> Before calling serial8250_rs485_emul_enabled, the driver enables
>>> interrupt on empty shift register (they are always there for omap_).
>>
>> Looks good.
>>
>> Are you testing with CONFIG_SERIAL_8250_DMA=n first to simplify the
>> debug effort? DMA adds a completely different tx path.
> 
> Many thanks for the advice. I've just found that the bug is not in my code =)
> Even with pure 4.3.0 I cannot open /dev/ttyS5 more than once. It just
> hangs on open() and the process is in S+ state.

Hmm, that's odd. So

$ stty -a < /dev/ttyS5

hangs if something like below is running?

$ cat > /dev/ttyS5


>> Also, before submission, please shorten the identifiers. And Greg hates
>> functions returning bool so just expand serial8250_rs485_emul_enabled()
>> inline.
> 
> Am I allowed to use `re' instead of rs485_emul in names?

Long names and constructs tend to obscure the execution flow.
Some of the names could be reduced where the meaning is obvious:

  serial8250_rts_on_send
  serial8250_rts_after_send
  serial8250_handle_start_timer
  serial8250_handle_stop_timer

These two I would inline into their lone call site:

  serial8250_rs485_emul_startup()
  serial8250_rs485_emul_shutdown()

serial8250_rs485_emul_start_tx  => __start_tx_rs485

rs485_emul => sw485/em485/emul485/soft485 ?

Or just rs485 (except for the field name and structs so as not to confuse
it with the port->rs485)

Just my 2¢

Regards,
Peter Hurley


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] workqueue: implement lockup detector

2015-12-03 Thread Tejun Heo
Hello, Don.

On Thu, Dec 03, 2015 at 12:50:24PM -0500, Don Zickus wrote:
> This sort of looks like the hung task detector..
> 
> I am a little concerned because we just made a big effort to properly
> separate the hardlockup and softlockup paths and yet retain the flexibility
> to enable/disable them separately.  Now it seems the workqueue detector is
> permanently entwined with the softlockup detector.  I am not entirely sure
> that is the correct thing to do.

The only area they get entwined is how it's controlled from userland.
While it isn't quite the same as softlockup detection, I think what it
monitors is close enough that it makes sense to put them under the
same interface.

> It also seems awkward for the lockup code to have to jump to the workqueue
> code to function properly. :-/  Though we have made exceptions for the virt
> stuff and the workqueue code is simple..

Softlockup code doesn't depend on workqueue in any way.  Workqueue
tags on touch_softlockup to detect cases which shouldn't be warned and
its enabledness is controlled together with softlockup and that's it.

> Actually, I am curious, it seems if you just added a
> /proc/sys/kernel/wq_watchdog entry, you could eliminate the entire need for
> modifying the watchdog code to begin with.  As you really aren't using any
> of it other than piggybacking on the touch_softlockup_watchdog stuff, which
> could probably be easily added without all the extra enable/disable changes
> in watchdog.c.

Yeah, except for touch signal, it's purely an interface thing.  I don't
feel too strongly about this but it seems a bit silly to introduce a
whole different set of interfaces for this.  e.g. if the user wanted to
disable softlockup detection, it'd be weird to leave wq lockup
detection running.  The same goes for threshold.

> Again, this looks like what the hung task detector is doing, which I
> struggled with years ago to integrate with the lockup code because in the
> end I had trouble re-using much of it.

So, it's a stall detector and there are inherent similarities but the
conditions tested are pretty different and it's a lot lighter.  I'm
not really sure what you're meaning to say.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/4] EDAC: add DDR4 flag

2015-12-03 Thread Borislav Petkov
On Thu, Dec 03, 2015 at 07:16:29PM +, Luck, Tony wrote:
> part4 (the actual KNL piece) seems not to break earlier (Broadwell)
> system ... but that doesn't qualify enough for Ack/Review/Tested -by.

It already has your Reviewed-by. Is it still valid?

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] rtc: Add Epson RX8010SJ RTC driver

2015-12-03 Thread Akshay Bhat
This driver supports the following functions:
 - reading and setting time
 - alarms when connected to an IRQ
 - reading and clearing the voltage low flags

Datasheet:
http://www.epsondevice.com/docs/qd/en/DownloadServlet?id=ID000956
Signed-off-by: Akshay Bhat 
---
Changes in v2:
- Address comments from Alexandre Belloni
- Update copyright info
- Use i2c_smbus_read_byte_data/i2c_smbus_read_i2c_block_data directly
- Use ffs for wday instead of bcd2bin
- Fixes from checkpatch --strict
- Limit date range to 2000 to 2100
- Fix incorrect write to RX8010_WDAY
- Do not clear VLF flag on power up

 drivers/rtc/Kconfig  |  10 +
 drivers/rtc/Makefile |   1 +
 drivers/rtc/rtc-rx8010.c | 523 +++
 3 files changed, 534 insertions(+)
 create mode 100644 drivers/rtc/rtc-rx8010.c

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 2a52424..376322f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -558,6 +558,16 @@ config RTC_DRV_FM3130
  This driver can also be built as a module. If so the module
  will be called rtc-fm3130.
 
+config RTC_DRV_RX8010
+   tristate "Epson RX8010SJ"
+   depends on I2C
+   help
+ If you say yes here you get support for the Epson RX8010SJ RTC
+ chip.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-rx8010.
+
 config RTC_DRV_RX8581
tristate "Epson RX-8581"
help
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 231f764..62d61b2 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -128,6 +128,7 @@ obj-$(CONFIG_RTC_DRV_RS5C372)   += rtc-rs5c372.o
 obj-$(CONFIG_RTC_DRV_RV3029C2) += rtc-rv3029c2.o
 obj-$(CONFIG_RTC_DRV_RV8803)   += rtc-rv8803.o
 obj-$(CONFIG_RTC_DRV_RX4581)   += rtc-rx4581.o
+obj-$(CONFIG_RTC_DRV_RX8010)   += rtc-rx8010.o
 obj-$(CONFIG_RTC_DRV_RX8025)   += rtc-rx8025.o
 obj-$(CONFIG_RTC_DRV_RX8581)   += rtc-rx8581.o
 obj-$(CONFIG_RTC_DRV_S35390A)  += rtc-s35390a.o
diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c
new file mode 100644
index 000..772d221
--- /dev/null
+++ b/drivers/rtc/rtc-rx8010.c
@@ -0,0 +1,523 @@
+/*
+ * Driver for the Epson RTC module RX-8010 SJ
+ *
+ * Copyright(C) Timesys Corporation 2015
+ * Copyright(C) General Electric Company 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define RX8010_SEC 0x10
+#define RX8010_MIN 0x11
+#define RX8010_HOUR0x12
+#define RX8010_WDAY0x13
+#define RX8010_MDAY0x14
+#define RX8010_MONTH   0x15
+#define RX8010_YEAR0x16
+#define RX8010_YEAR0x16
+#define RX8010_RESV17  0x17
+#define RX8010_ALMIN   0x18
+#define RX8010_ALHOUR  0x19
+#define RX8010_ALWDAY  0x1A
+#define RX8010_TCOUNT0 0x1B
+#define RX8010_TCOUNT1 0x1C
+#define RX8010_EXT 0x1D
+#define RX8010_FLAG0x1E
+#define RX8010_CTRL0x1F
+/* 0x20 to 0x2F are user registers */
+#define RX8010_RESV30  0x30
+#define RX8010_RESV31  0x32
+#define RX8010_IRQ 0x32
+
+#define RX8010_EXT_WADA  BIT(3)
+
+#define RX8010_FLAG_VLF  BIT(1)
+#define RX8010_FLAG_AF   BIT(3)
+#define RX8010_FLAG_TF   BIT(4)
+#define RX8010_FLAG_UF   BIT(5)
+
+#define RX8010_CTRL_AIE  BIT(3)
+#define RX8010_CTRL_UIE  BIT(5)
+#define RX8010_CTRL_STOP BIT(6)
+#define RX8010_CTRL_TEST BIT(7)
+
+#define RX8010_ALARM_AE  BIT(7)
+
+static const struct i2c_device_id rx8010_id[] = {
+   { "rx8010", 0 },
+   { }
+};
+MODULE_DEVICE_TABLE(i2c, rx8010_id);
+
+struct rx8010_data {
+   struct i2c_client *client;
+   struct rtc_device *rtc;
+   u8 ctrlreg;
+   spinlock_t flags_lock;
+};
+
+static irqreturn_t rx8010_irq_1_handler(int irq, void *dev_id)
+{
+   struct i2c_client *client = dev_id;
+   struct rx8010_data *rx8010 = i2c_get_clientdata(client);
+   int flagreg;
+
+   spin_lock(>flags_lock);
+
+   flagreg = i2c_smbus_read_byte_data(client, RX8010_FLAG);
+
+   if (flagreg <= 0) {
+   spin_unlock(>flags_lock);
+   return IRQ_NONE;
+   }
+
+   if (flagreg & RX8010_FLAG_VLF)
+   dev_warn(>dev, "Frequency stop detected\n");
+
+   if (flagreg & RX8010_FLAG_TF) {
+   flagreg &= ~RX8010_FLAG_TF;
+   rtc_update_irq(rx8010->rtc, 1, RTC_PF | RTC_IRQF);
+   }
+
+   if (flagreg & RX8010_FLAG_AF) {
+   flagreg &= ~RX8010_FLAG_AF;
+   rtc_update_irq(rx8010->rtc, 1, RTC_AF | RTC_IRQF);
+   }
+
+   if (flagreg & RX8010_FLAG_UF) {
+   flagreg &= ~RX8010_FLAG_UF;
+   rtc_update_irq(rx8010->rtc, 1, RTC_UF | RTC_IRQF);
+   }
+
+   i2c_smbus_write_byte_data(client, RX8010_FLAG, flagreg);
+
+   spin_unlock(>flags_lock);
+   return IRQ_HANDLED;
+}
+

Re: [PATCH 3/4] locking: Introduce smp_cond_acquire()

2015-12-03 Thread Davidlohr Bueso

On Thu, 03 Dec 2015, Peter Zijlstra wrote:


+/**
+ * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ *
+ * The control dependency provides a LOAD->STORE order, the additional RMB
+ * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
+ * aka. ACQUIRE.
+ */
+#define smp_cond_acquire(cond) do {\
+   while (!(cond)) \
+   cpu_relax();\
+   smp_rmb(); /* ctrl + rmb := acquire */  \
+} while (0)


So this hides the fact that we actually are waiting on the cond, as opposed
to conditional acquiring. Could it be renamed to something like 
smp_waitcond_acquire()?

Thanks,
Davidlohr
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread bsegall
Waiman Long  writes:

> On 12/02/2015 03:02 PM, bseg...@google.com wrote:
>> Waiman Long  writes:
>>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>>> index efd3bfc..e679895 100644
>>> --- a/kernel/sched/sched.h
>>> +++ b/kernel/sched/sched.h
>>> @@ -248,7 +248,12 @@ struct task_group {
>>> unsigned long shares;
>>>
>>>   #ifdefCONFIG_SMP
>>> -   atomic_long_t load_avg;
>>> +   /*
>>> +* load_avg can be heavily contended at clock tick time, so put
>>> +* it in its own cacheline separated from the fields above which
>>> +* will also be accessed at each tick.
>>> +*/
>>> +   atomic_long_t load_avg cacheline_aligned;
>>>   #endif
>>>   #endif
>> I suppose the question is if it would be better to just move this to
>> wind up on a separate cacheline without the extra empty space, though it
>> would likely be more fragile and unclear.
>
> I have been thinking about that too. The problem is anything that will be in 
> the
> same cacheline as load_avg and have to be accessed at clock tick time will
> cause the same contention problem. In the current layout, the fields after
> load_avg are the rt stuff as well some list head structure and pointers. The 
> rt
> stuff should be kind of mutually exclusive of the CFS load_avg in term of 
> usage.
> The list head structure and pointers don't seem to be that frequently 
> accessed.
> So it is the right place to start a new cacheline boundary.
>
> Cheers,
> Longman

Yeah, this is a good place to start a new boundary, I was just saying
you could probably remove the empty space by reordering fields, but that
would be a less logical ordering in terms of programmer clarity.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/3] resource: Add @flags to region_intersects()

2015-12-03 Thread Toshi Kani
On Thu, 2015-12-03 at 11:01 -0800, Linus Torvalds wrote:
> On Thu, Dec 3, 2015 at 10:40 AM, Borislav Petkov  wrote:
> > On Thu, Dec 03, 2015 at 11:54:19AM -0700, Toshi Kani wrote:
> > > Adding a new type for regular memory will require inspecting the 
> > > codes using IORESOURCE_MEM currently, and modify them to use the new 
> > > type if their target ranges are regular memory.  There are many 
> > > references to this type across multiple architectures and drivers, 
> > > which make this inspection and testing challenging.
> > 
> > What's wrong with adding a new type_flags to struct resource and not
> > touching IORESOURCE_* at all?
> 
> Bah. Both of these ideas are bogus.
> 
> Just add a new flag. The bits are already modifiers that you can
> *combine* to show what kind of resource it is, and we already have
> things like IORESOURCE_PREFETCH etc, that are in *addition* to the
> normal IORESOURCE_MEM bit.
> 
> Just add another modifier: IORESOURCE_RAM.
> 
> So it would still show up as IORESOURCE_MEM, but it would have
> additional information specifying that it's actually RAM.
> 
> If somebody does something like
> 
>  if (res->flags == IORESOURCE_MEM)
> 
> then they are already completely broken and won't work *anyway*. It's
> a bitmask, not a set of values.

Yes, if we can assign new modifiers, that will be quite simple. :-)  I
assume we can allocate new bits from the remaining free bits as follows.

+#define IORESOURCE_SYSTEM_RAM  0x0100  /* System RAM */
+#define IORESOURCE_PMEM0x0200  /* Persistent memory */
 #define IORESOURCE_EXCLUSIVE   0x0800  /* Userland may not map
this resource */

Note, SYSTEM_RAM represents the OS memory, i.e. "System RAM", not any RAM
ranges.

With the new modifiers, region_intersect() can check these ranges.  One
caveat is that the modifiers are not very extensible for new types as they
are bit maps.  region_intersect() will no longer be capable of checking any
regions with any given name.  I think this is OK since this function was
introduced recently, and is only used for checking "System RAM" and
"Persistent Memory" (with this patch series).

Thanks,
-Toshi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread Waiman Long

On 12/03/2015 05:56 AM, Peter Zijlstra wrote:

On Wed, Dec 02, 2015 at 01:41:49PM -0500, Waiman Long wrote:

+/*
+ * Make sure that the task_group structure is cacheline aligned when
+ * fair group scheduling is enabled.
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline struct task_group *alloc_task_group(void)
+{
+   return kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
+}
+
+static inline void free_task_group(struct task_group *tg)
+{
+   kmem_cache_free(task_group_cache, tg);
+}
+#else /* CONFIG_FAIR_GROUP_SCHED */
+static inline struct task_group *alloc_task_group(void)
+{
+   return kzalloc(sizeof(struct task_group), GFP_KERNEL);
+}
+
+static inline void free_task_group(struct task_group *tg)
+{
+   kfree(tg);
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */

I think we can simply always use the kmem_cache, both slab and slub
merge slabcaches where appropriate.


I did that as I was not sure how much overhead would the introduction of 
a new kmem_cache bring. It seems like it is not really an issue. So I am 
fine with making the kmem_cache change permanent.


Cheers,
Longman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [net-next v4 4/8] dpaa_eth: add driver's Tx queue selection

2015-12-03 Thread Scott Wood
On Thu, 2015-12-03 at 04:02 -0600, Bucur Madalin-Cristian-B32716 wrote:
> > -Original Message-
> > From: Wood Scott-B07421
> > Sent: Wednesday, December 02, 2015 11:41 PM
> > 
> > On Mon, 2015-11-02 at 19:31 +0200, Madalin Bucur wrote:
> > > Allow the selection of the transmission queue based on the CPU id.
> > 
> > Explain why.
> 
> I'll add more details in the commit log. This is a customer generated
> feature. Bypassing the standard XPS can increase performance by making use
> of the DPAA HW particularities.
> 
> > > 
> > > Signed-off-by: Madalin Bucur 
> > > ---
> > >  drivers/net/ethernet/freescale/dpaa/Kconfig   | 10 ++
> > >  drivers/net/ethernet/freescale/dpaa/dpaa_eth.c|  3 +++
> > >  drivers/net/ethernet/freescale/dpaa/dpaa_eth.h|  6 ++
> > >  drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c |  8 
> > >  drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h |  4 
> > >  5 files changed, 31 insertions(+)
> > > 
> > > diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig
> > > b/drivers/net/ethernet/freescale/dpaa/Kconfig
> > > index 022d5aa..2577aac 100644
> > > --- a/drivers/net/ethernet/freescale/dpaa/Kconfig
> > > +++ b/drivers/net/ethernet/freescale/dpaa/Kconfig
> > > @@ -11,6 +11,16 @@ menuconfig FSL_DPAA_ETH
> > > 
> > >  if FSL_DPAA_ETH
> > > 
> > > +config FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE
> > > + bool "Use driver's Tx queue selection mechanism"
> > > + default y
> > > + ---help---
> > > +   The DPAA Ethernet driver defines a ndo_select_queue()
> > > callback
> > > for optimal selection
> > > +   of the egress FQ. That will override the XPS support for this
> > > netdevice.
> > > +   If for whatever reason you want to be in control of the
> > > egress FQ
> > > -to-CPU selection and mapping,
> > > +   or simply don't want to use the driver's ndo_select_queue()
> > > callback, then unselect this
> > > +   and use the standard XPS support instead.
> > 
> > Is there a use case for needing this to be configurable?
> 
> If the standard XPS is desired, the Kconfig option allows the driver user to
> select that.

Under what circumstances does it make sense to desire this?  Could you put the
answer to that in the help text?

> > > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > > b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > > index 31d55b4..894f1a7 100644
> > > --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > > +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
> > > @@ -390,6 +390,9 @@ static const struct net_device_ops dpa_private_ops =
> > {
> > >   .ndo_get_stats64 = dpa_get_stats64,
> > >   .ndo_set_mac_address = dpa_set_mac_address,
> > >   .ndo_validate_addr = eth_validate_addr,
> > > +#ifdef CONFIG_FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE
> > > + .ndo_select_queue = dpa_select_queue,
> > > +#endif
> > >   .ndo_change_mtu = dpa_change_mtu,
> > >   .ndo_set_rx_mode = dpa_set_rx_mode,
> > >   .ndo_init = dpa_ndo_init,
> > > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > > b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > > index 1ba6617..87577cf 100644
> > > --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > > +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
> > > @@ -420,9 +420,15 @@ static inline void _dpa_assign_wq(struct dpa_fq
> > *fq)
> > >   }
> > >  }
> > > 
> > > +#ifdef CONFIG_FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE
> > > +/* Use in lieu of skb_get_queue_mapping() */
> > > +#define dpa_get_queue_mapping(skb) \
> > > + raw_smp_processor_id()
> > > +#else
> > >  /* Use the queue selected by XPS */
> > >  #define dpa_get_queue_mapping(skb) \
> > >   skb_get_queue_mapping(skb)
> > > +#endif
> > 
> > Why is this necessary?  Shouldn't providing a custom .ndo_select_queue()
> > be
> > sufficient to ensure that skb_get_queue_mapping() returns the same thing?
> 
> dpa_get_queue_mapping() is used in more than one place, the ndo function
> cannot
> be used directly in all places, the current setup is justified.

It's called in two places that I see.  For the call in dpa_tx(), when will
skb_get_queue_mapping() not return the correct answer?  For the call in
dpa_select_queue(), that's already called via function pointer so it would not
be a new indirection layer.

> > And if this goes away, it's just a matter of a function pointer, so if it
> > does
> > need to be configurable it could be a runtime option.
> > 
> > -Scott
> 
> It's used on the hot path, adding an extra indirection layer to make it
> selectable
> at runtime would defeat the purpose...

"if this goes away"

I wasn't asking for a new indirection layer.

-Scott

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread Waiman Long

On 12/02/2015 11:32 PM, Mike Galbraith wrote:

On Wed, 2015-12-02 at 13:41 -0500, Waiman Long wrote:


By doing so, the perf profile became:

9.44%   0.00%  java   [kernel.vmlinux]  [k] smp_apic_timer_interrupt
8.74%   0.01%  java   [kernel.vmlinux]  [k] hrtimer_interrupt
7.83%   0.03%  java   [kernel.vmlinux]  [k] tick_sched_timer
7.74%   0.00%  java   [kernel.vmlinux]  [k] update_process_times
7.27%   0.03%  java   [kernel.vmlinux]  [k] scheduler_tick
5.94%   1.74%  java   [kernel.vmlinux]  [k] task_tick_fair
4.15%   3.92%  java   [kernel.vmlinux]  [k] update_cfs_shares

The %cpu time is still pretty high, but it is better than before.

Is that with the box booted skew_tick=1?

-Mike



I haven't tried that kernel parameter. I will try it to see if it can 
improve the situation. BTW, will there be other undesirable side effects 
of using this other than the increased power consumption as said in the 
kernel-parameters.txt file?


Cheers,
Longman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


WDS interface always busy, cannot be brought up

2015-12-03 Thread Neil Hellfeldt
commit 65a124dd719d6e90591e4756bb04e1719489705e prevents WDS interface 
type from being brought up.


Simple Test:
Before this you must have an AP interface; I used hostapd to create it, so 
I don't have a one-liner to show that.


iw dev wlan0 interface add wlan0wds0 type wds
ip link set dev wlan0wds0 address 00:11:22:33:44:55
iw dev wlan0wds0 set peer 00:66:77:88:99:AA
ifconfig wlan0wds0 up

Konsole output ifconfig: SIOCSIFFLAGS: Device or resource busy

The commit says it checks for channel availability.

WDS interfaces do not use their own channels. They use the channel of the AP 
interface that already exists.

Because of this, cfg80211_check_combinations will always return -EBUSY.

That is due to this test statement:
if (num == 0)
return -EBUSY;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/7] ACPI / LPSS: allow to use specific PM domain during ->probe()

2015-12-03 Thread Shevchenko, Andriy
On Fri, 2015-11-27 at 11:56 +0200, Andy Shevchenko wrote:
> > > > > 
> On Fri, 2015-11-27 at 00:15 +0100, Rafael J. Wysocki wrote:
> > On Thursday, November 26, 2015 06:45:17 PM Andy Shevchenko wrote:
> > > On Thu, 2015-11-26 at 18:30 +0200, Jarkko Nikula wrote:
> > > > On 11/26/2015 05:19 PM, Andy Shevchenko wrote:
> > > > This won't fix like revert of original commit does.

Jarkko, I will split this one to the revert (with Fixes tag) and new
patch to target DMA issue.
    

> > > > Primary problem here 
> > > > is that there is no explicit power on at all during LPSS device
> > > > probe
> > > > because dev->pm_domain is set before probing.
> > > 
> > > And we can't do this as in very original code of acpi_lpss.c
> > > since
> > > DMA
> > > has to be sure it's powered on while probing. We could guarantee
> > > this
> > > only in case when PM domain is assigned already and we do our
> > > quirk
> > > for
> > > it.
> > > 
> > > From my point of view we have to fix hang first since it's most
> > > painful
> > > case for users and their experience. Though I'm open to any
> > > better
> > > solution if you have any in mind.
> > > 
> > > > 
> > > > driver_probe_device
> > > >    platform_drv_probe
> > > >  dev_pm_domain_attach
> > > >    acpi_dev_pm_attach
> > > >  returns instantly because of dev->pm_domain is set
> > 
> > This looks like a candidate for the new PM domain callbacks,
> > ->activate and
> > ->dismiss.
> > 
> > ->activate() is called before the probe, so it may power up things.
> > 
> > ->dismiss() in turn is called in the failed probe case, so it can
> > do
> > the
> > cleanup.
> > 
> > Have you considered using these?
> 
> Thanks for the hint. We will check this.

I briefly checked this for DMA issue. It will not help anyhow, so we
*have to* move a power domain assignment to the BIND stage.

For I2C and rest LPSS devices this might help (though didn't look
deeply). My understanding that we assign those callbacks in the LPSS
custom PM domain and call them explicitly in acpi_lpss.c.

The code will be the same as we are using now to resume a device from
runtime suspend. This means whenever we call probe for e.g. I2C
we end up in a sequence similar to:
 pm_runtime_resume(I2C);
 ->probe(I2C);
 pm_runtime_suspend(I2C);

I will try to mock up this and check if it will work, though have no
idea what to do if I2C during probe calls pm_runtime_forbid().

Jarkko, what do you think?

-- 
Andy Shevchenko 
Intel Finland Oy
-
Intel Finland Oy
Registered Address: PL 281, 00181 Helsinki 
Business Identity Code: 0357606 - 4 
Domiciled in Helsinki 

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.


Re: [PATCH] mm/memcontrol.c: use list_{first,next}_entry

2015-12-03 Thread Johannes Weiner
On Thu, Dec 03, 2015 at 05:27:18PM +0100, Michal Hocko wrote:
> On Thu 03-12-15 22:16:55, Geliang Tang wrote:
> > To make the intention clearer, use list_{first,next}_entry instead
> > of list_entry.
> 
> Does this really help readability? This function simply uncharges the
> given list of pages. Why cannot we simply use list_for_each_entry
> instead...

You asked the same thing when reviewing the patch for the first
time. :-) I think it's time to add a comment.

>From e8ba3f31bb43ed4091b997b6ee8857dc8bbcd349 Mon Sep 17 00:00:00 2001
From: Johannes Weiner 
Date: Thu, 3 Dec 2015 14:21:45 -0500
Subject: [PATCH] mm: memcontrol: clarify the uncharge_list() loop

uncharge_list() does an unusual list walk because the function can
take regular lists with dedicated list_heads as well as singleton
lists where a single page is passed via its page->lru list node.

This can sometimes lead to confusion, as well as suggestions to
replace the loop with a list_for_each_entry(), which wouldn't work.

Signed-off-by: Johannes Weiner 
---
 mm/memcontrol.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9acfb16..f7ee1c0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5422,6 +5422,10 @@ static void uncharge_list(struct list_head *page_list)
struct list_head *next;
struct page *page;
 
+   /*
+* Note that the list can be a single page->lru; hence the
+* do-while loop instead of a simple list_for_each_entry().
+*/
next = page_list->next;
do {
unsigned int nr_pages = 1;
-- 
2.6.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] ARM: dts: Use MMC pwrseq instead regulators for IGEP WiFi init

2015-12-03 Thread Tony Lindgren
* Javier Martinez Canillas  [151203 10:29]:
> Hello Tony,
> 
> On 12/03/2015 03:16 PM, Tony Lindgren wrote:
> > * Javier Martinez Canillas  [151203 10:03]:
> >> Hello,
> >>
> >> This series converts the IGEPv2 (IGEP0020) and IGEP COM Module (IGEP0030)
> >> Device Tree to use the MMC power sequence provider to initialize the SDIO
> >> WiFi chip instead of using fake fixed regulators to just toggle the Reset
> >> and Power pins in the chip.
> >>
> >> The patches were tested on an DM3730 IGEPv2 board but the IGEP COM Module
> >> is the same with regard to the SDIO WiFi so it should be safe to land too.
> >>
> >> The IGEPv2 Rev.F and the IGEP COM Module Rev.G DTS were not converted due
> >> using a different WiFi chip (wlcore instead of libertas) than the one in
> >> the board I've access to test so I preferred to leave those untouched.
> > 
> > Do you have some solution for the start-up latency issue?
> >
> 
> No, I don't and that's one of the reasons why I didn't want to touch the
> DTS that have the wlcore chip.
> 
> The omap3-igep0020-rev-f.dts and omap3-igep0030-rev-g.dts don't have a
> startup-delay-us property in the regulator for the WLAN_EN pin as is
> the case for the IGEPv5 DTS but I don't know if those DTS are just wrong.

OK

> The DTS for the igep0020 and igep0030 that have the libertas chip,
> did have a startup-delay-us for the WIFI_PDN but using the GPIOs
> for RESET_N_W and WIFI_PDN in the mmc-pwrseq-simple reset-gpios is
> enough to make the SDIO chip reset, be enumerated and WiFi to work
> correctly so I don't know if that is really needed or is just a bad
> description in the DTS.

Hmm OK.

> Since it is working for the boards with the libertas chip, I preferred
> to remove the DTS hack but left the boards with wlcore chip since
> you said the startup-delay-us is needed there (but probably we should
> add to the regulators in the boards that don't have it then).

OK

Thanks,

Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue

2015-12-03 Thread Tejun Heo
Task or work item involved in memory reclaim trying to flush a
non-WQ_MEM_RECLAIM workqueue or one of its work items can lead to
deadlock.  Trigger WARN_ONCE() if such conditions are detected.

Signed-off-by: Tejun Heo 
Cc: Peter Zijlstra 
---
Hello,

So, something like this.  Seems to work fine here.  If there's no
objection, I'm gonna push it through wq/for-4.5.

Thanks.

 kernel/workqueue.c |   35 +++
 1 file changed, 35 insertions(+)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2330,6 +2330,37 @@ repeat:
goto repeat;
 }
 
+/**
+ * check_flush_dependency - check for flush dependency sanity
+ * @target_wq: workqueue being flushed
+ * @target_work: work item being flushed (NULL for workqueue flushes)
+ *
+ * %current is trying to flush the whole @target_wq or @target_work on it.
+ * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
+ * reclaiming memory or running on a workqueue which doesn't have
+ * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
+ * a deadlock.
+ */
+static void check_flush_dependency(struct workqueue_struct *target_wq,
+  struct work_struct *target_work)
+{
+   work_func_t target_func = target_work ? target_work->func : NULL;
+   struct worker *worker;
+
+   if (target_wq->flags & WQ_MEM_RECLAIM)
+   return;
+
+   worker = current_wq_worker();
+
+   WARN_ONCE(current->flags & PF_MEMALLOC,
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing 
!WQ_MEM_RECLAIM %s:%pf",
+ current->pid, current->comm, target_wq->name, target_func);
+   WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+ "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM 
%s:%pf",
+ worker->current_pwq->wq->name, worker->current_func,
+ target_wq->name, target_func);
+}
+
 struct wq_barrier {
struct work_struct  work;
struct completion   done;
@@ -2539,6 +2570,8 @@ void flush_workqueue(struct workqueue_st
list_add_tail(_flusher.list, >flusher_overflow);
}
 
+   check_flush_dependency(wq, NULL);
+
mutex_unlock(>mutex);
 
wait_for_completion(_flusher.done);
@@ -2711,6 +2744,8 @@ static bool start_flush_work(struct work
pwq = worker->current_pwq;
}
 
+   check_flush_dependency(pwq->wq, work);
+
insert_wq_barrier(pwq, barr, work, worker);
spin_unlock_irq(>lock);
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread Waiman Long

On 12/02/2015 03:02 PM, bseg...@google.com wrote:

Waiman Long  writes:


If a system with large number of sockets was driven to full
utilization, it was found that the clock tick handling occupied a
rather significant proportion of CPU time when fair group scheduling
and autogroup were enabled.

Running a java benchmark on a 16-socket IvyBridge-EX system, the perf
profile looked like:

   10.52%   0.00%  java   [kernel.vmlinux]  [k] smp_apic_timer_interrupt
9.66%   0.05%  java   [kernel.vmlinux]  [k] hrtimer_interrupt
8.65%   0.03%  java   [kernel.vmlinux]  [k] tick_sched_timer
8.56%   0.00%  java   [kernel.vmlinux]  [k] update_process_times
8.07%   0.03%  java   [kernel.vmlinux]  [k] scheduler_tick
6.91%   1.78%  java   [kernel.vmlinux]  [k] task_tick_fair
5.24%   5.04%  java   [kernel.vmlinux]  [k] update_cfs_shares

In particular, the high CPU time consumed by update_cfs_shares()
was mostly due to contention on the cacheline that contained the
task_group's load_avg statistical counter. This cacheline may also
contain variables like shares, cfs_rq & se which are accessed rather
frequently during clock tick processing.

This patch moves the load_avg variable into another cacheline
separated from the other frequently accessed variables. It also
creates a cacheline aligned kmemcache for task_group to make sure
that all the allocated task_group's are cacheline aligned.

By doing so, the perf profile became:

9.44%   0.00%  java   [kernel.vmlinux]  [k] smp_apic_timer_interrupt
8.74%   0.01%  java   [kernel.vmlinux]  [k] hrtimer_interrupt
7.83%   0.03%  java   [kernel.vmlinux]  [k] tick_sched_timer
7.74%   0.00%  java   [kernel.vmlinux]  [k] update_process_times
7.27%   0.03%  java   [kernel.vmlinux]  [k] scheduler_tick
5.94%   1.74%  java   [kernel.vmlinux]  [k] task_tick_fair
4.15%   3.92%  java   [kernel.vmlinux]  [k] update_cfs_shares

The %cpu time is still pretty high, but it is better than before. The
benchmark results before and after the patch were as follows:

   Before patch - Max-jOPs: 907533Critical-jOps: 134877
   After patch  - Max-jOPs: 916011Critical-jOps: 142366

Signed-off-by: Waiman Long
---
  kernel/sched/core.c  |   36 ++--
  kernel/sched/sched.h |7 ++-
  2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d568ac..e39204f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7331,6 +7331,11 @@ int in_sched_functions(unsigned long addr)
   */
  struct task_group root_task_group;
  LIST_HEAD(task_groups);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/* Cacheline aligned slab cache for task_group */
+static struct kmem_cache *task_group_cache __read_mostly;
+#endif
  #endif

  DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
@@ -7356,6 +7361,7 @@ void __init sched_init(void)
root_task_group.cfs_rq = (struct cfs_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);

+   task_group_cache = KMEM_CACHE(task_group, SLAB_HWCACHE_ALIGN);

The KMEM_CACHE macro suggests adding
____cacheline_aligned_in_smp to the struct definition instead.


The main goal is to have the load_avg placed in a new cacheline 
separated from the read-only fields above. That is why I placed 
____cacheline_aligned after load_avg. I omitted the in_smp part because 
it is in the SMP block already. Putting ____cacheline_aligned_in_smp 
won't guarantee alignment of any field within the structure.


I have done some tests, and having ____cacheline_aligned inside the 
structure has the same effect of forcing the whole structure onto a 
cacheline-aligned boundary.



  #endif /* CONFIG_FAIR_GROUP_SCHED */
  #ifdef CONFIG_RT_GROUP_SCHED
root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -7668,12 +7674,38 @@ void set_curr_task(int cpu, struct task_struct *p)
  /* task_group_lock serializes the addition/removal of task groups */
  static DEFINE_SPINLOCK(task_group_lock);

+/*
+ * Make sure that the task_group structure is cacheline aligned when
+ * fair group scheduling is enabled.
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline struct task_group *alloc_task_group(void)
+{
+   return kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
+}
+
+static inline void free_task_group(struct task_group *tg)
+{
+   kmem_cache_free(task_group_cache, tg);
+}
+#else /* CONFIG_FAIR_GROUP_SCHED */
+static inline struct task_group *alloc_task_group(void)
+{
+   return kzalloc(sizeof(struct task_group), GFP_KERNEL);
+}
+
+static inline void free_task_group(struct task_group *tg)
+{
+   kfree(tg);
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
  static void free_sched_group(struct task_group *tg)
  {
free_fair_sched_group(tg);
free_rt_sched_group(tg);
autogroup_free(tg);
-   kfree(tg);
+   free_task_group(tg);
  }

  /* allocate runqueue etc for a new task group */
@@ 

[PATCH linux-next 2/2] clk: bcm6345: Add BCM6345 gated clock support

2015-12-03 Thread Simon Arlott
The BCM6345 contains clocks gated with a register. Clocks are indexed
by bits in the register and are active high. Clock gate bits are
interleaved with other status bits and configurable clocks in the same
register.

Enabled by default for BMIPS_GENERIC.

Signed-off-by: Simon Arlott 
---
Renamed to BCM6345.

 MAINTAINERS   |   1 +
 drivers/clk/bcm/Kconfig   |   9 ++
 drivers/clk/bcm/Makefile  |   1 +
 drivers/clk/bcm/clk-bcm6345.c | 191 ++
 4 files changed, 202 insertions(+)
 create mode 100644 drivers/clk/bcm/clk-bcm6345.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 577e5ea..9a61f48 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2381,6 +2381,7 @@ F:arch/mips/bmips/*
 F: arch/mips/include/asm/mach-bmips/*
 F: arch/mips/kernel/*bmips*
 F: arch/mips/boot/dts/brcm/bcm*.dts*
+F: drivers/clk/bcm/clk-bcm6345*
 F: drivers/irqchip/irq-bcm63*
 F: drivers/irqchip/irq-bcm7*
 F: drivers/irqchip/irq-brcmstb*
diff --git a/drivers/clk/bcm/Kconfig b/drivers/clk/bcm/Kconfig
index f287845..043353a 100644
--- a/drivers/clk/bcm/Kconfig
+++ b/drivers/clk/bcm/Kconfig
@@ -8,6 +8,15 @@ config CLK_BCM_63XX
  Enable common clock framework support for Broadcom BCM63xx DSL SoCs
  based on the ARM architecture
 
+config CLK_BCM_6345
+   bool "Broadcom BCM6345 clock support"
+   depends on BMIPS_GENERIC || COMPILE_TEST
+   depends on COMMON_CLK
+   default BMIPS_GENERIC
+   help
+ Enable common clock framework support for Broadcom BCM6345 DSL SoCs
+ based on the MIPS architecture
+
 config CLK_BCM_KONA
bool "Broadcom Kona CCU clock support"
depends on ARCH_BCM_MOBILE || COMPILE_TEST
diff --git a/drivers/clk/bcm/Makefile b/drivers/clk/bcm/Makefile
index 247c267..e2bac43 100644
--- a/drivers/clk/bcm/Makefile
+++ b/drivers/clk/bcm/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_CLK_BCM_63XX) += clk-bcm63xx.o
+obj-$(CONFIG_CLK_BCM_6345) += clk-bcm6345.o
 obj-$(CONFIG_CLK_BCM_KONA) += clk-kona.o
 obj-$(CONFIG_CLK_BCM_KONA) += clk-kona-setup.o
 obj-$(CONFIG_CLK_BCM_KONA) += clk-bcm281xx.o
diff --git a/drivers/clk/bcm/clk-bcm6345.c b/drivers/clk/bcm/clk-bcm6345.c
new file mode 100644
index 000..88a1e7e
--- /dev/null
+++ b/drivers/clk/bcm/clk-bcm6345.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2015 Simon Arlott
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Derived from clk-gate.c:
+ * Copyright (C) 2010-2011 Canonical Ltd 
+ * Copyright (C) 2011-2012 Mike Turquette, Linaro Ltd 
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Basic clock which uses a bit in a regmap to gate and ungate the output
+ *
+ * Traits of this clock:
+ * prepare - clk_(un)prepare only ensures parent is (un)prepared
+ * enable - clk_enable and clk_disable are functional & control gating
+ * rate - inherits rate from parent.  No clk_set_rate support
+ * parent - fixed parent.  No clk_set_parent support
+ */
+
+struct clk_bcm6345 {
+   struct clk_hw hw;
+   struct regmap *map;
+   u32 offset;
+   u32 mask;
+};
+
+#define to_clk_bcm6345(_hw) container_of(_hw, struct clk_bcm6345, hw)
+
+static int clk_bcm6345_enable(struct clk_hw *hw)
+{
+   struct clk_bcm6345 *gate = to_clk_bcm6345(hw);
+
+   return regmap_write_bits(gate->map, gate->offset,
+   gate->mask, gate->mask);
+}
+
+static void clk_bcm6345_disable(struct clk_hw *hw)
+{
+   struct clk_bcm6345 *gate = to_clk_bcm6345(hw);
+
+   regmap_write_bits(gate->map, gate->offset,
+   gate->mask, 0);
+}
+
+static int clk_bcm6345_is_enabled(struct clk_hw *hw)
+{
+   struct clk_bcm6345 *gate = to_clk_bcm6345(hw);
+   unsigned int val;
+   int ret;
+
+   ret = regmap_read(gate->map, gate->offset, &val);
+   if (ret)
+   return ret;
+
+   val &= gate->mask;
+
+   return val ? 1 : 0;
+}
+
+const struct clk_ops clk_bcm6345_ops = {
+   .enable = clk_bcm6345_enable,
+   .disable = clk_bcm6345_disable,
+   .is_enabled = clk_bcm6345_is_enabled,
+};
+
+static struct clk * __init of_bcm6345_clk_register(const char *parent_name,
+   const char *clk_name, struct regmap *map, u32 offset, u32 mask)
+{
+   struct clk_bcm6345 *gate;
+   struct clk_init_data init;
+   struct clk *ret;
+
+   gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+   if (!gate)
+  

Can we conditionally force threading irq with primary and thread handler?

2015-12-03 Thread Yunhong Jiang
Hi, Thomas
Since commit 2a1d3ab8986d1b2 ("genirq: Handle force threading of irqs
with primary and thread handler"), even if the caller of
request_threaded_irq() provides a primary handler, that primary handler will
be invoked in thread context. This may cause latency issues for workloads
with strict real-time requirements.

I checked the discussion with Okuno at https://lkml.org/lkml/2015/9/19/372,
and it seems we need this change only if the irq is shared; otherwise, we
can still use Okuno's mechanism. Am I right? Do you think it's ok to force
threading of the primary handler for shared IRQs and otherwise clear IRQF_ONESHOT?

If yes, I will cook a patch for it.

Thanks
--jyh

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH linux-next 1/2] clk: Add brcm,bcm6345-gate-clk device tree binding

2015-12-03 Thread Simon Arlott
Add device tree binding for the BCM6345's gated clocks.

The BCM6345 contains clocks gated with a register. Clocks are indexed
by bits in the register and are active high. Clock gate bits are
interleaved with other status bits and configurable clocks in the same
register.

Signed-off-by: Simon Arlott 
---
Renamed to BCM6345.

 .../bindings/clock/brcm,bcm6345-gate-clk.txt   | 58 ++
 1 file changed, 58 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/clock/brcm,bcm6345-gate-clk.txt

diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm6345-gate-clk.txt 
b/Documentation/devicetree/bindings/clock/brcm,bcm6345-gate-clk.txt
new file mode 100644
index 000..5801264
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm6345-gate-clk.txt
@@ -0,0 +1,58 @@
+Broadcom BCM6345 clocks
+
+This binding uses the common clock binding:
+   Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The BCM6345 contains clocks gated with a register. Clocks are indexed
+by bits in the register and are active high. Clock gate bits are
+interleaved with other status bits and configurable clocks in the same
+register.
+
+Required properties:
+- compatible:  Should be "brcm,bcm<soc>-gate-clk", "brcm,bcm6345-gate-clk"
+- #clock-cells:Should be <1>.
+- regmap:  The register map phandle
+- offset:  Offset in the register map for the clock register (in bytes)
+- clocks:  The external oscillator clock phandle
+
+Example:
+
+periph_clk: periph_clk {
+   compatible = "brcm,bcm63168-gate-clk", "brcm,bcm6345-gate-clk";
+   regmap = <_cntl>;
+   offset = <0x4>;
+
+   #clock-cells = <1>;
+   clock-indices =
+   <1>,  <2>,<3>,   <4>, <5>,
+   <6>,  <7>,<8>,   <9>, <10>,
+   <11>, <12>,   <13>,  <14>,<15>,
+   <16>, <17>,   <18>,  <19>,<20>,
+   <27>, <31>;
+   clock-output-names =
+   "vdsl_qproc", "vdsl_afe", "vdsl","mips",  "wlan_ocp",
+   "dect",   "fap0", "fap1","sar",   "robosw",
+   "pcm","usbd", "usbh","ipsec", "spi",
+   "hsspi",  "pcie", "phymips", "gmac",  "nand",
+   "tbus",   "robosw250";
+};
+
+timer_clk: timer_clk {
+   compatible = "brcm,bcm63168-gate-clk", "brcm,bcm6345-gate-clk";
+   regmap = <_cntl>;
+   offset = <0x4>;
+
+   #clock-cells = <1>;
+   clock-indices = <17>, <18>;
+   clock-output-names = "uto_extin", "usb_ref";
+};
+
+ehci0: usb@10002500 {
+   compatible = "brcm,bcm63168-ehci", "brcm,bcm6345-ehci", "generic-ehci";
+   reg = <0x10002500 0x100>;
+   big-endian;
+   interrupt-parent = <_intc>;
+   interrupts = <10>;
+   clocks = <_clk 13>, <_clk 18>;
+   phys = <>;
+};
-- 
2.1.4

-- 
Simon Arlott
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] fs: clear file privilege bits when mmap writing

2015-12-03 Thread Kees Cook
Normally, when a user can modify a file that has setuid or setgid bits,
those bits are cleared when they are not the file owner or a member
of the group. This is enforced when using write and truncate but not
when writing to a shared mmap on the file. This could allow the file
writer to gain privileges by changing a binary without losing the
setuid/setgid/caps bits.

Changing the bits requires holding inode->i_mutex, so it cannot be done
during the page fault (due to mmap_sem being held during the fault).
Instead, clear the bits if PROT_WRITE is being used at mmap open time.
But we can't do the check in the right place inside mmap, so we have to
do it before holding mmap_sem, which means duplicating some checks, which
have to be available to the non-MMU builds too.

Signed-off-by: Kees Cook 
---
This just keeps getting uglier. :(

v3:
- move outside of mmap_sem for real now, fengguang
- check return code of file_remove_privs, akpm
v2:
- move to mmap from fault handler, jack
---
 include/linux/mm.h |  1 +
 mm/mmap.c  | 19 ---
 mm/util.c  | 50 ++
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..b264c8be7114 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1912,6 +1912,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+extern int do_mmap_shared_checks(struct file *file, unsigned long prot);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ce04a649f6b..bcbe592a2c49 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1321,24 +1321,13 @@ unsigned long do_mmap(struct file *file, unsigned long 
addr,
 
if (file) {
struct inode *inode = file_inode(file);
+   int err;
 
switch (flags & MAP_TYPE) {
case MAP_SHARED:
-   if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
-   return -EACCES;
-
-   /*
-* Make sure we don't allow writing to an append-only
-* file..
-*/
-   if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
-   return -EACCES;
-
-   /*
-* Make sure there are no mandatory locks on the file.
-*/
-   if (locks_verify_locked(file))
-   return -EAGAIN;
+   err = do_mmap_shared_checks(file, prot);
+   if (err)
+   return err;
 
vm_flags |= VM_SHARED | VM_MAYSHARE;
if (!(file->f_mode & FMODE_WRITE))
diff --git a/mm/util.c b/mm/util.c
index 9af1c12b310c..1882eaf33a37 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -283,6 +283,29 @@ int __weak get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
+int do_mmap_shared_checks(struct file *file, unsigned long prot)
+{
+   struct inode *inode = file_inode(file);
+
+   if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
+   return -EACCES;
+
+   /*
+* Make sure we don't allow writing to an append-only
+* file..
+*/
+   if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+   return -EACCES;
+
+   /*
+* Make sure there are no mandatory locks on the file.
+*/
+   if (locks_verify_locked(file))
+   return -EAGAIN;
+
+   return 0;
+}
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flag, unsigned long pgoff)
@@ -291,6 +314,33 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned 
long addr,
struct mm_struct *mm = current->mm;
unsigned long populate;
 
+   /*
+* If we must remove privs, we do it here since doing it during
+* page fault may be expensive and cannot hold inode->i_mutex,
+* since mm->mmap_sem is already held.
+*/
+   if (file && (flag & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
+   struct inode *inode = file_inode(file);
+   int err;
+
+   if (!IS_NOSEC(inode)) {
+   /*
+* Make sure we can't strip privs from a file that
+* wouldn't otherwise be allowed to be mmapped.
+*/
+   err = do_mmap_shared_checks(file, prot);

Re: [PATCH 00/10] Patches to get dm814x-evm booting to NFSroot

2015-12-03 Thread Tony Lindgren
* Grygorii Strashko  [151203 10:18]:
> On 12/02/2015 01:38 AM, Tony Lindgren wrote:
> 
> > Tony Lindgren (10):
> >ARM: OMAP2+: Fix timer entries for dm814x
> >clk: ti: Add few dm814x clock aliases
> >ARM: OMAP2+: Add DPPLS clock manager for dm814x
> >ARM: OMAP2+: Enable GPIO for dm814x
> >ARM: OMAP2+: Disable GPIO softreset for dm81xx
> >ARM: OMAP2+: Remove useless check for legacy booting for dm814x
> >ARM: dts: Fix dm814x entries for pllss and prcm
> >ARM: dts: Fix some mux and divider clocks to get dm814x-evm booting
> >ARM: dts: Fix dm8148 control modules ranges
> >ARM: dts: Fix dm814x pinctrl address and mask
> 
> I'm a bit worried: if you apply this series in its current order,
> it will break git bisect.
> Patch one "ARM: OMAP2+: Fix timer entries for dm814x" will use timerX_fck, 
> but those
> clocks will be added by patches 2 "clk: ti: Add few dm814x clock aliases"
> and 8 "ARM: dts: Fix some mux and divider clocks to get dm814x-evm booting"

Yeah you have a point there. I was hoping to separate them to dts and soc
related patches but clearly that's not possible.

We can keep t410 limping along just fine with this order:

ARM: dts: Fix dm814x entries for pllss and prcm
clk: ti: Add few dm814x clock aliases
ARM: OMAP2+: Add DPPLS clock manager for dm814x
ARM: dts: Fix some mux and divider clocks to get dm814x-evm booting
ARM: OMAP2+: Fix timer entries for dm814x
ARM: dts: Fix dm8148 control modules ranges
ARM: dts: Fix dm814x pinctrl address and mask
ARM: OMAP2+: Enable GPIO for dm814x
ARM: OMAP2+: Remove useless check for legacy booting for dm814x

Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] proc: add a reschedule point in proc_readfd_common()

2015-12-03 Thread Eric Dumazet
From: Eric Dumazet 

A user can pass an arbitrarily large buffer to getdents().

It is typically a 32KB buffer used by libc scandir() implementation.

When scanning /proc/{pid}/fd, we can hold the cpu way too long,
so add a cond_resched() to be kind to other tasks.

We've seen latencies of more than 50ms on real workloads.

Signed-off-by: Eric Dumazet 
Cc: Alexander Viro 
---
 fs/proc/fd.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 3c2a915c695a..56afa5ef08f2 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -258,6 +258,7 @@ static int proc_readfd_common(struct file *file, struct 
dir_context *ctx,
 name, len, instantiate, p,
 (void *)(unsigned long)fd))
goto out_fd_loop;
+   cond_resched();
rcu_read_lock();
}
rcu_read_unlock();


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 2/3] mac80211: Prevent build failure with CONFIG_UBSAN=y

2015-12-03 Thread Andrey Ryabinin
2015-12-03 20:05 GMT+03:00 Johannes Berg :
> On Thu, 2015-12-03 at 18:50 +0300, Andrey Ryabinin wrote:
>> With upcoming CONFIG_UBSAN the following BUILD_BUG_ON in
>> net/mac80211/debugfs.c starts to trigger:
>>   BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void
>> *)0x1);
>>
>> It seems that compiler instrumentation causes some code
>> deoptimizations.
>> Because of that, GCC is not able to resolve the condition in
>> BUILD_BUG_ON()
>> at compile time.
>>
>> We could make size of hw_flag_names array unspecified and replace the
>> condition in BUILD_BUG_ON() with following:
>>   ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS
>>
>> That will have the same effect as before (adding new flag without
>> updating
>> array will trigger build failure) except it doesn't fail with
>> CONFIG_UBSAN.
>> As a bonus this patch slightly decreases size of hw_flag_names array.
>>
> Seems fine, would you want to take it through some other tree together
> with UBSAN, or do you expect that to still take long enough to allow
> this to trickle through our trees?
>

I expect that Andrew will take it with UBSAN for 4.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCHv2 1/2] extcon: add driver for Intel USB mux

2015-12-03 Thread Sergei Shtylyov

On 12/03/2015 12:29 PM, Heikki Krogerus wrote:


Several Intel PCHs and SOCs have an internal mux that is
used to share one USB port between USB Device Controller and
xHCI. The mux is normally handled by System FW/BIOS, but not
always. For those platforms where the FW does not take care
of the mux, this driver is needed.

Signed-off-by: Heikki Krogerus 

[...]

diff --git a/drivers/extcon/extcon-intel-usb.c 
b/drivers/extcon/extcon-intel-usb.c
new file mode 100644
index 000..3da6039
--- /dev/null
+++ b/drivers/extcon/extcon-intel-usb.c
@@ -0,0 +1,118 @@

[...]

+struct intel_usb_mux *intel_usb_mux_register(struct device *dev,
+struct resource *r)
+{
+   struct intel_usb_mux *mux;
+   int ret;
+
+   mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+   if (!mux)
+   return ERR_PTR(-ENOMEM);
+
+   mux->regs = ioremap_nocache(r->start, resource_size(r));
+   if (!mux->regs) {
+   kfree(mux);
+   return ERR_PTR(-ENOMEM);
+   }
+
+   mux->cfg0_ctx = readl(mux->regs + INTEL_MUX_CFG0);
+
+   mux->edev.dev.parent = dev;
+   mux->edev.supported_cable = intel_mux_cable;
+
+   ret = extcon_dev_register(&mux->edev);


   I don't see where you are calling extcon_set_cable_state() for the
"USB-HOST" cable...

This doesn't seem a legitimate extcon driver to me... :-/


+   if (ret)
+   goto err;
+
+   mux->edev.name = "intel_usb_mux";
+   mux->edev.state = !!(readl(mux->regs + INTEL_MUX_CFG1) & CFG1_MODE);
+
+   /* An external source needs to tell us what to do */
+   mux->nb.notifier_call = intel_usb_mux_notifier;
+   ret = extcon_register_notifier(&mux->edev, EXTCON_USB_HOST, &mux->nb);


   So in reality this is an extcon client, not a provider? BTW, this API 
isn't recommended...


MBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 1/4] EDAC: add DDR4 flag

2015-12-03 Thread Luck, Tony
> For patch 2 and 3 I'd need an ack from Mauro/Tony. CCed.

parts 2 & 3 are OK

Acked-by: Tony Luck 

part 4 (the actual KNL piece) does not seem to break an earlier (Broadwell) system ...
but that isn't enough to qualify for an Acked-by/Reviewed-by/Tested-by.


-Tony
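--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/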

Re: [PATCH V2 2/7] mm/gup: add gup trace points

2015-12-03 Thread Steven Rostedt
On Thu, 03 Dec 2015 10:36:18 -0800
"Shi, Yang"  wrote:

> > called directly that calls these functions internally and the tracepoint
> > can trap the return value.  
> 
> This will incur more changes in other subsystems (futex, kvm, etc), I'm 
> not sure if it is worth making such changes to get return value.

No, it wouldn't require any changes outside of this.

-long __get_user_pages(..)
+static long __get_user_pages_internal(..)
{
  [..]
}
+
+long __get_user_pages(..)
+{
+   long ret;
+   ret = __get_user_pages_internal(..);
+   trace_get_user_pages(.., ret)
+}

> 
> > I can probably make function_graph tracer give return values, although
> > it will give a return value for void functions as well. And it may give
> > long long returns for int returns that may have bogus data in the
> > higher bits.  
> 
> If the return value requirement is not limited to gup, the approach 
> sounds more reasonable.
>

Others have asked about it. Maybe I should do it.

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCHv2 2/2] usb: pci-quirks: register USB mux found on Cherrytrail SOC

2015-12-03 Thread Sergei Shtylyov

Hello.

On 12/03/2015 12:29 PM, Heikki Krogerus wrote:


Intel Braswell/Cherrytrail has an internal mux that shares
one USB port between USB Device Controller and xHCI. The
same mux is found on several SOCs from Intel, but only on
a few Cherrytrail based platforms the OS is expected to
configure it. Normally BIOS takes care of it.

The driver for the mux is an "extcon" driver. With this we
only register the mux if it's detected.


   Hm, I had a somewhat similar case on the Renesas SoC: the 2-channel mux
was mapped to the USB device PHY register space, so I chose to implement a PHY
driver, not extcon...
   I don't quite understand how a mux maps to the extcon core -- doesn't it
only provide support for input signals?



Suggested-by: Lu Baolu 
Signed-off-by: Heikki Krogerus 
---
  drivers/usb/host/pci-quirks.c | 26 +-
  1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 26cb8c8..ee875e1 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c

[...]

@@ -1022,9 +1023,32 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev)
writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);

  hc_init:
-   if (pdev->vendor == PCI_VENDOR_ID_INTEL)
+   if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
usb_enable_intel_xhci_ports(pdev);

+   /*
+* Initialize the internal mux that shares a port between USB
+* Device Controller and xHCI on platforms that have it.
+*/
+#define XHCI_INTEL_VENDOR_CAPS 192
+#define XHCI_INTEL_USB_MUX_OFFSET 0x80d8
+   if (xhci_find_next_ext_cap(base, 0, XHCI_INTEL_VENDOR_CAPS)) {
+   struct intel_usb_mux *mux;
+   struct resource r;
+
+   r.start = pci_resource_start(pdev, 0) +
+   XHCI_INTEL_USB_MUX_OFFSET;
+   r.end   = r.start + 8;
+   r.flags = IORESOURCE_MEM;
+
+   mux = intel_usb_mux_register(&pdev->dev, &r);
+   if (IS_ERR(mux) && PTR_ERR(mux) == -ENOTSUPP)


   I think you can drop IS_ERR() check here...


+   dev_dbg(&pdev->dev, "USB mux not supported\n");
+   else if (IS_ERR(mux))
+   dev_err(&pdev->dev, "failed to register mux\n");
+   }
+   }
+
op_reg_base = base + XHCI_HC_LENGTH(readl(base));

/* Wait for the host controller to be ready before writing any


MBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/3] resource: Add @flags to region_intersects()

2015-12-03 Thread Linus Torvalds
On Thu, Dec 3, 2015 at 10:40 AM, Borislav Petkov  wrote:
> On Thu, Dec 03, 2015 at 11:54:19AM -0700, Toshi Kani wrote:
>> Adding a new type for regular memory will require inspecting the codes
>> using IORESOURCE_MEM currently, and modify them to use the new type if
>> their target ranges are regular memory.  There are many references to this
>> type across multiple architectures and drivers, which make this inspection
>> and testing challenging.
>
> What's wrong with adding a new type_flags to struct resource and not
> touching IORESOURCE_* at all?

Bah. Both of these ideas are bogus.

Just add a new flag. The bits are already modifiers that you can
*combine* to show what kind of resource it is, and we already have
things like IORESOURCE_PREFETCH etc, that are in *addition* to the
normal IORESOURCE_MEM bit.

Just add another modifier: IORESOURCE_RAM.

So it would still show up as IORESOURCE_MEM, but it would have
additional information specifying that it's actually RAM.

If somebody does something like

 if (res->flags == IORESOURCE_MEM)

then they are already completely broken and won't work *anyway*. It's
a bitmask, not a set of values.

 Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86/rapl: Do not load in a guest

2015-12-03 Thread Jacob Pan
On Thu, 3 Dec 2015 19:42:41 +0100
Borislav Petkov  wrote:

> No, those are going away:
> 
> https://lkml.kernel.org/r/1448982023-19187-4-git-send-email...@alien8.de
> 
> Next on my TODO is killing the rest of them.

Fair enough.
Acked-by: Jacob Pan 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] pinctrl: single: remove misuse of IRQF_NO_SUSPEND flag

2015-12-03 Thread Sudeep Holla



On 03/12/15 18:13, Tony Lindgren wrote:

* Linus Walleij  [151201 06:07]:

On Fri, Nov 27, 2015 at 6:21 PM, Sudeep Holla  wrote:


From: Sudeep Holla 

The IRQF_NO_SUSPEND flag is used to identify the interrupts that should
be left enabled so as to allow them to work as expected during the
suspend-resume cycle, but doesn't guarantee that it will wake the system
from a suspended state, enable_irq_wake is recommended to be used for
the wakeup.

This patch removes the use of IRQF_NO_SUSPEND flags replacing it with
irq_set_irq_wake instead.

Cc: Linus Walleij 
Cc: linux-g...@vger.kernel.org
Signed-off-by: Sudeep Holla 


I need Tony's ACK on this as well.


At least on omaps, this controller is always powered and we never want to
suspend it as it handles wake-up events for all the IO pins. And that
usecase sounds exactly like what you're describing above.



Understood, but I assume this is a generic driver that can be used by
any pinmux.


I don't quite follow what your suggested alternative for an interrupt
controller is?



Why can't we use enable_irq_wake even for parent/interrupt controller as
they can be considered as parent wakeup irq. I agree the interrupt
controller may not be powered down, but still it's part of wakeup and
the irq core needs to identify that. By just marking IRQF_NO_SUSPEND,
you are saying that you can handle interrupt in the suspend path but not
informing that it's a wakeup interrupt.

With this change, the wakeup handler (including the parent handler) is
called when it's safe as the irq core maintains the state machine.


At least we need to have the alternative patched in with this change before
just removing IRQF_NO_SUSPEND.



I have added irq_set_irq_wake(pcs_soc->irq, state) in pcs_irq_set_wake
which ensures it's marked for wakeup.


The enable_irq_wake is naturally used for the consumer drivers of this
interrupt controller and actually mostly done automatically now with the
dev_pm_set_dedicated_wake_irq.



Agreed, no doubt on that.

--
Regards,
Sudeep
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V8] ACPI, PCI, irq: support IRQ numbers greater than 256

2015-12-03 Thread Sinan Kaya
The ACPI compiler uses the extended format when the interrupt numbers used
are greater than 15. The extended IRQ is 32 bits according to the ACPI
spec. The code supports parsing the extended interrupt numbers. However,
due to the data structure type used, the code silently truncates interrupt
numbers greater than 256.

First, this patch changes the interrupt number type to 32 bits. Next, the
penalty array has been limited to the 16 ISA IRQs. Finally, a new penalty
linked list has been added for all other interrupts (16 and above). If an IRQ
is not found in the linked list, an IRQ info structure will be dynamically
allocated on the first access and will be placed on the list for further
reuse. The list will grow by the number of supported interrupts in the
ACPI table rather than having a hard limit of 256.

Signed-off-by: Sinan Kaya 
---
 drivers/acpi/pci_link.c | 134 +---
 1 file changed, 104 insertions(+), 30 deletions(-)

diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 7c8408b..e10661f 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -4,6 +4,7 @@
  *  Copyright (C) 2001, 2002 Andy Grover 
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh 
  *  Copyright (C) 2002   Dominik Brodowski 
+ *  Copyright (c) 2015, The Linux Foundation. All rights reserved.
  *
  * ~~
  *
@@ -67,12 +68,12 @@ static struct acpi_scan_handler pci_link_handler = {
  * later even the link is disable. Instead, we just repick the active irq
  */
 struct acpi_pci_link_irq {
-   u8 active;  /* Current IRQ */
+   u32 active; /* Current IRQ */
u8 triggering;  /* All IRQs */
u8 polarity;/* All IRQs */
u8 resource_type;
u8 possible_count;
-   u8 possible[ACPI_PCI_LINK_MAX_POSSIBLE];
+   u32 possible[ACPI_PCI_LINK_MAX_POSSIBLE];
u8 initialized:1;
u8 reserved:7;
 };
@@ -437,8 +438,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, 
int irq)
  * enabled system.
  */
 
-#define ACPI_MAX_IRQS  256
-#define ACPI_MAX_ISA_IRQ   16
+ #define ACPI_MAX_ISA_IRQ  16
 
 #define PIRQ_PENALTY_PCI_AVAILABLE (0)
 #define PIRQ_PENALTY_PCI_POSSIBLE  (16*16)
@@ -447,7 +447,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, 
int irq)
 #define PIRQ_PENALTY_ISA_USED  (16*16*16*16*16)
 #define PIRQ_PENALTY_ISA_ALWAYS(16*16*16*16*16*16)
 
-static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
+static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
PIRQ_PENALTY_ISA_ALWAYS,/* IRQ0 timer */
PIRQ_PENALTY_ISA_ALWAYS,/* IRQ1 keyboard */
PIRQ_PENALTY_ISA_ALWAYS,/* IRQ2 cascade */
@@ -464,9 +464,61 @@ static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
PIRQ_PENALTY_ISA_USED,  /* IRQ13 fpe, sometimes */
PIRQ_PENALTY_ISA_USED,  /* IRQ14 ide0 */
PIRQ_PENALTY_ISA_USED,  /* IRQ15 ide1 */
-   /* >IRQ15 */
 };
 
+struct irq_penalty_info {
+   unsigned int irq;
+   int penalty;
+   struct list_head node;
+};
+
+LIST_HEAD(acpi_irq_penalty_list);
+
+static int acpi_irq_get_penalty(int irq)
+{
+   struct irq_penalty_info *irq_info;
+
+   if (irq < ACPI_MAX_ISA_IRQ)
+   return acpi_irq_isa_penalty[irq];
+
+   list_for_each_entry(irq_info, _irq_penalty_list, node) {
+   if (irq_info->irq == irq)
+   return irq_info->penalty;
+   }
+
+   return 0;
+}
+
+static int acpi_irq_set_penalty(int irq, unsigned int new_penalty)
+{
+   struct irq_penalty_info *irq_info;
+
+   /* see if this is a ISA IRQ */
+   if (irq < ACPI_MAX_ISA_IRQ) {
+   acpi_irq_isa_penalty[irq] = new_penalty;
+   return 0;
+   }
+
+   /* next, try to locate from the dynamic list */
+   list_for_each_entry(irq_info, _irq_penalty_list, node) {
+   if (irq_info->irq == irq) {
+   irq_info->penalty  = new_penalty;
+   return 0;
+   }
+   }
+
+   /* nope, let's allocate a slot for this IRQ */
+   irq_info = kzalloc(sizeof(*irq_info), GFP_KERNEL);
+   if (!irq_info)
+   return -ENOMEM;
+
+   irq_info->irq = irq;
+   irq_info->penalty = new_penalty;
+   list_add_tail(_info->node, _irq_penalty_list);
+
+   return 0;
+}
+
 int __init acpi_irq_penalty_init(void)
 {
struct acpi_pci_link *link;
@@ -487,15 +539,22 @@ int __init acpi_irq_penalty_init(void)
link->irq.possible_count;
 
for (i = 0; i < link->irq.possible_count; i++) {
-   if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
-   acpi_irq_penalty[link->irq.
-possible[i]] 

Re: [PATCH 0/5] Threaded MSI interrupt for VFIO PCI device

2015-12-03 Thread Alex Williamson
On Thu, 2015-12-03 at 10:22 -0800, Yunhong Jiang wrote:
> When assigning a VFIO device to a KVM guest with low latency requirement, it  
> is better to handle the interrupt in the hard interrupt context, to reduce 
> the context switch to/from the IRQ thread.
> 
> Based on discussion on https://lkml.org/lkml/2015/10/26/764, the VFIO msi 
> interrupt is changed to use request_threaded_irq(). The primary interrupt 
> handler tries to set the guest interrupt atomically. If it fails to achieve 
> it, a threaded interrupt handler will be invoked.
> 
> The irq_bypass manager is extended for this purpose. The KVM eventfd will 
> provide a irqbypass consumer to handle the interrupt at hard interrupt 
> context. The producer will invoke the consumer's handler then.

Do you have any performance data?  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net: smc911x: convert pxa dma to dmaengine

2015-12-03 Thread Robert Jarzmik
David Miller  writes:

> From: Robert Jarzmik 
> Date: Mon, 30 Nov 2015 22:40:28 +0100
>
>> Convert the dma transfers to be dmaengine based, now pxa has a dmaengine
>> slave driver. This makes this driver a bit more PXA agnostic.
>> 
>> The driver was only compile tested. The risk is quite small as no
>> current PXA platform I'm aware of is using smc911x driver.
>> 
>> Signed-off-by: Robert Jarzmik 
>
> I've marked this 'deferred' in patchwork until someone tests
> these changes and says they should be good on all platforms
> this chip is used.

Okay, so would any maintainer of non-PXA boards give feedback on this patch?
The ones I have found are:
 - sh2007: Guennadi and Hitoshi
 - armadillo5x0: Alberto
 - imx v6 and imx v7: Fabio
I've added the patch at the end of this mail for easier handling.

Now, if no maintainer gives it a test, what do we do, David? I intend to
remove "arch/arm/mach-pxa/include/mach/dma.h" in the near future, which will
break this driver (at least for PXA boards, even if none has been identified
so far).
So could we agree on a deadline, and on what you wish to do: either drop the
patch, apply it, or something else?

Cheers.

-- 
Robert

--8>--
>From 9b7e996fe92d81d417f59a4eed92b3472594a2e8 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik 
Date: Thu, 10 Sep 2015 14:48:09 +0200
Subject: [PATCH] net: smc911x: convert pxa dma to dmaengine

Convert the dma transfers to be dmaengine based, now pxa has a dmaengine
slave driver. This makes this driver a bit more PXA agnostic.

The driver was only compile tested. The risk is quite small as no
current PXA platform I'm aware of is using smc911x driver.

Signed-off-by: Robert Jarzmik 
---
 drivers/net/ethernet/smsc/smc911x.c | 85 -
 drivers/net/ethernet/smsc/smc911x.h | 63 ---
 2 files changed, 82 insertions(+), 66 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smc911x.c 
b/drivers/net/ethernet/smsc/smc911x.c
index bd64eb982e52..3f5711061432 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -73,6 +73,9 @@ static const char version[] =
 #include 
 #include 
 
+#include 
+#include 
+
 #include 
 
 #include "smc911x.h"
@@ -1174,18 +1177,16 @@ static irqreturn_t smc911x_interrupt(int irq, void 
*dev_id)
 
 #ifdef SMC_USE_DMA
 static void
-smc911x_tx_dma_irq(int dma, void *data)
+smc911x_tx_dma_irq(void *data)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct smc911x_local *lp = netdev_priv(dev);
+   struct smc911x_local *lp = data;
+   struct net_device *dev = lp->netdev;
struct sk_buff *skb = lp->current_tx_skb;
unsigned long flags;
 
DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
 
DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "TX DMA irq handler\n");
-   /* Clear the DMA interrupt sources */
-   SMC_DMA_ACK_IRQ(dev, dma);
BUG_ON(skb == NULL);
dma_unmap_single(NULL, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE);
dev->trans_start = jiffies;
@@ -1208,18 +1209,16 @@ smc911x_tx_dma_irq(int dma, void *data)
"TX DMA irq completed\n");
 }
 static void
-smc911x_rx_dma_irq(int dma, void *data)
+smc911x_rx_dma_irq(void *data)
 {
-   struct net_device *dev = (struct net_device *)data;
-   struct smc911x_local *lp = netdev_priv(dev);
+   struct smc911x_local *lp = data;
+   struct net_device *dev = lp->netdev;
struct sk_buff *skb = lp->current_rx_skb;
unsigned long flags;
unsigned int pkts;
 
DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, "RX DMA irq handler\n");
-   /* Clear the DMA interrupt sources */
-   SMC_DMA_ACK_IRQ(dev, dma);
dma_unmap_single(NULL, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE);
BUG_ON(skb == NULL);
lp->current_rx_skb = NULL;
@@ -1792,6 +1791,9 @@ static int smc911x_probe(struct net_device *dev)
unsigned int val, chip_id, revision;
const char *version_string;
unsigned long irq_flags;
+   struct dma_slave_config config;
+   dma_cap_mask_t mask;
+   struct pxad_param param;
 
DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
 
@@ -1963,11 +1965,40 @@ static int smc911x_probe(struct net_device *dev)
goto err_out;
 
 #ifdef SMC_USE_DMA
-   lp->rxdma = SMC_DMA_REQUEST(dev, smc911x_rx_dma_irq);
-   lp->txdma = SMC_DMA_REQUEST(dev, smc911x_tx_dma_irq);
+
+   dma_cap_zero(mask);
+   dma_cap_set(DMA_SLAVE, mask);
+   param.prio = PXAD_PRIO_LOWEST;
+   param.drcmr = -1UL;
+
+   lp->rxdma =
+   dma_request_slave_channel_compat(mask, pxad_filter_fn,
+, >dev, "rx");
+   lp->txdma =
+   dma_request_slave_channel_compat(mask, pxad_filter_fn,
+, >dev, "tx");
lp->rxdma_active = 0;
lp->txdma_active = 

Re: [PATCH] x86/rapl: Do not load in a guest

2015-12-03 Thread Borislav Petkov
On Thu, Dec 03, 2015 at 10:38:28AM -0800, Jacob Pan wrote:
> or use this?
> #define cpu_has_hypervisor  boot_cpu_has(X86_FEATURE_HYPERVISOR)

No, those are going away:

https://lkml.kernel.org/r/1448982023-19187-4-git-send-email...@alien8.de

Next on my TODO is killing the rest of them.

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/3] resource: Add @flags to region_intersects()

2015-12-03 Thread Borislav Petkov
On Thu, Dec 03, 2015 at 11:54:19AM -0700, Toshi Kani wrote:
> Adding a new type for regular memory will require inspecting the codes
> using IORESOURCE_MEM currently, and modify them to use the new type if
> their target ranges are regular memory.  There are many references to this
> type across multiple architectures and drivers, which make this inspection
> and testing challenging.

What's wrong with adding a new type_flags to struct resource and not
touching IORESOURCE_* at all?

They'll be called something like RES_TYPE_RAM, _PMEM, _SYSMEM...

Or would that confuse more...?

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] SCSI: Fix NULL pointer dereference in runtime PM

2015-12-03 Thread Jens Axboe

On 11/30/2015 11:45 PM, Ken Xue wrote:

The routines in scsi_pm.c assume that if a runtime-PM callback is
invoked for a SCSI device, it can only mean that the device's driver
has asked the block layer to handle the runtime power management (by
calling blk_pm_runtime_init(), which among other things sets q->dev).

However, this assumption turns out to be wrong for things like the ses
driver.  Normally ses devices are not allowed to do runtime PM, but
userspace can override this setting.  If this happens, the kernel gets
a NULL pointer dereference when blk_post_runtime_resume() tries to use
the uninitialized q->dev pointer.

This patch fixes the problem by checking q->dev in the block layer before
handling runtime PM. Since ses doesn't define any PM callbacks and doesn't
call blk_pm_runtime_init(), the crash won't occur.

This fixes Bugzilla #101371.
https://bugzilla.kernel.org/show_bug.cgi?id=101371

More discussion can be found at the link below.
http://marc.info/?l=linux-scsi&m=144163730531875&w=2



Added for 4.4, thanks.

--
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86/rapl: Do not load in a guest

2015-12-03 Thread Jacob Pan
On Thu,  3 Dec 2015 19:27:02 +0100
Borislav Petkov  wrote:

>  
> + if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> + return 0;
> +
or use this?
#define cpu_has_hypervisor  boot_cpu_has(X86_FEATURE_HYPERVISOR)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/5] KVM: Extract the irqfd_wakeup_pollin/irqfd_wakeup_pollup

2015-12-03 Thread Yunhong Jiang
Separate the irqfd_wakeup_pollin/irqfd_wakeup_pollup from the
irqfd_wakeup, so that we can reuse the logic for MSI fastpath injection.

Signed-off-by: Yunhong Jiang 
---
 virt/kvm/eventfd.c | 86 --
 1 file changed, 51 insertions(+), 35 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 46dbc0a7dfc1..c31d43b762db 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -180,6 +180,53 @@ int __attribute__((weak)) kvm_arch_set_irq_inatomic(
return -EWOULDBLOCK;
 }
 
+static int
+irqfd_wakeup_pollin(struct kvm_kernel_irqfd *irqfd)
+{
+   struct kvm *kvm = irqfd->kvm;
+   struct kvm_kernel_irq_routing_entry irq;
+   unsigned seq;
+   int idx, ret;
+
+   idx = srcu_read_lock(>irq_srcu);
+   do {
+   seq = read_seqcount_begin(>irq_entry_sc);
+   irq = irqfd->irq_entry;
+   } while (read_seqcount_retry(>irq_entry_sc, seq));
+   /* An event has been signaled, inject an interrupt */
+   ret = kvm_arch_set_irq_inatomic(, kvm,
+   KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+   false);
+   srcu_read_unlock(>irq_srcu, idx);
+
+   return ret;
+}
+
+static int
+irqfd_wakeup_pollup(struct kvm_kernel_irqfd *irqfd)
+{
+   struct kvm *kvm = irqfd->kvm;
+   unsigned long flags;
+
+   spin_lock_irqsave(>irqfds.lock, flags);
+
+   /*
+* We must check if someone deactivated the irqfd before
+* we could acquire the irqfds.lock since the item is
+* deactivated from the KVM side before it is unhooked from
+* the wait-queue.  If it is already deactivated, we can
+* simply return knowing the other side will cleanup for us.
+* We cannot race against the irqfd going away since the
+* other side is required to acquire wqh->lock, which we hold
+*/
+   if (irqfd_is_active(irqfd))
+   irqfd_deactivate(irqfd);
+
+   spin_unlock_irqrestore(>irqfds.lock, flags);
+
+   return 0;
+}
+
 /*
  * Called with wqh->lock held and interrupts disabled
  */
@@ -189,45 +236,14 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, 
void *key)
struct kvm_kernel_irqfd *irqfd =
container_of(wait, struct kvm_kernel_irqfd, wait);
unsigned long flags = (unsigned long)key;
-   struct kvm_kernel_irq_routing_entry irq;
-   struct kvm *kvm = irqfd->kvm;
-   unsigned seq;
-   int idx;
 
-   if (flags & POLLIN) {
-   idx = srcu_read_lock(>irq_srcu);
-   do {
-   seq = read_seqcount_begin(>irq_entry_sc);
-   irq = irqfd->irq_entry;
-   } while (read_seqcount_retry(>irq_entry_sc, seq));
-   /* An event has been signaled, inject an interrupt */
-   if (kvm_arch_set_irq_inatomic(, kvm,
- KVM_USERSPACE_IRQ_SOURCE_ID, 1,
- false) == -EWOULDBLOCK)
+   if (flags & POLLIN)
+   if (irqfd_wakeup_pollin(irqfd) == -EWOULDBLOCK)
schedule_work(>inject);
-   srcu_read_unlock(>irq_srcu, idx);
-   }
 
-   if (flags & POLLHUP) {
+   if (flags & POLLHUP)
/* The eventfd is closing, detach from KVM */
-   unsigned long flags;
-
-   spin_lock_irqsave(>irqfds.lock, flags);
-
-   /*
-* We must check if someone deactivated the irqfd before
-* we could acquire the irqfds.lock since the item is
-* deactivated from the KVM side before it is unhooked from
-* the wait-queue.  If it is already deactivated, we can
-* simply return knowing the other side will cleanup for us.
-* We cannot race against the irqfd going away since the
-* other side is required to acquire wqh->lock, which we hold
-*/
-   if (irqfd_is_active(irqfd))
-   irqfd_deactivate(irqfd);
-
-   spin_unlock_irqrestore(>irqfds.lock, flags);
-   }
+   irqfd_wakeup_pollup(irqfd);
 
return 0;
 }
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/5] VFIO: Support threaded interrupt handling on VFIO

2015-12-03 Thread Yunhong Jiang
For a VFIO device with the MSI interrupt type, it's possible to handle the
interrupt in hard interrupt context without invoking the interrupt
thread. Handling the interrupt in hard interrupt context reduces the
interrupt latency.

Signed-off-by: Yunhong Jiang 
---
 drivers/vfio/pci/vfio_pci_intrs.c | 39 ++-
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 3b3ba15558b7..108d335c5656 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -236,12 +236,35 @@ static void vfio_intx_disable(struct vfio_pci_device 
*vdev)
kfree(vdev->ctx);
 }
 
+static irqreturn_t vfio_msihandler(int irq, void *arg)
+{
+   struct vfio_pci_irq_ctx *ctx = arg;
+   struct irq_bypass_producer *producer = >producer;
+   struct irq_bypass_consumer *consumer;
+   int ret = IRQ_HANDLED, idx;
+
+   idx = srcu_read_lock(>srcu);
+
+   list_for_each_entry_rcu(consumer, >consumers, sibling) {
+   /*
+* Invoke the thread handler if any consumer would block, but
+* finish all consumes.
+*/
+   if (consumer->handle_irq(consumer->irq_context) == -EWOULDBLOCK)
+   ret = IRQ_WAKE_THREAD;
+   continue;
+   }
+
+   srcu_read_unlock(>srcu, idx);
+   return ret;
+}
+
 /*
  * MSI/MSI-X
  */
-static irqreturn_t vfio_msihandler(int irq, void *arg)
+static irqreturn_t vfio_msihandler_threaded(int irq, void *arg)
 {
-   struct eventfd_ctx *trigger = arg;
+   struct eventfd_ctx *trigger = ((struct vfio_pci_irq_ctx *)arg)->trigger;
 
eventfd_signal(trigger, 1);
return IRQ_HANDLED;
@@ -318,7 +341,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return -EINVAL;
 
if (vdev->ctx[vector].trigger) {
-   free_irq(irq, vdev->ctx[vector].trigger);
+   free_irq(irq, >ctx[vector]);
irq_bypass_unregister_producer(>ctx[vector].producer);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
@@ -353,8 +376,14 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
pci_write_msi_msg(irq, );
}
 
-   ret = request_irq(irq, vfio_msihandler, 0,
- vdev->ctx[vector].name, trigger);
+   /*
+* Currently the primary handler for the thread_irq will be invoked on
+* a thread, the IRQF_ONESHOT is a hack for it.
+*/
+   ret = request_threaded_irq(irq, vfio_msihandler,
+  vfio_msihandler_threaded,
+  IRQF_ONESHOT, vdev->ctx[vector].name,
+  >ctx[vector]);
if (ret) {
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(trigger);
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] mm, printk: introduce new format string for flags

2015-12-03 Thread yalin wang

> On Dec 3, 2015, at 00:03, Rasmus Villemoes  wrote:
> 
> On Thu, Dec 03 2015, yalin wang  wrote:
> 
>>> On Dec 2, 2015, at 13:04, Vlastimil Babka  wrote:
>>> 
>>> On 12/02/2015 06:40 PM, yalin wang wrote:
>>> 
>>> (please trim your reply next time, no need to quote whole patch here)
>>> 
 i am thinking why not make %pg* to be more generic ?
 not restricted to only GFP / vma flags / page flags .
 so could we change format like this ?
 define a flag spec struct to include flag and trace_print_flags and some 
 other option :
 typedef struct { 
 unsigned long flag;
 structtrace_print_flags *flags;
 unsigned long option; } flag_sec;
 flag_sec my_flag;
 in printk we only pass like this :
 printk(“%pg\n”, _flag) ;
 then it can print any flags defined by user .
 more useful for other drivers to use .
>>> 
>>> I don't know, it sounds quite complicated
> 
> Agreed, I think this would be premature generalization. There's also
> some value in having the individual %pgX specifiers, as that allows
> individual tweaks such as the mask_out for page flags.
> 
> given that we had no flags printing
>> 
if we use this generic method, %pgX where X can be used to specify some flag to
mask out some thing .  it will be great .

> 
> Compared to printk("%pgv\n", >flag), I know which I'd prefer to read.
> 
> >> i am not sure if the DECLARE_FLAG_PRINTK_FMT and FLAG_PRINTK_FMT macros
> >> can be combined into one macro ?
>> maybe need some trick here .
>> 
>> is it possible ?
> 
> Technically, I think the answer is yes, at least in C99 (and I suppose
> gcc would accept it in gnu89 mode as well).
> 
> printk("%pg\n", &(struct flag_printer){.flags = my_flags, .names = 
> vmaflags_names});
> 
> Not tested, and I still don't think it would be particularly readable
> even when macroized
> 
> printk("%pg\n", PRINTF_VMAFLAGS(my_flags));
i test on gcc 4.9.3, it can work for this method,
so the final solution like this:
printk.h:
struct flag_fmt_spec {
unsigned long flag;
struct trace_print_flags *flags;
int array_size;
char delimiter; }

#define FLAG_FORMAT(flag, flag_array, delimiter) (&(struct flag_fmt_spec){ .flag 
= flag, .flags = flag_array, .array_size = ARRAY_SIZE(flag_array), .delimiter = 
delimiter})
#define VMA_FLAG_FORMAT(flag)  FLAG_FORMAT(flag, vmaflags_names, '|')

source code:
printk("%pg\n", VMA_FLAG_FORMAT(my_flags)); 

That's all. See the cpumask_pr_args(masks) macro:
it also uses a macro together with %*pb to print a cpu mask.
I think this method is not very complex to use.

Searching the source code,
there are lots of printk calls that print flags as hex numbers:
$ grep -n -r 'printk.*flag.*%x' .
It would be great if this flag string printing were generic.

Thanks









--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/5] KVM: Add the irq handling consumer

2015-12-03 Thread Yunhong Jiang
Add an irq_bypass consumer to the KVM eventfd, so that when an MSI interrupt
is triggered from VFIO, it can be handled quickly.

Signed-off-by: Yunhong Jiang 
---
 include/linux/kvm_irqfd.h |  1 +
 virt/kvm/eventfd.c| 42 ++
 2 files changed, 43 insertions(+)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 0c1de05098c8..5573d53ccebb 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -65,6 +65,7 @@ struct kvm_kernel_irqfd {
poll_table pt;
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
+   struct irq_bypass_consumer fastpath;
struct irq_bypass_producer *producer;
 };
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c31d43b762db..b20a2d1bbf73 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -144,6 +144,8 @@ irqfd_shutdown(struct work_struct *work)
 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
irq_bypass_unregister_consumer(>consumer);
 #endif
+   if (irqfd->fastpath.token)
+   irq_bypass_unregister_consumer(>fastpath);
eventfd_ctx_put(irqfd->eventfd);
kfree(irqfd);
 }
@@ -203,6 +205,14 @@ irqfd_wakeup_pollin(struct kvm_kernel_irqfd *irqfd)
 }
 
 static int
+kvm_fastpath_irq(void *arg)
+{
+   struct kvm_kernel_irqfd *irqfd = arg;
+
+   return irqfd_wakeup_pollin(irqfd);
+}
+
+static int
 irqfd_wakeup_pollup(struct kvm_kernel_irqfd *irqfd)
 {
struct kvm *kvm = irqfd->kvm;
@@ -296,6 +306,34 @@ int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
 }
 #endif
 
+static int kvm_fastpath_stub(struct irq_bypass_consumer *stub,
+struct irq_bypass_producer *stub1)
+{
+   return 0;
+}
+
+static void kvm_fastpath_stub1(struct irq_bypass_consumer *stub,
+struct irq_bypass_producer *stub1)
+{
+}
+
+static int setup_fastpath_consumer(struct kvm_kernel_irqfd *irqfd)
+{
+   int ret;
+
+   irqfd->fastpath.token = (void *)irqfd->eventfd;
+   irqfd->fastpath.add_producer = kvm_fastpath_stub;
+   irqfd->fastpath.del_producer = kvm_fastpath_stub1;
+   irqfd->fastpath.handle_irq = kvm_fastpath_irq;
+   irqfd->fastpath.irq_context = irqfd;
+   ret = irq_bypass_register_consumer(>fastpath);
+
+   if (ret)
+   /* A special tag to indicate consumer not working */
+   irqfd->fastpath.token = (void *)0;
+   return ret;
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
@@ -435,6 +473,10 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
irqfd->consumer.token, ret);
 #endif
 
+   if (setup_fastpath_consumer(irqfd))
+   pr_info("irq bypass fastpath consumer (toke %p) registration 
fails: %d\n",
+   irqfd->eventfd, ret);
+
return 0;
 
 fail:
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/5] VIRT: Support runtime irq_bypass consumer

2015-12-03 Thread Yunhong Jiang
Extend the irq_bypass manager to support runtime consumers. A runtime
irq_bypass consumer can handle an interrupt when it is triggered. A
runtime consumer has its handle_irq() function set and provides an
irq_context that is passed to it for the irq handling.

A producer keeps a list of its runtime consumers, so that it can invoke
each consumer's handle_irq() when the irq fires.

Currently the irq_bypass manager has several code paths that assume there is
only one consumer/producer pair for each token. For example, when
registering a producer, it exits the loop after finding one matching
consumer.  This is updated to support both static consumers (like the
Posted Interrupt consumer) and runtime consumers.

Signed-off-by: Yunhong Jiang 
---
 include/linux/irqbypass.h |  8 +
 virt/lib/irqbypass.c  | 82 +++
 2 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
index 1551b5b2f4c2..d5bec0c7be3a 100644
--- a/include/linux/irqbypass.h
+++ b/include/linux/irqbypass.h
@@ -12,6 +12,7 @@
 #define IRQBYPASS_H
 
 #include 
+#include 
 
 struct irq_bypass_consumer;
 
@@ -47,6 +48,9 @@ struct irq_bypass_consumer;
  */
 struct irq_bypass_producer {
struct list_head node;
+   /* Update side is synchronized by the lock on irqbypass.c */
+   struct srcu_struct srcu;
+   struct list_head consumers;
void *token;
int irq;
int (*add_consumer)(struct irq_bypass_producer *,
@@ -61,6 +65,7 @@ struct irq_bypass_producer {
  * struct irq_bypass_consumer - IRQ bypass consumer definition
  * @node: IRQ bypass manager private list management
  * @token: opaque token to match between producer and consumer
+ * @sibling: consumers with same token list management
  * @add_producer: Connect the IRQ consumer to an IRQ producer
  * @del_producer: Disconnect the IRQ consumer from an IRQ producer
  * @stop: Perform any quiesce operations necessary prior to add/del (optional)
@@ -73,6 +78,7 @@ struct irq_bypass_producer {
  */
 struct irq_bypass_consumer {
struct list_head node;
+   struct list_head sibling;
void *token;
int (*add_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
@@ -80,6 +86,8 @@ struct irq_bypass_consumer {
 struct irq_bypass_producer *);
void (*stop)(struct irq_bypass_consumer *);
void (*start)(struct irq_bypass_consumer *);
+   int (*handle_irq)(void *arg);
+   void *irq_context;
 };
 
 int irq_bypass_register_producer(struct irq_bypass_producer *);
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 09a03b5a21ff..43ef9e2c77dc 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("IRQ bypass manager utility module");
@@ -49,11 +50,8 @@ static int __connect(struct irq_bypass_producer *prod,
prod->del_consumer(prod, cons);
}
 
-   if (cons->start)
-   cons->start(cons);
-   if (prod->start)
-   prod->start(prod);
-
+   if (!ret && cons->handle_irq)
+   list_add_rcu(>sibling, >consumers);
return ret;
 }
 
@@ -71,6 +69,11 @@ static void __disconnect(struct irq_bypass_producer *prod,
if (prod->del_consumer)
prod->del_consumer(prod, cons);
 
+   if (cons->handle_irq) {
+   list_del_rcu(>sibling);
+   synchronize_srcu(>srcu);
+   }
+
if (cons->start)
cons->start(cons);
if (prod->start)
@@ -87,7 +90,8 @@ static void __disconnect(struct irq_bypass_producer *prod,
 int irq_bypass_register_producer(struct irq_bypass_producer *producer)
 {
struct irq_bypass_producer *tmp;
-   struct irq_bypass_consumer *consumer;
+   struct list_head *node, *next, siblings = LIST_HEAD_INIT(siblings);
+   int ret;
 
might_sleep();
 
@@ -96,6 +100,9 @@ int irq_bypass_register_producer(struct irq_bypass_producer 
*producer)
 
mutex_lock();
 
+   INIT_LIST_HEAD(>consumers);
+   init_srcu_struct(>srcu);
+
list_for_each_entry(tmp, , node) {
if (tmp->token == producer->token) {
mutex_unlock();
@@ -104,23 +111,48 @@ int irq_bypass_register_producer(struct 
irq_bypass_producer *producer)
}
}
 
-   list_for_each_entry(consumer, , node) {
+   list_for_each_safe(node, next, ) {
+   struct irq_bypass_consumer *consumer = container_of(
+   node, struct irq_bypass_consumer, node);
+
if (consumer->token == producer->token) {
-   int ret = __connect(producer, consumer);
-   if (ret) {
-   mutex_unlock();
-   module_put(THIS_MODULE);
-   

Re: [PATCH V2 1/7] trace/events: Add gup trace events

2015-12-03 Thread Shi, Yang

On 12/2/2015 8:07 PM, Steven Rostedt wrote:

On Wed,  2 Dec 2015 14:53:27 -0800
Yang Shi  wrote:


page-faults events record the invocation of handle_mm_fault, but the invocation
may come from do_page_fault or gup. In some use cases, a finer event count
may be needed, so add trace event support for:

__get_user_pages
__get_user_pages_fast
fixup_user_fault

Signed-off-by: Yang Shi 
---
  include/trace/events/gup.h | 71 ++
  1 file changed, 71 insertions(+)
  create mode 100644 include/trace/events/gup.h

diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
new file mode 100644
index 000..03a4674
--- /dev/null
+++ b/include/trace/events/gup.h
@@ -0,0 +1,71 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gup
+
+#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GUP_H
+
+#include 
+#include 
+
+TRACE_EVENT(gup_fixup_user_fault,
+
+   TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
+   unsigned long address, unsigned int fault_flags),
+
+   TP_ARGS(tsk, mm, address, fault_flags),


Args that are added but not used by TP_fast_assign() will slow down the
code while tracing is enabled, as they need to be added to the trace
function call.


+
+   TP_STRUCT__entry(
+   __field(unsigned long,  address )
+   ),
+
+   TP_fast_assign(
+   __entry->address = address;
+   ),
+
+   TP_printk("address=%lx",  __entry->address)
+);
+
+TRACE_EVENT(gup_get_user_pages,
+
+   TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
+   unsigned long start, unsigned long nr_pages),
+
+   TP_ARGS(tsk, mm, start, nr_pages),


Here too but this is worse. See below.


+
+   TP_STRUCT__entry(
+   __field(unsigned long,  start   )
+   __field(unsigned long,  nr_pages)
+   ),
+
+   TP_fast_assign(
+   __entry->start   = start;
+   __entry->nr_pages= nr_pages;
+   ),
+
+   TP_printk("start=%lx nr_pages=%lu", __entry->start, __entry->nr_pages)
+);
+
+TRACE_EVENT(gup_get_user_pages_fast,
+
+   TP_PROTO(unsigned long start, int nr_pages, int write,
+   struct page **pages),
+
+   TP_ARGS(start, nr_pages, write, pages),


This and the above "gup_get_user_pages" have the same entry field,
assign and printk. They should be combined into a DECLARE_EVENT_CLASS()
and two DEFINE_EVENT()s. That will save on size as the
DECLARE_EVENT_CLASS() is the biggest part of each TRACE_EVENT().


Thanks for the suggestion, will fix them in V3.

Regards,
Yang



-- Steve



+
+   TP_STRUCT__entry(
+   __field(unsigned long,  start   )
+   __field(unsigned long,  nr_pages)
+   ),
+
+   TP_fast_assign(
+   __entry->start   = start;
+   __entry->nr_pages= nr_pages;
+   ),
+
+   TP_printk("start=%lx nr_pages=%lu",  __entry->start, __entry->nr_pages)
+);
+
+#endif /* _TRACE_GUP_H */
+
+/* This part must be outside protection */
+#include 




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/5] Threaded MSI interrupt for VFIO PCI device

2015-12-03 Thread Yunhong Jiang
When assigning a VFIO device to a KVM guest with a low-latency requirement, it
is better to handle the interrupt in hard interrupt context, to reduce
context switches to/from the IRQ thread.

Based on the discussion at https://lkml.org/lkml/2015/10/26/764, the VFIO MSI
interrupt is changed to use request_threaded_irq(). The primary interrupt
handler tries to set the guest interrupt atomically. If it fails to do
so, a threaded interrupt handler will be invoked.

The irq_bypass manager is extended for this purpose. The KVM eventfd will
provide an irqbypass consumer to handle the interrupt in hard interrupt
context. The producer will then invoke the consumer's handler.

Yunhong Jiang (5):
  Extract the irqfd_wakeup_pollin/irqfd_wakeup_pollup
  Support runtime irq_bypass consumer
  Support threaded interrupt handling on VFIO
  Add the irq handling consumer
  Expose x86 kvm_arch_set_irq_inatomic()

 arch/x86/kvm/Kconfig  |   1 +
 drivers/vfio/pci/vfio_pci_intrs.c |  39 ++--
 include/linux/irqbypass.h |   8 +++
 include/linux/kvm_host.h  |  19 +-
 include/linux/kvm_irqfd.h |   1 +
 virt/kvm/Kconfig  |   3 +
 virt/kvm/eventfd.c| 131 ++
 virt/lib/irqbypass.c  |  82 ++--
 8 files changed, 214 insertions(+), 70 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/5] KVM: Expose x86 kvm_arch_set_irq_inatomic()

2015-12-03 Thread Yunhong Jiang
x86 supports setting an irq in atomic context; expose it to the vfio driver.

Signed-off-by: Yunhong Jiang 
---
 arch/x86/kvm/Kconfig |  1 +
 include/linux/kvm_host.h | 19 ---
 virt/kvm/Kconfig |  3 +++
 virt/kvm/eventfd.c   |  9 -
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 639a6e34500c..642e8b905c96 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
+   select KVM_SET_IRQ_INATOMIC
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 590c46e672df..a6e237275928 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -852,9 +852,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
irq, int level,
bool line_status);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm 
*kvm,
int irq_source_id, int level, bool line_status);
-int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm *kvm, int irq_source_id,
-  int level, bool line_status);
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -1207,4 +1204,20 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, 
unsigned int host_irq,
  uint32_t guest_irq, bool set);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 
+#ifndef CONFIG_KVM_SET_IRQ_INATOMIC
+int __attribute__((weak)) kvm_arch_set_irq_inatomic(
+   struct kvm_kernel_irq_routing_entry *irq,
+   struct kvm *kvm, int irq_source_id,
+   int level,
+   bool line_status)
+{
+   return -EWOULDBLOCK;
+}
+#else
+extern int kvm_arch_set_irq_inatomic(
+   struct kvm_kernel_irq_routing_entry *e,
+   struct kvm *kvm, int irq_source_id, int level,
+   bool line_status);
+#endif
+
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7a79b6853583..7c99dd4724a4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -50,3 +50,6 @@ config KVM_COMPAT
 
 config HAVE_KVM_IRQ_BYPASS
bool
+
+config KVM_SET_IRQ_INATOMIC
+   bool
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b20a2d1bbf73..405c26742380 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -173,15 +173,6 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 
-int __attribute__((weak)) kvm_arch_set_irq_inatomic(
-   struct kvm_kernel_irq_routing_entry *irq,
-   struct kvm *kvm, int irq_source_id,
-   int level,
-   bool line_status)
-{
-   return -EWOULDBLOCK;
-}
-
 static int
 irqfd_wakeup_pollin(struct kvm_kernel_irqfd *irqfd)
 {
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/7] mm/gup: add gup trace points

2015-12-03 Thread Shi, Yang

On 12/2/2015 8:13 PM, Steven Rostedt wrote:

On Wed, 2 Dec 2015 15:36:50 -0800
Dave Hansen  wrote:


On 12/02/2015 02:53 PM, Yang Shi wrote:

diff --git a/mm/gup.c b/mm/gup.c
index deafa2c..10245a4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
  #include 
  #include 

+#define CREATE_TRACE_POINTS
+#include 
+
  #include 
  #include 


This needs to be _the_ last thing that gets #included.  Otherwise, you
risk colliding with any other trace header that gets implicitly included
below.


Agreed.




@@ -1340,6 +1346,8 @@ int __get_user_pages_fast(unsigned long start, int 
nr_pages, int write,
start, len)))
return 0;

+   trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
 * Disable interrupts.  We use the nested form as we can already have
 * interrupts disabled by get_futex_key.


It would be _really_ nice to be able to see return values from the
various gup calls as well.  Is that feasible?


Only if you rewrite the functions to have a single return code path
that we can add a tracepoint to. Or have a wrapper function that gets


Yes. My preliminary test could only cover the success case. gup could 
return errno from a few different code paths.



called directly that calls these functions internally and the tracepoint
can trap the return value.


This will incur more changes in other subsystems (futex, kvm, etc), I'm 
not sure if it is worth making such changes to get return value.



I can probably make function_graph tracer give return values, although
it will give a return value for void functions as well. And it may give
long long returns for int returns that may have bogus data in the
higher bits.


If the return value requirement is not limited to gup, the approach 
sounds more reasonable.


Thanks,
Yang



-- Steve



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] pinctrl: single: remove misuse of IRQF_NO_SUSPEND flag

2015-12-03 Thread Grygorii Strashko
On 12/03/2015 08:13 PM, Tony Lindgren wrote:
> * Linus Walleij  [151201 06:07]:
>> On Fri, Nov 27, 2015 at 6:21 PM, Sudeep Holla  wrote:
>>
>>> From: Sudeep Holla 
>>>
>>> The IRQF_NO_SUSPEND flag is used to identify the interrupts that should
>>> be left enabled so as to allow them to work as expected during the
>>> suspend-resume cycle, but doesn't guarantee that it will wake the system
>>> from a suspended state, enable_irq_wake is recommended to be used for
>>> the wakeup.
>>>
>>> This patch removes the use of IRQF_NO_SUSPEND flags replacing it with
>>> irq_set_irq_wake instead.
>>>
>>> Cc: Linus Walleij 
>>> Cc: linux-g...@vger.kernel.org
>>> Signed-off-by: Sudeep Holla 
>>
>> I need Tony's ACK on this as well.
> 
> At least on omaps, this controller is always powered and we never want to
> suspend it as it handles wake-up events for all the IO pins. And that
> usecase sounds exactly like what you're describing above.
> 
> I don't quite follow what your suggested alternative for an interrupt
> controller is?
> 
> At least we need to have the alternative patched in with this change before
> just removing IRQF_NO_SUSPEND.
> 
> The enable_irq_wake is naturally used for the consumer drivers of this
> interrupt controller and actually mostly done automatically now with the
> dev_pm_set_dedicated_wake_irq.
> 

I think, this patch should not break our wake-up functionality.
It will just change the moment when pcs_irq_handler() will be called:

before this change:
- suspend_enter()
  
  - arch_suspend_enable_irqs();
- ^ right here

after this change:
- suspend_enter()
  
  dpm_resume_noirq()
  - resume_device_irqs()
^ here

Correct? And to me this seems safer.

-- 
regards,
-grygorii
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 04/18] irqchip: add nps Internal and external irqchips

2015-12-03 Thread Marc Zyngier
Hi Noam,

On 02/12/15 15:08, Noam Camus wrote:
>> From: Marc Zyngier [mailto:marc.zyng...@arm.com] 
>> Sent: Tuesday, December 01, 2015 3:29 PM
> 
>> +  interrupt source. The value shall be 1.
> 
>> So you never have to encode the interrupt trigger type? Do you only support 
>> edge or level?
> I Always use level sensitive.
> 
>> +
>> +#define NPS_GIM_P_EN0x100   /* Peripheral interrupts source enable 
>> */
>> +#define NPS_GIM_P_BLK   0x118   /* Peripheral interrupts blocking for 
>> sources */
> 
>>> Are these the interrupts the peripherals are using? If yes, they really 
>>> have nothing to do here...
> I will move this from here 
>>> +   __asm__ __volatile__ (
>>> +   "   .word %0\n"
>>> +   :
>>> +   : "i"(CTOP_INST_RSPI_GIC_0_R12)
>>> +   : "memory");
> 
>> Silly question: why cannot you just write the actual instruction
>> instead of shoving the instruction like this? Also, .inst would be
>> more appropriate...
> [Noam Camus] Since this is instruction that yet is not part of
> up-streamed binutils of ARC.  Now ARC maintainer can build our kernel
> with generic ARC toolchain.

OK. If you decide to carry on using this, I'd still recommend using
.inst instead of .word, so that you can get a proper disassembly.

>>> +static int nps400_irq_map(struct irq_domain *d, unsigned int irq,
>>> + irq_hw_number_t hw)
>>> +{
>>> +   switch (irq) {
>>> +   case TIMER0_IRQ:
>>> +#if defined(CONFIG_SMP)
>>> +   case IPI_IRQ:
>>> +#endif
>>> +   irq_set_chip_and_handler(irq, _irq_chip_percpu,
>>> +handle_percpu_irq);
>>> +   break;
>>> +   default:
>>> +   irq_set_chip_and_handler(irq, _irq_chip_fasteoi,
>>> +handle_fasteoi_irq);
>>> +   break;
>>> +   }
> 
>> No. This is just wrong. Either you get per interrupt information
>> from the device tree to configure the interrupt the right way, or
>> you have different interrupt controllers for each device.
> I am not sure how you want me to get it from DTB? Please refer to
> some reference.

Here, you are assuming that 'irq' is a hardware number, while there is
no reason why it should be (it only works because you are using legacy
domains, more on that later).

Your switch/case statement should be based on the 'hw' parameter,
because that is your HW IRQ number. the irq parameter can be completely
random, and will eventually be once you fix the rest of the driver.

Also, can you always tell the per-cpu property of your interrupt based
on its number? If you can, then it is fine.

>> But using the Linux irq number is always wrong. You should only consider the 
>> hwirq.
> I will change
> 
>> +
>> +nps400_root_domain = irq_domain_add_legacy(node, NR_CPU_IRQS, 0, 0,
>> +   _irq_ops, NULL);
> 
>> And that's why you can get away with the above horror. Don't use
>> legacy domains. This stuff is by no mean legacy.
> So what is my alternative here?

Your alternative is to use irq_domain_add_linear, for example, and to
make sure that you always refer to the hw number when manipulating the
HW. You will quickly notice that the Linux IRQ number has nothing to do
with the HW one, and you'll be able to quickly iron out the bugs.

Looking forward to reviewing your next version.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] ARM: dts: Use MMC pwrseq instead regulators for IGEP WiFi init

2015-12-03 Thread Javier Martinez Canillas
Hello Tony,

On 12/03/2015 03:16 PM, Tony Lindgren wrote:
> * Javier Martinez Canillas  [151203 10:03]:
>> Hello,
>>
>> This series converts the IGEPv2 (IGEP0020) and IGEP COM Module (IGEP0030)
>> Device Tree to use the MMC power sequence provider to initialize the SDIO
>> WiFi chip instead of using fake fixed regulators to just toggle the Reset
>> and Power pins in the chip.
>>
>> The patches were tested on a DM3730 IGEPv2 board but the IGEP COM Module
>> is the same with regard to the SDIO WiFi so it should be safe to land too.
>>
>> The IGEPv2 Rev.F and the IGEP COM Module Rev.G DTS were not converted due
>> using a different WiFi chip (wlcore instead of libertas) than the one in
>> the board I've access to test so I preferred to leave those untouched.
> 
> Do you have some solution for the start-up latency issue?
>

No, I don't and that's one of the reasons why I didn't want to touch the
DTS that have the wlcore chip.

The omap3-igep0020-rev-f.dts and omap3-igep0030-rev-g.dts don't have a
startup-delay-us property in the regulator for the WLAN_EN pin as is
the case for the IGEPv5 DTS but I don't know if those DTS are just wrong.

The DTS for the igep0020 and igep0030 that have the libertas chip,
did have a startup-delay-us for the WIFI_PDN but using the GPIOs
for RESET_N_W and WIFI_PDN in the mmc-pwrseq-simple reset-gpios is
enough to make the SDIO chip reset, be enumerated and WiFi to work
correctly so I don't know if that is really needed or is just a bad
description in the DTS.

Since it is working for the boards with the libertas chip, I preferred
to remove the DTS hack but left the boards with wlcore chip since
you said the startup-delay-us is needed there (but probably we should
add to the regulators in the boards that don't have it then).

> Regards,
> 
> Tony
> 

Best regards,
-- 
Javier Martinez Canillas
Open Source Group
Samsung Research America
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Domain faults when CONFIG_CPU_SW_DOMAIN_PAN is enabled

2015-12-03 Thread Nicolas Pitre
On Thu, 3 Dec 2015, Russell King - ARM Linux wrote:

> On Thu, Dec 03, 2015 at 04:41:18PM +, Russell King - ARM Linux wrote:
> > On Thu, Dec 03, 2015 at 04:12:06PM +, Peter Rosin wrote:
> > > * uaccess_with_memcpy.c:__copy_to_user() has a mode in which it copies
> > >   "non-atomically" (if faulthandler_disabled() returns 0). If a fault
> > >   happens during __copy_to_user, what prevents some other thread from
> > >   clobbering DACR?
> > 
> > See the second point above.  Moreover, if we sleep in down_read(),
> > then __switch_to() reads the current DACR value and saves it in the
> > thread information, and will restore that value when resuming the
> > thread - even if the thread has been migrated to a different CPU.
> 
> I thought this was correct, but it isn't - that's what my original solution
> did, but I think when Will reviewed it, we decided it wasn't necessary -
> and it isn't necessary for every single case with the exception of this
> one.  This is exactly what's going wrong: the down_read() in these paths
> calls into the scheduler, which switches away.  When we come back, the
> DACR value is reset by the other thread to 0x51.
> 
> There's a few ways to solve this:
> 
> 1. Make the thread switching code save and restore the DACR register as
>it would do for domains.  This imposes an overhead on every single
>context switch whether or not we happen to be in this _single_
>troublesome code.  (Patch attached - as there's several, I'm attaching
>them.)
> 
> 2. Add additional code to the uaccess-with-memcpy stuff to reset the
>DACR value prior to using memcpy() or memset().  (Patch attached.)
> 
> 3. Make uaccess-with-memcpy depend on !CPU_SW_DOMAINS_PAN (suggested by
>Will)
> 
> 4. Delete the uaccess-with-memcpy code (also suggested by Will.)
> 
> I think the best thing I can do is say... "Discuss amongst yourselves" :)

Personally, I'd advocate for #2 or #4.  Prior to commit 0f64b247e6 I was 
already leaning towards #4.

So if some people are still relying on uaccess-with-memcpy and #2 fixes 
it then it's all good.  I'd suggest surrounding the DACR accesses with 
#ifdef CONFIG_CPU_SW_DOMAIN_PAN in the final patch.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86/rapl: Do not load in a guest

2015-12-03 Thread Borislav Petkov
From: Borislav Petkov 

qemu/kvm doesn't support RAPL and RAPL doesn't have a CPUID feature bit
so check whether we're in a guest instead.

Reported-by: Hannes Reinecke 
Signed-off-by: Borislav Petkov 
Cc: Arnaldo Carvalho de Melo 
Cc: "H. Peter Anvin" 
Cc: Ingo Molnar 
Cc: Jacob Pan 
Cc: Peter Zijlstra 
Cc: "Rafael J. Wysocki" 
Cc: Thomas Gleixner 
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 3 +++
 drivers/powercap/intel_rapl.c   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c 
b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index ed446bdcbf31..bc60bc1118b4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -711,6 +711,9 @@ static int __init rapl_pmu_init(void)
struct x86_pmu_quirk *quirk;
int i;
 
+   if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+   return 0;
+
/*
 * check for Intel processor family 6
 */
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index cc97f0869791..297a9b5074e2 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1511,6 +1511,9 @@ static int __init rapl_init(void)
int ret = 0;
const struct x86_cpu_id *id;
 
+   if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+   return -ENODEV;
+
id = x86_match_cpu(rapl_ids);
if (!id) {
pr_err("driver does not support CPU family %d model %d\n",
-- 
2.3.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/3] sched/fair: Move hot load_avg into its own cacheline

2015-12-03 Thread bsegall
Peter Zijlstra  writes:

> On Thu, Dec 03, 2015 at 09:56:02AM -0800, bseg...@google.com wrote:
>> Peter Zijlstra  writes:
>
>> > @@ -7402,11 +7405,12 @@ void __init sched_init(void)
>> >  #endif /* CONFIG_RT_GROUP_SCHED */
>> >  
>> >  #ifdef CONFIG_CGROUP_SCHED
>> > +  task_group_cache = KMEM_CACHE(task_group, 0);
>> > +
>> >list_add(&root_task_group.list, &task_groups);
>> >INIT_LIST_HEAD(&root_task_group.children);
>> >INIT_LIST_HEAD(&root_task_group.siblings);
>> >autogroup_init(&init_task);
>> > -
>> >  #endif /* CONFIG_CGROUP_SCHED */
>> >  
>> >for_each_possible_cpu(i) {
>> > --- a/kernel/sched/sched.h
>> > +++ b/kernel/sched/sched.h
>> > @@ -248,7 +248,12 @@ struct task_group {
>> >unsigned long shares;
>> >  
>> >  #ifdef CONFIG_SMP
>> > -  atomic_long_t load_avg;
>> > +  /*
>> > +   * load_avg can be heavily contended at clock tick time, so put
>> > +   * it in its own cacheline separated from the fields above which
>> > +   * will also be accessed at each tick.
>> > +   */
>> > +  atomic_long_t load_avg cacheline_aligned;
>> >  #endif
>> >  #endif
>> >  
>> 
>> This loses the cacheline-alignment for task_group, is that ok?
>
> I'm a bit dense (its late) can you spell that out? Did you mean me
> killing SLAB_HWCACHE_ALIGN? That should not matter because:
>
> #define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
>   sizeof(struct __struct), __alignof__(struct __struct),\
>   (__flags), NULL)
>
> picks up the alignment explicitly.
>
> And struct task_group having one cacheline aligned member, means that
> the alignment of the composite object (the struct proper) must be an
> integer multiple of this (typically 1).

Ah, yeah, I forgot about this, my fault.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] arm64: calculate the various pages number to show

2015-12-03 Thread Mark Rutland
On Fri, Nov 27, 2015 at 09:52:16AM +0800, Xishi Qiu wrote:
> On 2015/11/26 23:49, Mark Rutland wrote:
> 
> > On Thu, Nov 26, 2015 at 11:05:32PM +0800, zhong jiang wrote:
> >> On 2015/11/25 23:04, Mark Rutland wrote:
> >>> On Wed, Nov 25, 2015 at 09:41:12PM +0800, zhongjiang wrote:
>  This patch add the interface to show the number of 4KB or 64KB page,
>  aims to statistics the number of different types of pages.
> >>>
> >>> What is this useful for? Why do we want it?
> >>>
> >>> What does it account for, just the swapper?
> >>>
> >>
> >> The patch was written while I was backporting set_memory_ro. It can be used 
> >> to
> >> detect whether large page splitting and merging is happening. Large pages will
> >> significantly reduce the TLB miss, and improve the system performance.
> > 
> > Ok, but typically the user isn't going to be able to do much with this
> > information. It feels more like something that should be in the page
> > table dump code (where we can calculate the values as we walk the
> > tables).
> > 
> > What is it intended to account for?
> > 
> > The entire swapper?
> > 
> > Just the linear mapping?
> 
> Hi Mark,
> 
> x86 has this information when cat /proc/meminfo, so how about just
> like x86 to show it?

The fact that another architecture has some implementation doesn't
necessarily mean it's a good idea. In this case there are concerns that
don't apply to x86, in that we support a number of page sizes, and
anything reading this needs to handle that fact.

If there's a sensible use-case, then I am not opposed to this. I don't
see the point in adding it just because we can.

A prerequisite for adding it is knowing precisely what it is intended to
describe. Otherwise it's impossible to review.

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [tpmdd-devel] [PATCH v2 0/3] tpm_tis: Clean up force module parameter

2015-12-03 Thread Jason Gunthorpe
On Thu, Dec 03, 2015 at 08:00:42AM +0200, Jarkko Sakkinen wrote:

> I guess it'd be more reliable. In my NUC the current fix works and the
> people who tested it. If you supply me a fix that changes it to use that
> I can test it and this will give also coverage to the people who tested
> my original fix.

Here is the updated series:

https://github.com/jgunthorpe/linux/commits/for-jarkko

What does your dmesg say?

It really isn't OK to hardwire an address for acpi devices, so I've
added something like this. Just completely guessing that control_pa is
where the BIOS is hiding the base address. Maybe it is cca->cmd_pa ?

From c9f7c0465008657f7fc7880496f68f4a1b3b4a26 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe 
Date: Thu, 3 Dec 2015 10:58:56 -0700
Subject: [PATCH 3/5] tpm_tis: Do not fall back to a hardcoded address for TPM2

If the ACPI tables do not declare a memory resource for the TPM2
then do not just fall back to the x86 default base address.

WIP: Guess that the control_address is the base address for the
TIS 1.2 memory mapped interface.

Signed-off-by: Jason Gunthorpe 
---
 drivers/char/tpm/tpm_tis.c | 50 +++---
 1 file changed, 20 insertions(+), 30 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index fecd27b45fd1..6b28f8003425 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -122,39 +122,11 @@ static inline int is_itpm(struct acpi_device *dev)
 {
return has_hid(dev, "INTC0102");
 }
-
-static inline int is_fifo(struct acpi_device *dev)
-{
-   struct acpi_table_tpm2 *tbl;
-   acpi_status st;
-
-   /* TPM 1.2 FIFO */
-   if (!has_hid(dev, "MSFT0101"))
-   return 1;
-
-   st = acpi_get_table(ACPI_SIG_TPM2, 1,
-   (struct acpi_table_header **) );
-   if (ACPI_FAILURE(st)) {
-   dev_err(>dev, "failed to get TPM2 ACPI table\n");
-   return 0;
-   }
-
-   if (tbl->start_method != ACPI_TPM2_MEMORY_MAPPED)
-   return 0;
-
-   /* TPM 2.0 FIFO */
-   return 1;
-}
 #else
 static inline int is_itpm(struct acpi_device *dev)
 {
return 0;
 }
-
-static inline int is_fifo(struct acpi_device *dev)
-{
-   return 1;
-}
 #endif
 
 /* Before we attempt to access the TPM we must see that the valid bit is set.
@@ -980,11 +952,21 @@ static int tpm_check_resource(struct acpi_resource *ares, 
void *data)
 
 static int tpm_tis_acpi_init(struct acpi_device *acpi_dev)
 {
+   struct acpi_table_tpm2 *tbl;
+   acpi_status st;
struct list_head resources;
-   struct tpm_info tpm_info = tis_default_info;
+   struct tpm_info tpm_info = {};
int ret;
 
-   if (!is_fifo(acpi_dev))
+   st = acpi_get_table(ACPI_SIG_TPM2, 1,
+   (struct acpi_table_header **) &tbl);
+   if (ACPI_FAILURE(st)) {
+   dev_err(&acpi_dev->dev,
+   FW_BUG "failed to get TPM2 ACPI table\n");
+   return -ENODEV;
+   }
+
+   if (tbl->start_method != ACPI_TPM2_MEMORY_MAPPED)
return -ENODEV;
 
INIT_LIST_HEAD();
@@ -996,6 +978,14 @@ static int tpm_tis_acpi_init(struct acpi_device *acpi_dev)
 
acpi_dev_free_resource_list();
 
+   if (tpm_info.start == 0 && tpm_info.len == 0) {
+   tpm_info.start = tbl->control_address;
+   tpm_info.len = TIS_MEM_LEN;
+   dev_err(&acpi_dev->dev,
+   FW_BUG "TPM2 ACPI table does not define a memory 
resource, using 0x%lx instead\n",
+   tpm_info.start);
+   }
+
if (is_itpm(acpi_dev))
itpm = true;
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] sched: remove false-positive warning from wake_up_process()

2015-12-03 Thread Linus Torvalds
On Thu, Dec 3, 2015 at 4:36 AM, Peter Zijlstra  wrote:
>
> I've edited the changelog like so, please let me know if that is fine
> with you.

Ack.

 Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


<    1   2   3   4   5   6   7   8   9   10   >