[PATCH v2] kbuild: move extra gcc checks to scripts/Makefile.extrawarn

2014-04-08 Thread Masahiro Yamada
W=... provides extra gcc checks.

Having such code in scripts/Makefile.build results in the same flags
being added to KBUILD_CFLAGS multiple times because
scripts/Makefile.build is invoked every time Kbuild descends into
the subdirectories.

Since the top Makefile is already too cluttered, this commit moves
all of extra gcc check stuff to a new file scripts/Makefile.extrawarn,
which is included from the top Makefile.

Signed-off-by: Masahiro Yamada 
Cc: Michal Marek 
CC: Sam Ravnborg 
---

Changes in v2:
  - Move the code to a new file scripts/Makefile.extrawarn
  - Move also "KBUILD_ENABLE_EXTRA_GCC_CHECKS" definition



 Makefile   |  6 ++---
 scripts/Makefile.build | 51 -
 scripts/Makefile.extrawarn | 57 ++
 3 files changed, 59 insertions(+), 55 deletions(-)
 create mode 100644 scripts/Makefile.extrawarn

diff --git a/Makefile b/Makefile
index cf3e075..4eadf2d 100644
--- a/Makefile
+++ b/Makefile
@@ -105,10 +105,6 @@ ifeq ("$(origin O)", "command line")
   KBUILD_OUTPUT := $(O)
 endif
 
-ifeq ("$(origin W)", "command line")
-  export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
-endif
-
 # That's our default target when none is given on the command line
 PHONY := _all
 _all:
@@ -702,6 +698,8 @@ ifeq ($(shell $(CONFIG_SHELL) 
$(srctree)/scripts/gcc-goto.sh $(CC)), y)
KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
+include $(srctree)/scripts/Makefile.extrawarn
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 KBUILD_CPPFLAGS += $(KCPPFLAGS)
 KBUILD_AFLAGS += $(KAFLAGS)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 9f0ee22..b5e02b6 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -50,57 +50,6 @@ ifeq ($(KBUILD_NOPEDANTIC),)
 endif
 endif
 
-#
-# make W=... settings
-#
-# W=1 - warnings that may be relevant and does not occur too often
-# W=2 - warnings that occur quite often but may still be relevant
-# W=3 - the more obscure warnings, can most likely be ignored
-#
-# $(call cc-option, -W...) handles gcc -W.. options which
-# are not supported by all versions of the compiler
-ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS
-warning-  := $(empty)
-
-warning-1 := -Wextra -Wunused -Wno-unused-parameter
-warning-1 += -Wmissing-declarations
-warning-1 += -Wmissing-format-attribute
-warning-1 += -Wmissing-prototypes
-warning-1 += -Wold-style-definition
-warning-1 += $(call cc-option, -Wmissing-include-dirs)
-warning-1 += $(call cc-option, -Wunused-but-set-variable)
-warning-1 += $(call cc-disable-warning, missing-field-initializers)
-
-warning-2 := -Waggregate-return
-warning-2 += -Wcast-align
-warning-2 += -Wdisabled-optimization
-warning-2 += -Wnested-externs
-warning-2 += -Wshadow
-warning-2 += $(call cc-option, -Wlogical-op)
-warning-2 += $(call cc-option, -Wmissing-field-initializers)
-
-warning-3 := -Wbad-function-cast
-warning-3 += -Wcast-qual
-warning-3 += -Wconversion
-warning-3 += -Wpacked
-warning-3 += -Wpadded
-warning-3 += -Wpointer-arith
-warning-3 += -Wredundant-decls
-warning-3 += -Wswitch-default
-warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
-warning-3 += $(call cc-option, -Wvla)
-
-warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-
-ifeq ("$(strip $(warning))","")
-$(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
-endif
-
-KBUILD_CFLAGS += $(warning)
-endif
-
 include scripts/Makefile.lib
 
 ifdef host-progs
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
new file mode 100644
index 000..54aaddb
--- /dev/null
+++ b/scripts/Makefile.extrawarn
@@ -0,0 +1,57 @@
+# ==
+#
+# make W=... settings
+#
+# W=1 - warnings that may be relevant and does not occur too often
+# W=2 - warnings that occur quite often but may still be relevant
+# W=3 - the more obscure warnings, can most likely be ignored
+#
+# $(call cc-option, -W...) handles gcc -W.. options which
+# are not supported by all versions of the compiler
+# ==
+
+ifeq ("$(origin W)", "command line")
+  export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
+endif
+
+ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS
+warning-  := $(empty)
+
+warning-1 := -Wextra -Wunused -Wno-unused-parameter
+warning-1 += -Wmissing-declarations
+warning-1 += -Wmissing-format-attribute
+warning-1 += -Wmissing-prototypes
+warning-1 += -Wold-style-definition
+warning-1 += $(call cc-option, -Wmissing-include-dirs)
+warning-1 += $(call cc-option, -Wunused-but-set-variable)
+warning-1 += $(call cc-disable-warning, missing-field-initializers)
+
+warning-2 := -Waggregate-return
+warning-2 += -Wcast-align
+warning-2 += -Wdisabled-optimization
+warning-2 += 

Re: [PATCH] futex: avoid race between requeue and wake

2014-04-08 Thread Mike Galbraith
On Wed, 2014-04-09 at 07:41 +0200, Peter Zijlstra wrote: 
> On Tue, Apr 08, 2014 at 03:30:07PM -0700, Linus Torvalds wrote:
> > So I'll have to leave this decision to the futex people. But the
> > attached slightly more complex patch *may* be the better one.
> 
> Of course, tglx is the main futex 'people' and he's not on CC.. *sigh*.

(Darren is a futex people _and_ a futextest people)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 01/15] mtd: st_spi_fsm: Add Macronix MX25L25655E device

2014-04-08 Thread Brian Norris
On Thu, Mar 27, 2014 at 08:20:47AM +, Lee Jones wrote:
> > > From: Angus Clark 
> > >
> > > Add Macronix MX25L25655E to the list of known devices.
> > >
> > > Signed-off-by: Angus Clark 
> > > Signed-off-by: Lee Jones 
> > > ---
> > >  drivers/mtd/devices/st_spi_fsm.c | 3 +++
> > >  1 file changed, 3 insertions(+)
> > >
> > > diff --git a/drivers/mtd/devices/st_spi_fsm.c 
> > > b/drivers/mtd/devices/st_spi_fsm.c
> > > index bea1416..2471061 100644
> > > --- a/drivers/mtd/devices/st_spi_fsm.c
> > > +++ b/drivers/mtd/devices/st_spi_fsm.c
> > > @@ -380,6 +380,9 @@ static struct flash_info flash_types[] = {
> > > { "mx25l25635e", 0xc22019, 0, 64*1024, 512,
> > >   (MX25_FLAG | FLASH_FLAG_32BIT_ADDR | FLASH_FLAG_RESET), 70,
> > >   stfsm_mx25_config },
> > > +   { "mx25l25655e", 0xc22619, 0, 64*1024, 512,
> > > + (MX25_FLAG | FLASH_FLAG_32BIT_ADDR | FLASH_FLAG_RESET), 70,
> > > + stfsm_mx25_config},
> > >
> > >  #define N25Q_FLAG (FLASH_FLAG_READ_WRITE   |   \
> > >FLASH_FLAG_READ_FAST |   \
> > 
> > How much of this table can be shared with the one in m25p80.c?
> 
> I have a long term plan to merge the two. Just waiting for the SPI NOR
> Framework to land before I do so.

Huang's work landed in the spinor branch at l2-mtd.git a bit ago. I will
take a look at this series and probably take this as-is, but long-term I
will not be taking many more additions to this custom table.

Regards,
Brian
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data

2014-04-08 Thread Peter Zijlstra
On Wed, Apr 09, 2014 at 02:21:49PM +0900, Namhyung Kim wrote:
> > create a new 'physid mode' to group all the sorting rules together
> >   (mimics the mem-mode)
> 
> What is 'physid' then?  I guess you meant physical id but it seems
> unique id or unique map id looks like a better fit IMHO.

I suspect this is legacy naming; they used to do this using physical
addresses.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] futex: avoid race between requeue and wake

2014-04-08 Thread Peter Zijlstra
On Tue, Apr 08, 2014 at 03:30:07PM -0700, Linus Torvalds wrote:
> So I'll have to leave this decision to the futex people. But the
> attached slightly more complex patch *may* be the better one.

Of course, tglx is the main futex 'people' and he's not on CC.. *sigh*.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT PULL 00/10] perf/core improvements and fixes

2014-04-08 Thread Ingo Molnar

* Namhyung Kim  wrote:

> Hi Andi,
> 
> On Wed, Apr 9, 2014 at 4:50 AM, Andi Kleen  wrote:
> > Jiri Olsa  writes:
> >
> >> hi Ingo,
> >> please consider pulling
> >
> > So who actually maintains perf user space now?
> >
> > I have at least 4 perf user patchkits/bug fixes pending, but 
> > Arnaldo hasn't answered any email for weeks.

So a bit of background: sloppy, abusive contributors not willing to 
change get ignored after some time. For example Andi got such a bad 
track record in the x86 and perf space with me that all your mails go 
to /dev/null currently. (I only saw this one because Namhyung replied 
to it.)

Andi, as I explained it to you previously, if you want me to merge 
patches from you then you need to 1) improve the way you work 2) send 
it to someone who is willing to deal with you and sign off on it and 
forward it to me.

Seems like you managed to burn up your goodwill with Arnaldo as well. 
You need to find someone else willing to pay the expense of dealing 
with your passive-aggressive style.

Just a quick example of your abuse: for example you know it perfectly 
well that perf is well maintained, a simple 'git log tools/perf/' will 
tell you that there's a steady stream of patches going in and that 
perf contributors are busy.

Yet in this mail you actually lie about that: you pretend that you 
don't know that most Linux maintainers have trouble working with you, 
you pretend that perf is "unmaintained", you pretend that you don't 
know who is maintaining it, just to create unfair pressure on Arnaldo 
...

That kind of approach really sucks, and if you run out of people's 
patience with petty office tactics like that then you should not fault 
them, you should fault yourself.

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6] perf, sort: Allow unique sorting instead of combining hist_entries

2014-04-08 Thread Namhyung Kim
On Mon, 24 Mar 2014 15:34:36 -0400, Don Zickus wrote:
> The cache contention tool needs to keep all the perf records unique in order
> to properly parse all the data.  Currently add_hist_entry() will combine
> the duplicate record and add the weight/period to the existing record.
>
> This throws away the unique data the cache contention tool needs (mainly
> the data source).  Create a flag to force the records to stay unique.

No.  This is why I said you need to add 'mem' and 'snoop' sort keys into
the c2c tool.  This is not how sort works IMHO - if you need to make
samples unique let the sort key(s) distinguish them somehow, or you can
combine same samples (in terms of sort keys) and use the combined entry's
stat.nr_events and stat.period or weight.

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [FMC] BUG: scheduling while atomic: swapper/1/0x10000002

2014-04-08 Thread Fengguang Wu
On Wed, Apr 09, 2014 at 07:08:43AM +0200, Alessandro Rubini wrote:
> Hello.
> Thank you for the report.
> 
> I'm at a conference and I fear I won't be able to test myself in the
> next days, but I think this is already fixed (it is part of
> the "misc_register" call path, so it's the same problem).
> 
> The fix is commit v3.11-rc2-11-g783c2fb
> 
>783c2fb FMC: fix locking in sample chardev driver
> 
> This commit, however, is not part of v3.11 and I think this is why you
> are finding the problem in the v3.10..v3.11 interval.

Alessandro, you are right. There are no more "scheduling while 
atomic" bugs in v3.12 and v3.13.

Our bisect log shows

git bisect  bad 38dbfb59d1175ef458d006556061adeaa8751b72  # 10:03  0-
345  Linus 3.14-rc1

However, that happens to be caused by an independent "scheduling while
atomic" bug:

[   20.038125] Fixing recursive fault but reboot is needed!
[   20.038125] BUG: scheduling while atomic: kworker/0:1H/77/0x0005
[   20.038125] INFO: lockdep is turned off.
[   20.038125] irq event stamp: 758
[   20.038125] hardirqs last  enabled at (757): [] 
_raw_spin_unlock_irq+0x22/0x30
[   20.038125] hardirqs last disabled at (758): [] 
_raw_spin_lock_irq+0x14/0x73
[   20.038125] softirqs last  enabled at (302): [] 
__do_softirq+0x186/0x1d2
[   20.038125] softirqs last disabled at (295): [] 
do_softirq_own_stack+0x2f/0x35
[   20.038125] CPU: 0 PID: 77 Comm: kworker/0:1H Tainted: G  D W
3.14.0-rc1 #1
[   20.038125] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   20.038125]  c0420610 c0420610 c0449a38 c1c1f562 c0449a54 c1c1b59c c1f91661 
c0420938
[   20.038125]  004d 0005 c0420610 c0449acc c1c2e4e2 c105fff8 01449a7c 
04af
[   20.038125]  c0420610 002c 0001 c0449a7c c0420610 c0449ab4 c106001c 

[   20.038125] Call Trace:
[   20.038125]  [] dump_stack+0x16/0x18
[   20.038125]  [] __schedule_bug+0x5d/0x6f
[   20.038125]  [] __schedule+0x45/0x55f
[   20.038125]  [] ? vprintk_emit+0x367/0x3a4
[   20.038125]  [] ? vprintk_emit+0x38b/0x3a4
[   20.038125]  [] ? trace_hardirqs_off+0xb/0xd
[   20.038125]  [] ? printk+0x38/0x3a
[   20.038125]  [] schedule+0x5d/0x5f
[   20.038125]  [] do_exit+0xcc/0x75d
[   20.038125]  [] ? kmsg_dump+0x184/0x191
[   20.038125]  [] ? kmsg_dump+0x1c/0x191
[   20.038125]  [] oops_end+0x7e/0x83
[   20.038125]  [] no_context+0x1ba/0x1c2
[   20.038125]  [] __bad_area_nosemaphore+0x137/0x13f
[   20.038125]  [] ? pte_offset_kernel+0x13/0x2a
[   20.038125]  [] ? spurious_fault+0x75/0xd5
[   20.038125]  [] bad_area_nosemaphore+0x12/0x14

Thanks,
Fengguang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [FMC] BUG: scheduling while atomic: swapper/1/0x10000002

2014-04-08 Thread Jet Chen
On 04/09/2014 01:08 PM, Alessandro Rubini wrote:
> Hello.
> Thank you for the report.
> 
> I'm at a conference and I fear I won't be able to test myself in the
> next days, but I think this is already fixed (it is part of
> the "misc_register" call path, so it's the same problem).
> 
> The fix is commit v3.11-rc2-11-g783c2fb
> 
>783c2fb FMC: fix locking in sample chardev driver
> 
> This commit, however, is not part of v3.11 and I think this is why you
> are finding the problem in the v3.10..v3.11 interval.
> 
> thank you again
> /alessandro
> 

I find commit 783c2fb FMC: fix locking in sample chardev driver. I will help to 
test it.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [FMC] BUG: scheduling while atomic: swapper/1/0x10000002

2014-04-08 Thread Alessandro Rubini
Hello.
Thank you for the report.

I'm at a conference and I fear I won't be able to test myself in the
next days, but I think this is already fixed (it is part of
the "misc_register" call path, so it's the same problem).

The fix is commit v3.11-rc2-11-g783c2fb

   783c2fb FMC: fix locking in sample chardev driver

This commit, however, is not part of v3.11 and I think this is why you
are finding the problem in the v3.10..v3.11 interval.

thank you again
/alessandro
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data

2014-04-08 Thread Namhyung Kim
On Mon, 24 Mar 2014 16:57:18 -0400, Don Zickus wrote:
> In order for the c2c tool to work correctly, it needs to properly
> sort all the records on uniquely identifiable data addresses.  These
> unique addresses are converted from virtual addresses provided by the
> hardware into a kernel address using an mmap2 record as the decoder.
>
> Once a unique address is converted, we can sort on them based on
> various rules.  Then it becomes clear which addresses are overlapping
> with each other across mmap regions or pid spaces.
>
> This patch just creates the rules and inserts the records into a
> sort entry for safe keeping until later patches process them.
>
> The general sorting rule is:
>
> o group cpumodes together
> o if (nonzero major/minor number - ie mmap'd areas)
>   o sort on major, minor, inode, inode generation numbers
> o else if cpumode is not kernel
>   o sort on pid
> o sort on data addresses
>
> I also hacked in the concept of 'color'.  The purpose of that bit is to
> provide hints later when processing these records that indicate a new unique
> address has been encountered.  Because later processing only checks the data
> addresses, there can be a theoretical scenario that similar sequential data
> addresses (when walking the rbtree) could be misinterpreted as overlapping
> when in fact they are not.
>
> Sample output: (perf report --stdio --physid-mode)
>
>   OverheadData AddressSource AddressCommand:  Pid 
>Tid Major  Minor  Inode  Inode Gen
>     ..   
> . . .  . ... .
> 18.93%  [k] 0xc900139c40b0  [k] igb_update_stats kworker/0:1:  
> 257   257 0  0   0 0
>  7.63%  [k] 0x88082e6cf0a8  [k] watchdog_timer_fnswapper:
> 0 0 0  0   0 0
>  1.86%  [k] 0x88042ef94700  [k] _raw_spin_lock   swapper:
> 0 0 0  0   0 0
>  1.77%  [k] 0x8804278afa50  [k] __switch_to  swapper:
> 0 0 0  0   0 0
>
> V4: add manpage entry in perf-report
>
> V3: split out the sorting into unique entries.  This makes it look
>   far less ugly
> create a new 'physid mode' to group all the sorting rules together
>   (mimics the mem-mode)

What is 'physid' then?  I guess you meant physical id but it seems
unique id or unique map id looks like a better fit IMHO.

>
> Signed-off-by: Don Zickus 
> ---
>  tools/perf/Documentation/perf-report.txt |  23 +++
>  tools/perf/builtin-report.c  |  20 ++-
>  tools/perf/util/hist.c   |  27 ++-
>  tools/perf/util/hist.h   |   8 +
>  tools/perf/util/sort.c   | 294 
> +++
>  tools/perf/util/sort.h   |  13 ++
>  6 files changed, 381 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-report.txt 
> b/tools/perf/Documentation/perf-report.txt
> index 8eab8a4..01391b0 100644
> --- a/tools/perf/Documentation/perf-report.txt
> +++ b/tools/perf/Documentation/perf-report.txt
> @@ -95,6 +95,23 @@ OPTIONS
>   And default sort keys are changed to comm, dso_from, symbol_from, dso_to
>   and symbol_to, see '--branch-stack'.
>  
> + If --physid-mode option is used, following sort keys are also
> + available:
> + daddr, iaddr, pid, tid, major, minor, inode, inode_gen.
> +
> + - daddr: data address (sorted based on major, minor, inode and inode
> + generation numbers if shared, otherwise pid)

By "if shared", did you mean "for shared file mapping"?


> + - iaddr: instruction address
> + - pid: command and pid of the task
> + - tid: tid of the task
> + - major: major number of mapped location (0 if not mapped)
> + - minor: minor number of mapped location (0 if not mapped)
> + - inode: inode number of mapped location (0 if not mapped)
> + - inode_gen: inode generation number of mapped location (0 if not 
> mapped)

s/if not mapped/if not file-mapped/ ?

> +
> + And default sort keys are changed to daddr, iaddr, pid, tid, major,
> + minor, inode and inode_gen, see '--physid-mode'.
> +
>  -p::
>  --parent=::
>  A regex filter to identify parent. The parent is a caller of this
> @@ -223,6 +240,12 @@ OPTIONS
>   branch stacks and it will automatically switch to the branch view mode,
>   unless --no-branch-stack is used.
>  
> +--physid-mode::
> + Use the data addresses sampled using perf record -d and combine them
> + with the mmap'd area region where they are located.  This helps identify
> + which data addresses collide with similar addresses in another process
> + space.  See --sort for output choices.
> +
>  --objdump=::
>  Path to objdump binary.
>  
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index c87412b..093f5ad 100644
> --- 

Re: [PATCH 1/2] usb: ehci-exynos: Return immediately from suspend if ehci_suspend fails

2014-04-08 Thread Jingoo Han
On Wednesday, April 09, 2014 1:01 PM, Vivek Gautam wrote:
> 
> Patch 'b8efdaf USB: EHCI: add check for wakeup/suspend race'
> adds a check for possible race between suspend and wakeup interrupt,
> and thereby it returns -EBUSY as error code if there's a wakeup
> interrupt.
> So the platform host controller should not proceed further with
> its suspend callback, rather should return immediately to avoid
> powering down the essential things, like phy.
> 
> Signed-off-by: Vivek Gautam 
> Cc: Alan Stern 
> Cc: Jingoo Han 

Acked-by: Jingoo Han 

Best regards,
Jingoo Han

> ---
> 
> Based on 'usb-next' branch of Greg's usb tree.
> 
>  drivers/usb/host/ehci-exynos.c |4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/host/ehci-exynos.c b/drivers/usb/host/ehci-exynos.c
> index d1d8c47..a4550eb 100644
> --- a/drivers/usb/host/ehci-exynos.c
> +++ b/drivers/usb/host/ehci-exynos.c
> @@ -212,6 +212,8 @@ static int exynos_ehci_suspend(struct device *dev)
>   int rc;
> 
>   rc = ehci_suspend(hcd, do_wakeup);
> + if (rc)
> + return rc;
> 
>   if (exynos_ehci->otg)
>   exynos_ehci->otg->set_host(exynos_ehci->otg, >self);
> @@ -221,7 +223,7 @@ static int exynos_ehci_suspend(struct device *dev)
> 
>   clk_disable_unprepare(exynos_ehci->clk);
> 
> - return rc;
> + return 0;
>  }
> 
>  static int exynos_ehci_resume(struct device *dev)
> --
> 1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[BUG] kernel BUG at /.../block/cfq-iosched.c:3145!

2014-04-08 Thread Benjamin Herrenschmidt
Hi folks !

While testing a branch of fixes before I send it to Linus, which
happens to be based on 18a1a7a1d862ae0794a0179473d08a414dd49234,
I hit this:

kernel BUG at /home/benh/linux-powerpc-test/block/cfq-iosched.c:3145!
cpu 0x3c: Vector: 700 (Program Check) at [c03ca69bb190]
pc: c033b05c: .cfq_dispatch_requests+0x90/0x99c
lr: c033b038: .cfq_dispatch_requests+0x6c/0x99c
sp: c03ca69bb410
   msr: 90029032
  current = 0xc03ca63d32a0
  paca= 0xcffef000   softe: 0irq_happened: 0x01
pid   = 3487, comm = smartd
kernel BUG at /home/benh/linux-powerpc-test/block/cfq-iosched.c:3145!
enter ? for help
[c03ca69bb4c0] c032000c .elv_drain_elevator+0x70/0xc8
[c03ca69bb540] c0320140 .__elv_add_request+0xdc/0x27c
[c03ca69bb5e0] c03286f8 .blk_execute_rq_nowait+0xc0/0xf8
[c03ca69bb670] c03287ec .blk_execute_rq+0xbc/0xe8
[c03ca69bb810] c0332350 .sg_io+0x218/0x39c
[c03ca69bb930] c0332c3c .scsi_cmd_ioctl+0x270/0x4ac
[c03ca69bba70] c05d559c .sd_ioctl+0xa4/0xd8
[c03ca69bbb20] c032eb1c .__blkdev_driver_ioctl+0x34/0x54
[c03ca69bbb90] c032f83c .blkdev_ioctl+0x7b8/0x850
[c03ca69bbc40] c018d6e0 .block_ioctl+0x4c/0x60
[c03ca69bbcb0] c01691cc .do_vfs_ioctl+0x5cc/0x670
[c03ca69bbd90] c01692b4 .SyS_ioctl+0x44/0x70
[c03ca69bbe30] c000a024 syscall_exit+0x0/0x98
--- Exception: c00 (System Call) at 3fffb5240ee0

The storage driver is our usual IBM "IPR".

Is that a known issue ?

Cheers,
Ben.
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [clocksource] INFO: possible irq lock inversion dependency detected

2014-04-08 Thread Jet Chen

On 04/09/2014 12:40 PM, Viresh Kumar wrote:

On 9 April 2014 10:04, Jet Chen  wrote:

How did you got this in cc list ?


"abd38155f8293923de5953cc063f9e2d7ecb3f04.1396679170.git.viresh.ku...@linaro.org"





I got it from the patch you sent to me before. attach it again.
Apologizes if it's improper to cc this list.


There is no list like this :), it's just the message id number
generated by git while
sending my patch.


Oh, I see. I'm supposed to in-reply-to that message id. I guess I just simple "reply 
all" so that my email client put it in TO list.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND] ARM: spear: add __init to spear_clocksource_init()

2014-04-08 Thread Viresh Kumar
On Wed, Apr 9, 2014 at 4:04 AM, Alex Elder  wrote:
> I get a build warning because spear_clocksource_init() calls
> clocksource_mmio_init(), but it doesn't have an __init annotation.
> Fix that.
>
> Signed-off-by: Alex Elder 
> ---
>  arch/arm/mach-spear/time.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
> index 218ba5b..c4d0931 100644
> --- a/arch/arm/mach-spear/time.c
> +++ b/arch/arm/mach-spear/time.c
> @@ -71,7 +71,7 @@ static void clockevent_set_mode(enum clock_event_mode mode,
>  static int clockevent_next_event(unsigned long evt,
>  struct clock_event_device *clk_event_dev);
>
> -static void spear_clocksource_init(void)
> +static void __init spear_clocksource_init(void)
>  {
> u32 tick_rate;
> u16 val;

Acked-by: Viresh Kumar 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [clocksource] INFO: possible irq lock inversion dependency detected

2014-04-08 Thread Viresh Kumar
On 9 April 2014 10:04, Jet Chen  wrote:
>> How did you got this in cc list ?
>>
>>
>> "abd38155f8293923de5953cc063f9e2d7ecb3f04.1396679170.git.viresh.ku...@linaro.org"
>>
>> 
>>
>
> I got it from the patch you sent to me before. attach it again.
> Apologizes if it's improper to cc this list.

There is no list like this :), it's just the message id number
generated by git while
sending my patch.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [clocksource] INFO: possible irq lock inversion dependency detected

2014-04-08 Thread Jet Chen

On 04/09/2014 12:25 PM, Viresh Kumar wrote:

On 9 April 2014 06:51, Jet Chen  wrote:

spin_lock_irqsave() does fix this issue.

Tested-by: Jet Chen 


Thanks a lot :)



Welcome.


How did you got this in cc list ?

"abd38155f8293923de5953cc063f9e2d7ecb3f04.1396679170.git.viresh.ku...@linaro.org"




I got it from the patch you sent to me before. attach it again.
Apologies if it's improper to cc this list.

>From abd38155f8293923de5953cc063f9e2d7ecb3f04 Mon Sep 17 00:00:00 2001
Message-Id: 
From: Viresh Kumar 
Date: Sat, 5 Apr 2014 11:43:25 +0530
Subject: [PATCH] clocksource: register cpu notifier to remove timer from
 dying CPU

clocksource core is using add_timer_on() to run clocksource_watchdog() on all
CPUs one by one. But when a core is brought down, clocksource core doesn't
remove this timer from the dying CPU. And in this case timer core gives this
(Gives this only with unmerged code, anyway in the current code as well timer
core is migrating a pinned timer to other CPUs, which is also wrong:
http://www.gossamer-threads.com/lists/linux/kernel/1898117)

migrate_timer_list: can't migrate pinned timer: 81f06a60,
timer->function: 810d7010,deactivating it Modules linked in:

CPU: 0 PID: 1932 Comm: 01-cpu-hotplug Not tainted 3.14.0-rc1-00088-gab3c4fd #4
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0009 88001d407c38 817237bd 88001d407c80
 88001d407c70 8106a1dd 0010 81f06a60
 88001e04d040 81e3d4c0 88001e04d030 88001d407cd0
Call Trace:
 [] dump_stack+0x4d/0x66
 [] warn_slowpath_common+0x7d/0xa0
 [] warn_slowpath_fmt+0x4c/0x50
 [] ? __internal_add_timer+0x113/0x130
 [] ? clocksource_watchdog_kthread+0x40/0x40
 [] migrate_timer_list+0xdb/0xf0
 [] timer_cpu_notify+0xfc/0x1f0
 [] notifier_call_chain+0x4c/0x70
 [] __raw_notifier_call_chain+0xe/0x10
 [] cpu_notify+0x23/0x50
 [] cpu_notify_nofail+0xe/0x20
 [] _cpu_down+0x1ad/0x2e0
 [] cpu_down+0x34/0x50
 [] cpu_subsys_offline+0x14/0x20
 [] device_offline+0x95/0xc0
 [] online_store+0x40/0x90
 [] dev_attr_store+0x18/0x30
 [] sysfs_kf_write+0x3d/0x50

This patch tries to fix this by registering cpu notifiers from clocksource core,
only when we start clocksource-watchdog. And if on the CPU_DEAD notification it
is found that dying CPU was the CPU on which this timer is queued on, then it is
removed from that CPU and queued to next CPU.

Reported-by: Jet Chen 
Reported-by: Fengguang Wu 
Signed-off-by: Viresh Kumar 
---
 kernel/time/clocksource.c | 64 +++
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ba3e502..9e96853 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -23,16 +23,21 @@
  *   o Allow clocksource drivers to be unregistered
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include  /* for spin_unlock_irq() using preempt_count() m68k */
 #include 
 #include 
 
 #include "tick-internal.h"
 
+/* Tracks next CPU to queue watchdog timer on */
+static int timer_cpu;
+
 void timecounter_init(struct timecounter *tc,
 		  const struct cyclecounter *cc,
 		  u64 start_tstamp)
@@ -246,12 +251,25 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(_lock, flags);
 }
 
+void queue_timer_on_next_cpu(void)
+{
+	/*
+	 * Cycle through CPUs to check if the CPUs stay synchronized to each
+	 * other.
+	 */
+	timer_cpu = cpumask_next(timer_cpu, cpu_online_mask);
+	if (timer_cpu >= nr_cpu_ids)
+		timer_cpu = cpumask_first(cpu_online_mask);
+	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
+	add_timer_on(_timer, timer_cpu);
+}
+
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
 	cycle_t csnow, wdnow;
 	int64_t wd_nsec, cs_nsec;
-	int next_cpu, reset_pending;
+	int reset_pending;
 
 	spin_lock(_lock);
 	if (!watchdog_running)
@@ -336,27 +354,50 @@ static void clocksource_watchdog(unsigned long data)
 	if (reset_pending)
 		atomic_dec(_reset_pending);
 
-	/*
-	 * Cycle through CPUs to check if the CPUs stay synchronized
-	 * to each other.
-	 */
-	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
-	if (next_cpu >= nr_cpu_ids)
-		next_cpu = cpumask_first(cpu_online_mask);
-	watchdog_timer.expires += WATCHDOG_INTERVAL;
-	add_timer_on(_timer, next_cpu);
+	queue_timer_on_next_cpu();
 out:
 	spin_unlock(_lock);
 }
 
+static int clocksource_cpu_notify(struct notifier_block *self,
+unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	spin_lock(_lock);
+	if (!watchdog_running)
+		goto notify_out;
+
+	switch (action) {
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		if (cpu != timer_cpu)
+			break;
+		del_timer(_timer);
+		queue_timer_on_next_cpu();
+		break;
+	}
+
+notify_out:
+	spin_unlock(_lock);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block clocksource_nb = {
+	.notifier_call	= clocksource_cpu_notify,
+	

[PATCH V2] clocksource: register cpu notifier to remove timer from dying CPU

2014-04-08 Thread Viresh Kumar
The clocksource core uses add_timer_on() to run clocksource_watchdog() on all
CPUs, one by one. But when a core is brought down, the clocksource core doesn't
remove this timer from the dying CPU. In this case the timer core gives the
warning below. (It gives this only with unmerged code; in the current code the
timer core instead migrates the pinned timer to another CPU, which is also wrong:
http://www.gossamer-threads.com/lists/linux/kernel/1898117)

migrate_timer_list: can't migrate pinned timer: 81f06a60,
timer->function: 810d7010,deactivating it Modules linked in:

CPU: 0 PID: 1932 Comm: 01-cpu-hotplug Not tainted 3.14.0-rc1-00088-gab3c4fd #4
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0009 88001d407c38 817237bd 88001d407c80
 88001d407c70 8106a1dd 0010 81f06a60
 88001e04d040 81e3d4c0 88001e04d030 88001d407cd0
Call Trace:
 [] dump_stack+0x4d/0x66
 [] warn_slowpath_common+0x7d/0xa0
 [] warn_slowpath_fmt+0x4c/0x50
 [] ? __internal_add_timer+0x113/0x130
 [] ? clocksource_watchdog_kthread+0x40/0x40
 [] migrate_timer_list+0xdb/0xf0
 [] timer_cpu_notify+0xfc/0x1f0
 [] notifier_call_chain+0x4c/0x70
 [] __raw_notifier_call_chain+0xe/0x10
 [] cpu_notify+0x23/0x50
 [] cpu_notify_nofail+0xe/0x20
 [] _cpu_down+0x1ad/0x2e0
 [] cpu_down+0x34/0x50
 [] cpu_subsys_offline+0x14/0x20
 [] device_offline+0x95/0xc0
 [] online_store+0x40/0x90
 [] dev_attr_store+0x18/0x30
 [] sysfs_kf_write+0x3d/0x50

This patch tries to fix this by registering a CPU notifier from the clocksource
core, only when we start the clocksource watchdog. If, on the CPU_DEAD
notification, the dying CPU turns out to be the CPU on which this timer is
queued, the timer is removed from that CPU and queued on the next CPU.

Reported-and-tested-by: Jet Chen 
Reported-by: Fengguang Wu 
Signed-off-by: Viresh Kumar 
---
V1->V2:
- Moved 'static int timer_cpu' within #ifdef CONFIG_CLOCKSOURCE_WATCHDOG/endif
- replaced spin_lock with spin_lock_irqsave in clocksource_cpu_notify(), as Jet
  Chen reported a bug with the former.
- Tested again by Jet Chen (Thanks again :))

 kernel/time/clocksource.c | 65 +++
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ba3e502..d288f1f 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -23,10 +23,12 @@
  *   o Allow clocksource drivers to be unregistered
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include  /* for spin_unlock_irq() using preempt_count() m68k */
 #include 
 #include 
@@ -180,6 +182,9 @@ static char override_name[CS_NAME_LEN];
 static int finished_booting;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
+/* Tracks current CPU to queue watchdog timer on */
+static int timer_cpu;
+
 static void clocksource_watchdog_work(struct work_struct *work);
 static void clocksource_select(void);
 
@@ -246,12 +251,25 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(_lock, flags);
 }
 
+static void queue_timer_on_next_cpu(void)
+{
+   /*
+* Cycle through CPUs to check if the CPUs stay synchronized to each
+* other.
+*/
+   timer_cpu = cpumask_next(timer_cpu, cpu_online_mask);
+   if (timer_cpu >= nr_cpu_ids)
+   timer_cpu = cpumask_first(cpu_online_mask);
+   watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
+   add_timer_on(_timer, timer_cpu);
+}
+
 static void clocksource_watchdog(unsigned long data)
 {
struct clocksource *cs;
cycle_t csnow, wdnow;
int64_t wd_nsec, cs_nsec;
-   int next_cpu, reset_pending;
+   int reset_pending;
 
spin_lock(_lock);
if (!watchdog_running)
@@ -336,27 +354,51 @@ static void clocksource_watchdog(unsigned long data)
if (reset_pending)
atomic_dec(_reset_pending);
 
-   /*
-* Cycle through CPUs to check if the CPUs stay synchronized
-* to each other.
-*/
-   next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
-   if (next_cpu >= nr_cpu_ids)
-   next_cpu = cpumask_first(cpu_online_mask);
-   watchdog_timer.expires += WATCHDOG_INTERVAL;
-   add_timer_on(_timer, next_cpu);
+   queue_timer_on_next_cpu();
 out:
spin_unlock(_lock);
 }
 
+static int clocksource_cpu_notify(struct notifier_block *self,
+   unsigned long action, void *hcpu)
+{
+   long cpu = (long)hcpu;
+   unsigned long flags;
+
+   spin_lock_irqsave(_lock, flags);
+   if (!watchdog_running)
+   goto notify_out;
+
+   switch (action) {
+   case CPU_DEAD:
+   case CPU_DEAD_FROZEN:
+   if (cpu != timer_cpu)
+   break;
+   del_timer(_timer);
+   queue_timer_on_next_cpu();
+   break;
+   }
+
+notify_out:
+   

Re: [PATCH 1/3] w1: fix netlink refcnt leak on error path

2014-04-08 Thread David Fries
This patch is a bug fix, and I see from the mailing list I'm not the
only one to run into this bug, so it would be nice for this patch to
make it into this merge window.  I didn't tag this one for stable
because it doesn't apply cleanly due to previous changes that did make
it into the merge window.  Let me know if I should rewrite it for
stable and which kernel version.

The other two patches are more feature based changes.

On Tue, Apr 08, 2014 at 10:37:07PM -0500, David Fries wrote:
> If the message type is W1_MASTER_CMD or W1_SLAVE_CMD, then a reference
> is taken when searching for the slave or master device.  If there
> isn't any following data m->len (mlen is a copy) is 0 and packing up
> the message for later execution is skipped leaving nothing to
> decrement the reference counts.
> 
> Way back when, m->len was checked before the search that increments the
> reference count, but W1_LIST_MASTERS has no additional data, the check
> was moved in 9be62e0b2fadaf5ff causing this bug.
> 
> This change reorders to put the check before the reference count is
> incremented avoiding the problem.
> 
> Signed-off-by: David Fries 
> Acked-by: Evgeniy Polyakov 

-- 
David Fries PGP pub CB1EE8F0
http://fries.net/~david/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] cpufreq: highbank: fix ARM_HIGHBANK_CPUFREQ dependency warning

2014-04-08 Thread Viresh Kumar
On 9 April 2014 08:04, Kefeng Wang  wrote:
> When make ARCH=arm multi_v7_defconfig, we get the following warnings:
>
> warning: (ARM_HIGHBANK_CPUFREQ) selects GENERIC_CPUFREQ_CPU0 which has
> unmet direct dependencies (ARCH_HAS_CPUFREQ && CPU_FREQ && HAVE_CLK
> && REGULATOR && OF && THERMAL && CPU_THERMAL)
>
> To fix this, make ARM_HIGHBANK_CPUFREQ depend on ARCH_HAS_CPUFREQ and
> REGULATOR instead of selecting them, PM_OPP will be selected by 
> ARCH_HAS_CPUFREQ.
>
> Signed-off-by: Kefeng Wang 
> ---
>  drivers/cpufreq/Kconfig.arm | 6 +-
>  1 file changed, 1 insertion(+), 5 deletions(-)

Acked-by: Viresh Kumar 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [clocksource] INFO: possible irq lock inversion dependency detected

2014-04-08 Thread Viresh Kumar
On 9 April 2014 06:51, Jet Chen  wrote:
> spin_lock_irqsave() does fix this issue.
>
> Tested-by: Jet Chen 

Thanks a lot :)

How did you get this in the cc list?

"abd38155f8293923de5953cc063f9e2d7ecb3f04.1396679170.git.viresh.ku...@linaro.org"

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Ocfs2-devel] [PATCH 1/1] OCFS2: Remove NULL assignments on static

2014-04-08 Thread Fabian Frederick
On Wed, 09 Apr 2014 08:53:14 +0800
Wengang  wrote:

> Hi Fabian,
> 
> What's the badness if we do the assignments?
> 
> thanks,
> wengang

Hi Wengang,

Nothing wrong but AFAICS redundant.

Regards,
Fabian

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


USB: OMAP: questions on patch "Implement runtime idling and remote wakeup for OMAP EHCI controller"

2014-04-08 Thread Chao Xu
Hi,

I have two questions about this patch series. Any help is appreciated.
1. in the cover letter(https://lkml.org/lkml/2013/7/10/355), it states
"the series implements suspend/resume for the OMAP EHCI host
controller during runtime idle". I don't understand when the EHCI
host controller is "runtime idle". Does "runtime idle" refer to when all
attached USB devices are suspended? Or does it refer to whenever there
is no tx/rx ongoing in the EHCI host controller?

2. I think the purpose of the patch is to allow the USB power domain
(namely, L3INIT power domain) to enter low power state. So assuming
the power domain is now in RETENTION state, will the EHCI host be able
to detect remote wakeup in this case? i.e. will the ULPI DATA0, DATA1
and DATA3 reflect the Linestate and Interrupt when the L3INIT power
domain is in RETENTION? To put it another way, when the power domain
is in a low power state defined in the OMAP spec, are the USB host and PHY
in the "low power mode" defined in the USB spec?

Thank you!




Regards,
Chao Xu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] usb: ehci-platform: Return immediately from suspend if ehci_suspend fails

2014-04-08 Thread Vivek Gautam
Patch 'b8efdaf USB: EHCI: add check for wakeup/suspend race'
adds a check for possible race between suspend and wakeup interrupt,
and thereby it returns -EBUSY as error code if there's a wakeup
interrupt.
So the platform host controller should not proceed further with
its suspend callback, rather should return immediately to avoid
powering down the essential things, like phy.

Signed-off-by: Vivek Gautam 
Cc: Alan Stern 
Cc: Hauke Mehrtens 
Cc: Hans de Goede 
---

Based on 'usb-next' branch of Greg's usb tree.

 drivers/usb/host/ehci-platform.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index b3a0e11..60d3d1a 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -303,11 +303,13 @@ static int ehci_platform_suspend(struct device *dev)
int ret;
 
ret = ehci_suspend(hcd, do_wakeup);
+   if (ret)
+   return ret;
 
if (pdata->power_suspend)
pdata->power_suspend(pdev);
 
-   return ret;
+   return 0;
 }
 
 static int ehci_platform_resume(struct device *dev)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] usb: ehci-exynos: Return immediately from suspend if ehci_suspend fails

2014-04-08 Thread Vivek Gautam
Patch 'b8efdaf USB: EHCI: add check for wakeup/suspend race'
adds a check for possible race between suspend and wakeup interrupt,
and thereby it returns -EBUSY as error code if there's a wakeup
interrupt.
So the platform host controller should not proceed further with
its suspend callback, rather should return immediately to avoid
powering down the essential things, like phy.

Signed-off-by: Vivek Gautam 
Cc: Alan Stern 
Cc: Jingoo Han 
---

Based on 'usb-next' branch of Greg's usb tree.

 drivers/usb/host/ehci-exynos.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-exynos.c b/drivers/usb/host/ehci-exynos.c
index d1d8c47..a4550eb 100644
--- a/drivers/usb/host/ehci-exynos.c
+++ b/drivers/usb/host/ehci-exynos.c
@@ -212,6 +212,8 @@ static int exynos_ehci_suspend(struct device *dev)
int rc;
 
rc = ehci_suspend(hcd, do_wakeup);
+   if (rc)
+   return rc;
 
if (exynos_ehci->otg)
exynos_ehci->otg->set_host(exynos_ehci->otg, >self);
@@ -221,7 +223,7 @@ static int exynos_ehci_suspend(struct device *dev)
 
clk_disable_unprepare(exynos_ehci->clk);
 
-   return rc;
+   return 0;
 }
 
 static int exynos_ehci_resume(struct device *dev)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] sched/cpupri: fix cpupri_find() for high priority tasks

2014-04-08 Thread Mike Galbraith
Hi Steven,

Seems c92211d9b7727 introduced a buglet.

--snip--

Bail on task_pri >= MAX_RT_PRIO excludes userspace prio 98 and 99 tasks,
which map to 100 and 101 respectively.

A user reported that given two SCHED_RR tasks, one hog, one light, the light
task may be stacked on top of the hog iff prio >= 98, latency hit follows.

Signed-off-by: Mike Galbraith 
Cc: 
Fixes: c92211d9b7727 sched/cpupri: Remove the vec->lock
---
 kernel/sched/cpupri.c |3 ---
 1 file changed, 3 deletions(-)

--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -70,9 +70,6 @@ int cpupri_find(struct cpupri *cp, struc
int idx = 0;
int task_pri = convert_prio(p->prio);
 
-   if (task_pri >= MAX_RT_PRIO)
-   return 0;
-
for (idx = 0; idx < task_pri; idx++) {
struct cpupri_vec *vec  = >pri_to_cpu[idx];
int skip = 0;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Intel-gfx] i915 DVI resolution regression (3.13.7+)

2014-04-08 Thread Dave Airlie
On Tue, Apr 8, 2014 at 5:32 PM, Daniel J Blueman  wrote:
> On 8 April 2014 15:14, Jani Nikula  wrote:
>> On Tue, 08 Apr 2014, Daniel J Blueman  wrote:
>>> Ville et al,
>>>
>>> It looks like commit e3ea8fa6beaf55fee64bf816f3b8a80ad733b2c2 (or
>>> another commit in 3.13.7) broke modes which require DVI-D dual-link,
>>> eg 2560x1440 with my panel.
>>>
>>> I don't see these modelines in 3.13.7 or later (eg 3.14):
>>>
>>> [ 5.582] (II) intel(0): Modeline "2560x1440"x60.0  312.25  2560
>>> 2752 3024 3488  1440 1443 1448 1493 -hsync +vsync (89.5 kHz eP)
>>> [ 5.582] (II) intel(0): Modeline "2560x1440"x60.0  312.25  2560
>>> 2752 3024 3488  1440 1443 1448 1493 -hsync +vsync (89.5 kHz eP)
>>> [ 5.582] (II) intel(0): Modeline "1920x1200"x59.9  193.25  1920
>>> 2056 2256 2592  1200 1203 1209 1245 -hsync +vsync (74.6 kHz e)
>>>
>>> My monitor is a Dell U2713HM; mobo uses an H87 chipset with i5-4670.
>>
>> By allowing those modes we regressed setups which were not capable of
>> displaying them. So you've got an HDMI->DVI converter?
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=72961
>
> I am using a dual-link DVI-D to DVI-D cable to this monitor, since I
> previously couldn't get 2560x1440 via HDMI.

Intel hw has dual-link DVI-D? I'm not sure I've ever seen that, is
this SDVO device or plain DVI-D?

Dave.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] w1: optional bundling of netlink kernel replies

2014-04-08 Thread David Fries
Applications can submit a set of commands in one packet to the kernel,
and in some cases it is required such as reading the temperature
sensor results.  This adds an option W1_CN_BUNDLE to the flags of
cn_msg to request the kernel to reply in one packet for efficiency.

The cn_msg flags now check for unknown flag values and return an error
if one is seen.  See "Proper handling of unknown flags in system
calls" http://lwn.net/Articles/588444/

This corrects the ack values returned as per the protocol standard,
namely the original ack for status messages and seq + 1 for all others
such as the data returned from a read.

Some of the common variable names have been standardized as follows.
struct cn_msg *cn
struct w1_netlink_msg *msg
struct w1_netlink_cmd *cmd
struct w1_master *dev

When an argument and a function scope variable would collide, add req_
to the argument.

Signed-off-by: David Fries 
Acked-by: Evgeniy Polyakov 
---
 Documentation/connector/connector.txt |2 +-
 Documentation/w1/w1.generic   |2 +-
 Documentation/w1/w1.netlink   |   13 +-
 drivers/w1/w1.h   |8 -
 drivers/w1/w1_netlink.c   |  649 -
 drivers/w1/w1_netlink.h   |   36 ++
 6 files changed, 447 insertions(+), 263 deletions(-)

diff --git a/Documentation/connector/connector.txt 
b/Documentation/connector/connector.txt
index e56abdb..f6215f9 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -118,7 +118,7 @@ acknowledge number MUST be the same + 1.
 If we receive a message and its sequence number is not equal to one we
 are expecting, then it is a new message.  If we receive a message and
 its sequence number is the same as one we are expecting, but its
-acknowledge is not equal to the acknowledge number in the original
+acknowledge is not equal to the sequence number in the original
 message + 1, then it is a new message.
 
 Obviously, the protocol header contains the above id.
diff --git a/Documentation/w1/w1.generic b/Documentation/w1/w1.generic
index a31c5a2..b2033c6 100644
--- a/Documentation/w1/w1.generic
+++ b/Documentation/w1/w1.generic
@@ -82,7 +82,7 @@ driver - (standard) symlink to the w1 driver
 w1_master_add  - Manually register a slave device
 w1_master_attempts - the number of times a search was attempted
 w1_master_max_slave_count
-   - the maximum slaves that may be attached to a master
+   - maximum number of slaves to search for at a time
 w1_master_name - the name of the device (w1_bus_masterX)
 w1_master_pullup   - 5V strong pullup 0 enabled, 1 disabled
 w1_master_remove   - Manually remove a slave device
diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink
index 927a52c..ef27271 100644
--- a/Documentation/w1/w1.netlink
+++ b/Documentation/w1/w1.netlink
@@ -30,7 +30,7 @@ Protocol.
W1_SLAVE_CMD
userspace command for slave device
(read/write/touch)
-   __u8 res- reserved
+   __u8 status - error indication from kernel
__u16 len   - size of data attached to this header data
union {
__u8 id[8];  - slave unique device id
@@ -44,10 +44,14 @@ Protocol.
__u8 cmd- command opcode.
W1_CMD_READ - read command
W1_CMD_WRITE- write command
-   W1_CMD_TOUCH- touch command
-   (write and sample data back to userspace)
W1_CMD_SEARCH   - search command
W1_CMD_ALARM_SEARCH - alarm search command
+   W1_CMD_TOUCH- touch command
+   (write and sample data back to userspace)
+   W1_CMD_RESET- send bus reset
+   W1_CMD_SLAVE_ADD- add slave to kernel list
+   W1_CMD_SLAVE_REMOVE - remove slave from kernel list
+   W1_CMD_LIST_SLAVES  - get slaves list from kernel
__u8 res- reserved
__u16 len   - length of data for this command
For read command data must be allocated like for write command
@@ -87,8 +91,7 @@ format:
id0 ... idN
 
Each message is at most 4k in size, so if number of master devices
-   exceeds this, it will be split into several messages,
-   cn.seq will be increased for each one.
+   exceeds this, it will be split into several messages.
 
 W1 search and alarm search commands.
 request:
diff --git a/drivers/w1/w1.h b/drivers/w1/w1.h
index 734dab7..56a49ba 100644
--- a/drivers/w1/w1.h
+++ b/drivers/w1/w1.h
@@ -203,7 +203,6 @@ enum w1_master_flags {
  * @search_id: allows continuing a search
  * @refcnt:reference count
  * @priv:  

[PATCH 1/3] w1: fix netlink refcnt leak on error path

2014-04-08 Thread David Fries
If the message type is W1_MASTER_CMD or W1_SLAVE_CMD, then a reference
is taken when searching for the slave or master device.  If there
isn't any following data m->len (mlen is a copy) is 0 and packing up
the message for later execution is skipped leaving nothing to
decrement the reference counts.

Way back when, m->len was checked before the search that increments the
reference count, but W1_LIST_MASTERS has no additional data, the check
was moved in 9be62e0b2fadaf5ff causing this bug.

This change reorders to put the check before the reference count is
incremented avoiding the problem.

Signed-off-by: David Fries 
Acked-by: Evgeniy Polyakov 
---
 drivers/w1/w1_netlink.c |   44 ++--
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 5234964..a02704a 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -300,12 +300,6 @@ static int w1_process_command_root(struct cn_msg *msg,
struct w1_netlink_msg *w;
u32 *id;
 
-   if (mcmd->type != W1_LIST_MASTERS) {
-   printk(KERN_NOTICE "%s: msg: %x.%x, wrong type: %u, len: %u.\n",
-   __func__, msg->id.idx, msg->id.val, mcmd->type, 
mcmd->len);
-   return -EPROTO;
-   }
-
cn = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!cn)
return -ENOMEM;
@@ -441,6 +435,9 @@ static void w1_process_cb(struct w1_master *dev, struct 
w1_async_cmd *async_cmd)
w1_netlink_send_error(>block->msg, node->m, cmd,
node->block->portid, err);
 
+   /* ref taken in w1_search_slave or w1_search_master_id when building
+* the block
+*/
if (sl)
w1_unref_slave(sl);
else
@@ -503,30 +500,42 @@ static void w1_cn_callback(struct cn_msg *msg, struct 
netlink_skb_parms *nsp)
 
msg_len = msg->len;
while (msg_len && !err) {
-   struct w1_reg_num id;
-   u16 mlen = m->len;
 
dev = NULL;
sl = NULL;
 
-   memcpy(, m->id.id, sizeof(id));
-#if 0
-   printk("%s: %02x.%012llx.%02x: type=%02x, len=%u.\n",
-   __func__, id.family, (unsigned long long)id.id, 
id.crc, m->type, m->len);
-#endif
if (m->len + sizeof(struct w1_netlink_msg) > msg_len) {
err = -E2BIG;
break;
}
 
+   /* execute on this thread, no need to process later */
+   if (m->type == W1_LIST_MASTERS) {
+   err = w1_process_command_root(msg, m, nsp->portid);
+   goto out_cont;
+   }
+
+   /* All following message types require additional data,
+* check here before references are taken.
+*/
+   if (!m->len) {
+   err = -EPROTO;
+   goto out_cont;
+   }
+
+   /* both search calls take reference counts */
if (m->type == W1_MASTER_CMD) {
dev = w1_search_master_id(m->id.mst.id);
} else if (m->type == W1_SLAVE_CMD) {
-   sl = w1_search_slave();
+   sl = w1_search_slave((struct w1_reg_num *)m->id.id);
if (sl)
dev = sl->master;
} else {
-   err = w1_process_command_root(msg, m, nsp->portid);
+   printk(KERN_NOTICE
+   "%s: msg: %x.%x, wrong type: %u, len: %u.\n",
+   __func__, msg->id.idx, msg->id.val,
+   m->type, m->len);
+   err = -EPROTO;
goto out_cont;
}
 
@@ -536,8 +545,6 @@ static void w1_cn_callback(struct cn_msg *msg, struct 
netlink_skb_parms *nsp)
}
 
err = 0;
-   if (!mlen)
-   goto out_cont;
 
atomic_inc(>refcnt);
node->async.cb = w1_process_cb;
@@ -557,7 +564,8 @@ out_cont:
if (err)
w1_netlink_send_error(msg, m, NULL, nsp->portid, err);
msg_len -= sizeof(struct w1_netlink_msg) + m->len;
-   m = (struct w1_netlink_msg *)(((u8 *)m) + sizeof(struct 
w1_netlink_msg) + m->len);
+   m = (struct w1_netlink_msg *)(((u8 *)m) +
+   sizeof(struct w1_netlink_msg) + m->len);
 
/*
 * Let's allow requests for nonexisting devices.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] connector: allow multiple messages to be sent in one packet

2014-04-08 Thread David Fries
This increases the amount of bundling to reduce the number of packets
sent.  For the one wire use there can be multiple struct
w1_netlink_cmd in a struct w1_netlink_msg and multiple of those in
struct cn_msg, and with this change multiple of those in a struct
nlmsghdr, and at each level the len identifies there being multiple of
the next.

Signed-off-by: David Fries 
Acked-by: Evgeniy Polyakov 
---
 Documentation/connector/connector.txt |   13 ++---
 drivers/connector/connector.c |   17 +++--
 include/linux/connector.h |1 +
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/Documentation/connector/connector.txt 
b/Documentation/connector/connector.txt
index e5c5f5e..e56abdb 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -24,7 +24,8 @@ netlink based networking for inter-process communication in a 
significantly
 easier way:
 
 int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct 
cn_msg *, struct netlink_skb_parms *));
-void cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask);
+void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 
__group, int gfp_mask);
+void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int 
gfp_mask);
 
 struct cb_id
 {
@@ -71,15 +72,21 @@ void cn_del_callback(struct cb_id *id);
  struct cb_id *id  - unique connector's user identifier.
 
 
-int cn_netlink_send(struct cn_msg *msg, u32 __groups, int gfp_mask);
+int cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 
__groups, int gfp_mask);
+int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __groups, int 
gfp_mask);
 
  Sends message to the specified groups.  It can be safely called from
  softirq context, but may silently fail under strong memory pressure.
  If there are no listeners for given group -ESRCH can be returned.
 
  struct cn_msg *   - message header(with attached data).
+ u16 len   - for *_multi multiple cn_msg messages can be 
sent
+ u32 port  - destination port.
+ If non-zero the message will be sent to the
+ given port, which should be set to the
+ original sender.
  u32 __group   - destination group.
- If __group is zero, then appropriate group 
will
+ If port and __group is zero, then appropriate 
group will
  be searched through all registered connector 
users,
  and message will be delivered to the group 
which was
  created for user with the same ID as in msg.
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index b14f1d3..f612d68 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -43,6 +43,8 @@ static struct cn_dev cdev;
 static int cn_already_initialized;
 
 /*
+ * Sends mult (multiple) cn_msg at a time.
+ *
  * msg->seq and msg->ack are used to determine message genealogy.
  * When someone sends message it puts there locally unique sequence
  * and random acknowledge numbers.  Sequence number may be copied into
@@ -62,10 +64,13 @@ static int cn_already_initialized;
  * the acknowledgement number in the original message + 1, then it is
  * a new message.
  *
+ * If msg->len != len, then additional cn_msg messages are expected following
+ * the first msg.
+ *
  * The message is sent to, the portid if given, the group if given, both if
  * both, or if both are zero then the group is looked up and sent there.
  */
-int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
gfp_t gfp_mask)
 {
struct cn_callback_entry *__cbq;
@@ -98,7 +103,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 
__group,
if (!portid && !netlink_has_listeners(dev->nls, group))
return -ESRCH;
 
-   size = sizeof(*msg) + msg->len;
+   size = sizeof(*msg) + len;
 
skb = nlmsg_new(size, gfp_mask);
if (!skb)
@@ -121,6 +126,14 @@ int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 
__group,
 gfp_mask);
return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT));
 }
+EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
+
+/* same as cn_netlink_send_mult except msg->len is used for len */
+int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
+   gfp_t gfp_mask)
+{
+   return cn_netlink_send_mult(msg, msg->len, portid, __group, gfp_mask);
+}
 EXPORT_SYMBOL_GPL(cn_netlink_send);
 
 /*
diff --git a/include/linux/connector.h b/include/linux/connector.h
index be9c4747..f8fe863 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ 

[PATCH 0/3] w1: fixes and bundling replies

2014-04-08 Thread David Fries
A program can bundle a sequence of commands in one netlink packet but
the kernel was having to reply in many different packets, this adds a
flag W1_CN_BUNDLE in the cn_msg.flags to allow the kernel to bundle
the replies into one message.  This is opt-in to avoid breaking
programs that aren't expecting additional messages.

Netlink connector now has a new call cn_netlink_send_mult, which
allows sending multiple cn_msg structures in a nlmsghdr structure.

I tested with my client program that will bundle up 14 temperature
sensor conversions, then after a delay, bundle up another set of
commands to read all 14 with the bundle bit set.  I also tested with a
two-year-old version of the software that sends requests to one slave
at a time (bundling only the write/read to get the data out), and
doesn't have code to read the bundling that this patch adds.  Both
operate correctly running at the same time.

 Documentation/connector/connector.txt |   15 +-
 Documentation/w1/w1.generic   |2 +-
 Documentation/w1/w1.netlink   |   13 +-
 drivers/connector/connector.c |   17 +-
 drivers/w1/w1.h   |8 -
 drivers/w1/w1_netlink.c   |  673 -
 drivers/w1/w1_netlink.h   |   36 ++
 include/linux/connector.h |1 +
 8 files changed, 489 insertions(+), 276 deletions(-)

[PATCH 1/3] w1: fix netlink refcnt leak on error path
[PATCH 2/3] connector: allow multiple messages to be sent in one
[PATCH 3/3] w1: optional bundling of netlink kernel replies
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC] bpf tracing filters API proposal. Was: [RFC PATCH 00/28] ktap: A lightweight dynamic tracing tool for Linux

2014-04-08 Thread Alexei Starovoitov
On Tue, Apr 8, 2014 at 2:08 AM, Peter Zijlstra  wrote:
> On Tue, Apr 08, 2014 at 04:40:36PM +0900, Masami Hiramatsu wrote:
>> (2014/04/07 22:55), Peter Zijlstra wrote:
>> > On Wed, Apr 02, 2014 at 09:42:03AM +0200, Ingo Molnar wrote:
>> >> I'd suggest using C syntax instead initially, because that's what the
>> >> kernel is using.
>> >>
>> >> The overwhelming majority of people probing the kernel are
>> >> programmers, so there's no point in inventing new syntax, we should
>> >> reuse existing syntax!
>> >
>> > Yes please, keep it C, I forever forget all other syntaxes. While I have
>> > in the past known other languages, I never use them frequently enough to
>> > remember them. And there's nothing more frustrating than having to fight
>> > a tool/language when you just want to get work done.
>>
>> Why wouldn't you write a kernel module in C directly? :)
>> It seems that all what you need is not a tracing language nor a bytecode
>> engine, but an well organized tracing APIs(library?) for writing a kernel
>> module for tracing...
>
> Most my kernels are CONFIG_MODULE=n :-) Also, I never can remember how
> to do modules.
>
> That said; what I currently do is hack the kernel with debug bits and
> pieces and run that, which is effectively the same. It's just that it's
> impossible to save/share these hacks in any sane fashion.

seconded.
For debugging I have a similar setup:
a few ko template dirs that I copy into a new dir, then tweak, insmod, dmesg.
The process is tedious, since one has to think through every line
of the code before doing insmod.
Similar slow process to explore unfamiliar kernel territory:
add some conditional printks and stackdumps,
think through, recompile, reboot.
What I would like to see is something like:

perf run file.c

where file.c contains my debugging code and looks as close as
possible to normal kernel code:

attach("net:netif_receive_skb")
void my_filter(struct bpf_context *ctx)
{
char devname[4] = "lo";
struct net_device *dev;
struct sk_buff *skb = 0;

skb = (struct sk_buff *)ctx->arg1;
dev = bpf_load_pointer(&skb->dev);
if (bpf_memcmp(dev->name, devname, 2) == 0) {
char fmt[] = "skb %p dev %p \n";
bpf_trace_printk(fmt, sizeof(fmt), (long)skb, (long)dev, 0);
}
}

and I don't need to think hard while writing it, since whatever wrong
memory accesses I do, it shouldn't crash the kernel.

above is a working example, but it needs obvious improvements:
- trace_printk(), memcmp() need to be able to accept 'char *'
  in a normal way
- bpf_load_pointer() can be either a macro or whole bpf program
  can be a no-fault zone, so we can have C like:
  if (strcmp(skb->dev->name, "lo") == 0)

'perf' would run C->bpf compiler and orchestrate attaching
bpf programs to events and printing back results.

Answering Jovi's point about "is supported" vs "will be supported":
it is true.
December patches are obviously obsolete and every building
block will get through its own feedback/rewrite cycles.

For example:
- In december I've been using simplified obj_file format that
llvm was generating and kernel was parsing while loading.
- Last week I mentioned that probably makes sense to
use standard elf. It's actually less code in llvm backend
to output elf than custom obj_file
- today I'm thinking that kernel shouldn't be dealing with
  either elf or custom obj_file at all

kernel API for bpf loading should be simpler.
we already have sk_unattached_filter_create().
we can expose it to userspace and add:
sk_filter_associate_to_event()
Then earlier "one bpf program = one event" misunderstanding
wouldn't have happened.
Userspace can decide what syntax to use to associate
tracing filters to events.
llvm compiler should not care. It just compiles C into elf
with function bodies being ibpf instructions.
Then perf interprets this elf file in userspace and calls
sk_unattached_filter_create() N times and
sk_filter_associate_to_event() M times.
Then waits for user input, tears down things and prints tracebuf.

Similar basic interface I'm thinking to use for bpf tables.
Probably makes sense to drop 'bpf' prefix, since they're just
hash tables. Little to do with bpf.
Have a netlink API from user into kernel:
- create hash table (num_of_entries, key_size, value_size, id)
- dump table via netlink
- add/remove key/value pair
Some kernel module may use it to transfer the data between
kernel and userspace.
This can be a generic kernel/user data sharing facility.

Also let bpf programs do 'table_lookup/update', so that
filters can store interesting data.

To summarize,
proposed new user->kernel API via netlink or debugfs is:
- sk_unattached_filter_create(bpf prog)
- sk_filter_associate_to_event(bpf_prog_id, event)
- hash table create/dump/add/remove
That's it.

event creation, tracebuf facilities are reused as is.

ibpf interpreter, ibpf jits, ibpf verifier are reused across
socket filtering, seccomp, tracing filters.

perf would call llvm compiler, 

Re: [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data

2014-04-08 Thread Don Zickus
On Mon, Mar 24, 2014 at 03:34:34PM -0400, Don Zickus wrote:
> In order for the c2c tool to work correctly, it needs to properly
> sort all the records on uniquely identifiable data addresses.  These
> unique addresses are converted from virtual addresses provided by the
> hardware into a kernel address using an mmap2 record as the decoder.
> 
> Once a unique address is converted, we can sort on them based on
> various rules.  Then it becomes clear which address are overlapping
> with each other across mmap regions or pid spaces.

I am finishing up another way to sort this data that might make more sense
than the approach in this patch.  Hopefully tomorrow I can do that.

Cheers,
Don
> 
> This patch just creates the rules and inserts the records into a
> sort entry for safe keeping until later patches process them.
> 
> The general sorting rule is:
> 
> o group cpumodes together
> o if (nonzero major/minor number - ie mmap'd areas)
>   o sort on major, minor, inode, inode generation numbers
> o else if cpumode is not kernel
>   o sort on pid
> o sort on data addresses
> 
> I also hacked in the concept of 'color'.  The purpose of that bit is to
> provide hints later when processing these records that indicate a new unique
> address has been encountered.  Because later processing only checks the data
> addresses, there can be a theoretical scenario that similar sequential data
> addresses (when walking the rbtree) could be misinterpreted as overlapping
> when in fact they are not.
> 
> Sample output: (perf report --stdio --physid-mode)
> 
> 18.93%  [k] 0xc900139c40b0  [k] igb_update_stats kworker/0:1:  
> 257   257  0  0   0  0
>  7.63%  [k] 0x88082e6cf0a8  [k] watchdog_timer_fnswapper:
> 0 0  0  0   0  0
>  1.86%  [k] 0x88042ef94700  [k] _raw_spin_lock   swapper:
> 0 0  0  0   0  0
>  1.77%  [k] 0x8804278afa50  [k] __switch_to  swapper:
> 0 0  0  0   0  0
> 
> V3: split out the sorting into unique entries.  This makes it look
>   far less ugly
> create a new 'physid mode' to group all the sorting rules together
>   (mimics the mem-mode)
> 
> Signed-off-by: Don Zickus 
> ---
>  tools/perf/builtin-report.c |  20 ++-
>  tools/perf/util/hist.c  |  27 +++-
>  tools/perf/util/hist.h  |   8 ++
>  tools/perf/util/sort.c  | 294 
> 
>  tools/perf/util/sort.h  |  13 ++
>  5 files changed, 358 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index c87412b..093f5ad 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -49,6 +49,7 @@ struct report {
>   boolshow_threads;
>   boolinverted_callchain;
>   boolmem_mode;
> + boolphysid_mode;
>   boolheader;
>   boolheader_only;
>   int max_stack;
> @@ -241,7 +242,7 @@ static int process_sample_event(struct perf_tool *tool,
>   ret = report__add_branch_hist_entry(rep, , sample, evsel);
>   if (ret < 0)
>   pr_debug("problem adding lbr entry, skipping event\n");
> - } else if (rep->mem_mode == 1) {
> + } else if ((rep->mem_mode == 1) || (rep->physid_mode)) {
>   ret = report__add_mem_hist_entry(rep, , sample, evsel);
>   if (ret < 0)
>   pr_debug("problem adding mem entry, skipping event\n");
> @@ -746,6 +747,7 @@ int cmd_report(int argc, const char **argv, const char 
> *prefix __maybe_unused)
>   OPT_BOOLEAN(0, "demangle", _conf.demangle,
>   "Disable symbol demangling"),
>   OPT_BOOLEAN(0, "mem-mode", _mode, "mem access profile"),
> + OPT_BOOLEAN(0, "physid-mode", _mode, "physid access 
> profile"),
>   OPT_CALLBACK(0, "percent-limit", , "percent",
>"Don't show entries under that percent", 
> parse_percent_limit),
>   OPT_END()
> @@ -817,6 +819,22 @@ repeat:
>   sort_order = 
> "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
>   }
>  
> + if (report.physid_mode) {
> + if ((sort__mode == SORT_MODE__BRANCH) ||
> + (sort__mode == SORT_MODE__MEMORY)) {
> + pr_err("branch or memory and physid mode 
> incompatible\n");
> + goto error;
> + }
> + sort__mode = SORT_MODE__PHYSID;
> +
> + /*
> +  * if no sort_order is provided, then specify
> +  * branch-mode specific order
> +  */
> + if (sort_order == default_sort_order)
> + sort_order = 
> "daddr,iaddr,pid,tid,major,minor,inode,inode_gen";
> + }
> +
>   if (setup_sorting() < 0) {
>   

Re: [PATCH 3/3] ie31200_edac: Add driver

2014-04-08 Thread Jason Baron
On 04/08/2014 06:34 PM, Luck, Tony wrote:
>>> Btw, this driver is polling, AFAICT. Doesn't e3-12xx support the CMCI
>>> interrupt which you can feed into this driver directly and thus not need
>>> the polling at all?
>> On the system with the ce and ue events that I'm testing on, I don't see
>> 'MCE' nudge above 0, in /proc/interrupts. So I think that implies that
>> we are not getting any CMCI there?
> CMCI will bump up the "THR" (Threshold) entries in /proc/interrupts.

Ok, so on the system with ue and ce events (as reported by driver
and confirmed with a memory scanner), "THR" is 0 across
all cpus, and I see no machine checks in the logs...

>> So if possible maybe we can confirm with Intel whether we expect an MCE
>> for memory errors...
> MCG_CAP bit 10 tells you whether a given processor implements CMCI.
> If that is set - then MCi_CTL2 bit 30 indicates whether a given bank
> supports it (Linux tries to set this bit, if it sticks, then it knows that 
> CMCI
> is supported - Linux also assigns ownership of the bank to the first cpu
> to successfully set it (since a bank may be shared by multiple threads/cores
> on a package).
>
> Consumed uncorrectable errors should generate a machine check. Which
> on the E3-12xx series will be a fatal machine check: MCi_STATUS.PCC=1
>
> -Tony
>

Hmmm...as I said, I'm not getting any machine checks with ue errors. I've
got a fairly old kernel on the system atm, I will try loading a newer kernel,
to see if that makes any difference...

Thanks,

-Jason

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] MMC updates for 3.15-rc1

2014-04-08 Thread Chris Ball
Hi Linus,

Please pull from:

  git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git 
tags/mmc-updates-for-3.15-rc1

to receive the MMC merge for 3.15.  These patches have been tested in
linux-next, and there are currently no conflicts.  There's a MAINTAINERS
update to add Ulf Hansson as MMC co-maintainer.  Thanks.
  
The following changes since commit b28a960c42fcd9cfc987441fa6d1c1a471f0f9ed:

  Linux 3.14-rc2 (2014-02-09 18:15:47 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git 
tags/mmc-updates-for-3.15-rc1

for you to fetch changes up to c67480173f72e883235dd0ad09d90156c8f87600:

  mmc: sdhci-acpi: Intel SDIO has broken card detect (2014-04-07 21:27:14 -0400)


MMC highlights for 3.15:

Core:
 - CONFIG_MMC_UNSAFE_RESUME=y is now default behavior.
 - DT bindings for SDHCI UHS, eMMC HS200, high-speed DDR, at 1.8/1.2V.
 - Add GPIO descriptor based slot-gpio card detect API.

Drivers:
 - dw_mmc: Refactor SOCFPGA support as a variant inside dw_mmc-pltfm.c.
 - mmci: Support HW busy detection on ux500.
 - omap: Support MMC_ERASE.
 - omap_hsmmc: Support MMC_PM_KEEP_POWER, MMC_PM_WAKE_SDIO_IRQ, (a)cmd23.
 - rtsx: Support pre-req/post-req async.
 - sdhci: Add support for Realtek RTS5250 controllers.
 - sdhci-acpi: Add support for 80860F16, fix 80860F14/SDIO card detect.
 - sdhci-msm: Add new driver for Qualcomm SDHCI chipset support.
 - sdhci-pxav3: Add support for Marvell Armada 380 and 385 SoCs.


Adrian Hunter (7):
  mmc: slot-gpio: Record GPIO descriptors instead of GPIO numbers
  mmc: slot-gpio: Split out CD IRQ request into a separate function
  mmc: slot-gpio: Add GPIO descriptor based CD GPIO API
  mmc: sdhci-acpi: Fix broken card detect for ACPI HID 80860F14
  mmc: sdhci-acpi: Add device id 80860F16
  mmc: sdhci: Allow for irq being shared
  mmc: sdhci-acpi: Intel SDIO has broken card detect

Arend van Spriel (1):
  sdhci: only reprogram retuning timer when flag is set

Balaji T K (14):
  mmc: omap_hsmmc: use devm_regulator API
  mmc: omap_hsmmc: handle vcc and vcc_aux independently
  regulator: add pbias regulator support
  mmc: omap_hsmmc: adapt hsmmc to use pbias regulator
  ARM: dts: add pbias dt node
  ARM: OMAP: enable SYSCON and REGULATOR_PBIAS in omap2plus_defconfig
  mmc: omap_hsmmc: remove pbias workaround
  mmc: omap_hsmmc: fix return error code for of_get_hsmmc_pdata
  mmc: omap_hsmmc: remove redundant reset done
  mmc: omap_hsmmc: save clock rate to use in interrupt context
  mmc: omap_hsmmc: fix request done for sbc error case
  mmc: omap_hsmmc: split dma setup
  mmc: omap_hsmmc: add cmd23 support
  mmc: omap_hsmmc: add autocmd23 support

Chris Ball (1):
  Revert "dts: socfpga: Add support for SD/MMC on the SOCFPGA platform"

Christian Daudt (1):
  mmc: rename ARCH_BCM to ARCH_BCM_MOBILE

Dan Carpenter (1):
  mmc: omap_hsmmc: remove a duplicative test

Daniel Mack (1):
  mmc: omap_hsmmc: support more DT properties

Dinh Nguyen (3):
  mmc: dw_mmc-socfpga: Remove the SOCFPGA specific platform for dw_mmc
  mmc: dw_mmc: Add support for SOCFPGA's platform specific implementation
  dts: socfpga: Add support for SD/MMC on the SOCFPGA platform

Felipe Balbi (1):
  mmc: dw_mmc: fix possible build error

Georgi Djakov (3):
  mmc: sdhci-msm: Qualcomm SDHCI binding documentation
  mmc: sdhci-msm: Initial support for Qualcomm chipsets
  mmc: sdhci-msm: Add platform_execute_tuning implementation

Jaehoon Chung (1):
  mmc: core: Add DT bindings for eMMC HS200 1.8/1.2V

Jarkko Nikula (7):
  mmc: omap: Fix NULL pointer dereference due uninitialized cover_tasklet
  mmc: omap: Convert to devm_kzalloc
  mmc: omap: Remove duplicate host->irq assignment
  mmc: omap: Remove mem_res field from struct mmc_omap_host
  mmc: omap: Convert to devm_ioremap_resource
  mmc: omap: Remove always set use_dma flag from struct mmc_omap_host
  mmc: omap: Add erase capability

Kuninori Morimoto (5):
  mmc: sdhi: tidyup sh_mobile_sdhi_of_match position
  mmc: sdhi: update sh_mobile_sdhi_of_data for r8a7778
  mmc: sdhi: update sh_mobile_sdhi_of_data for r8a7779
  mmc: sdhi: update sh_mobile_sdhi_of_data for r8a7790
  mmc: sdhi: update sh_mobile_sdhi_of_data for r8a7791

Marcin Wojtas (1):
  mmc: sdhci-pxav3: add support for the Armada 38x SDHCI controller

Micky Ching (4):
  mmc: sdhci: add support for realtek rts5250
  mmc: rtsx: fix card poweroff bug
  mmc: rtsx: modify phase searching method for tuning
  mmc: rtsx: add support for pre_req and post_req

Nishanth Menon (2):
  mmc: omap_hsmmc: Add support for quirky omap3 hsmmc controller
  ARM: dts: omap3-ldp: fix mmc configuration

Russell King (6):
  mmc: sdhci-bcm-kona: fix 

[PATCH RT 0/2] rwsem-rt: Make rwsem rt closer to mainline

2014-04-08 Thread Steven Rostedt
Looking at mainline's down_read() I noticed that reader locks of rwsems
are not made to nest. In fact, they should not. Although, it may seem
fine if a down_read() nests as multiple readers can have the lock,
rwsems are fair locks. That is, if a writer were to block on a rwsem
while readers have the lock, a new reader will also block. If a reader
were to try to take the lock again while a writer was waiting, it
would block, and cause a deadlock as it has the lock it's trying
to grab and won't let it go as the writer is waiting.

I also found that the rt_mutex_init() is identical in the two places
it is defined in rtmutex.h.

Steven Rostedt (Red Hat) (2):
  rwsem-rt: Do not allow readers to nest
  rtmutex: Remove duplicate rt_mutex_init()


 include/linux/rtmutex.h  | 12 +++-
 include/linux/rwsem_rt.h |  1 -
 kernel/rt.c  | 37 -
 3 files changed, 11 insertions(+), 39 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RT 2/2] rtmutex: Remove duplicate rt_mutex_init()

2014-04-08 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

The rt_mutex_init() macro is the same whether or not
CONFIG_DEBUG_RT_MUTEX is set. Remove the duplicate.

Signed-off-by: Steven Rostedt 
---
 include/linux/rtmutex.h | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index fa18682..f7e79e8 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -62,25 +62,19 @@ struct hrtimer_sleeper;
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
, .name = #mutexname, .file = __FILE__, .line = __LINE__
 
-# define rt_mutex_init(mutex)  \
-   do {\
-   raw_spin_lock_init(&(mutex)->wait_lock);\
-   __rt_mutex_init(mutex, #mutex); \
-   } while (0)
-
  extern void rt_mutex_debug_task_free(struct task_struct *tsk);
 #else
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
 
+# define rt_mutex_debug_task_free(t)   do { } while (0)
+#endif
+
 # define rt_mutex_init(mutex)  \
do {\
raw_spin_lock_init(&(mutex)->wait_lock);\
__rt_mutex_init(mutex, #mutex); \
} while (0)
 
-# define rt_mutex_debug_task_free(t)   do { } while (0)
-#endif
-
 #define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
.wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \
-- 
1.8.5.3


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RT 1/2] rwsem-rt: Do not allow readers to nest

2014-04-08 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

The readers of mainline rwsems are not allowed to nest, the rwsems in the
PREEMPT_RT kernel should not nest either.

Signed-off-by: Steven Rostedt 
---
 include/linux/rwsem_rt.h |  1 -
 kernel/rt.c  | 37 -
 2 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
index e94d945..a81151c 100644
--- a/include/linux/rwsem_rt.h
+++ b/include/linux/rwsem_rt.h
@@ -20,7 +20,6 @@
 
 struct rw_semaphore {
struct rt_mutex lock;
-   int read_depth;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map  dep_map;
 #endif
diff --git a/kernel/rt.c b/kernel/rt.c
index 5d17727..bb72347 100644
--- a/kernel/rt.c
+++ b/kernel/rt.c
@@ -316,10 +316,8 @@ EXPORT_SYMBOL(rt_up_write);
 
 void  rt_up_read(struct rw_semaphore *rwsem)
 {
-   if (--rwsem->read_depth == 0) {
-   rwsem_release(>dep_map, 1, _RET_IP_);
-   rt_mutex_unlock(>lock);
-   }
+   rwsem_release(>dep_map, 1, _RET_IP_);
+   rt_mutex_unlock(>lock);
 }
 EXPORT_SYMBOL(rt_up_read);
 
@@ -330,7 +328,6 @@ EXPORT_SYMBOL(rt_up_read);
 void  rt_downgrade_write(struct rw_semaphore *rwsem)
 {
BUG_ON(rt_mutex_owner(>lock) != current);
-   rwsem->read_depth = 1;
 }
 EXPORT_SYMBOL(rt_downgrade_write);
 
@@ -367,37 +364,20 @@ void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
 
 int  rt_down_read_trylock(struct rw_semaphore *rwsem)
 {
-   struct rt_mutex *lock = >lock;
-   int ret = 1;
-
-   /*
-* recursive read locks succeed when current owns the rwsem,
-* but not when read_depth == 0 which means that the rwsem is
-* write locked.
-*/
-   if (rt_mutex_owner(lock) != current) {
-   ret = rt_mutex_trylock(>lock);
-   if (ret)
-   rwsem_acquire(>dep_map, 0, 1, _RET_IP_);
-   } else if (!rwsem->read_depth) {
-   ret = 0;
-   }
+   int ret;
 
+   ret = rt_mutex_trylock(>lock);
if (ret)
-   rwsem->read_depth++;
+   rwsem_acquire(>dep_map, 0, 1, _RET_IP_);
+
return ret;
 }
 EXPORT_SYMBOL(rt_down_read_trylock);
 
 static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
 {
-   struct rt_mutex *lock = >lock;
-
-   if (rt_mutex_owner(lock) != current) {
-   rwsem_acquire(>dep_map, subclass, 0, _RET_IP_);
-   rt_mutex_lock(>lock);
-   }
-   rwsem->read_depth++;
+   rwsem_acquire(>dep_map, subclass, 0, _RET_IP_);
+   rt_mutex_lock(>lock);
 }
 
 void  rt_down_read(struct rw_semaphore *rwsem)
@@ -422,7 +402,6 @@ void  __rt_rwsem_init(struct rw_semaphore *rwsem, const 
char *name,
debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
lockdep_init_map(>dep_map, name, key, 0);
 #endif
-   rwsem->read_depth = 0;
rwsem->lock.save_state = 0;
 }
 EXPORT_SYMBOL(__rt_rwsem_init);
-- 
1.8.5.3


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: driver skip pci_set_master, fix it? No.

2014-04-08 Thread Benjamin Herrenschmidt
On Tue, 2014-04-08 at 17:18 -0400, Mark Lord wrote:
> > I assume you're talking about the one added by cf3e1feba7f9 ("PCI:
> > Workaround missing pci_set_master in pci drivers"), but as far as I
> > can tell, it only calls pci_set_master() for *bridge* devices.  What
> > am I missing?  Is pci_set_master() being called for your endpoint?
> > What path is that?
> 
> Yes, it is being called during execution of the _probe() function in my 
> driver,
> as evidenced by the annoying (and wrong) message it produces.
> 
> Next time I've got the hardware at hand, I'll put a "dump_stack()" into there
> to see the exact calling path.

Note that one of the reasons we want to do it early on bridges is that without
it, we may also not get the PCIe error messages.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Intel-gfx] i915 DVI resolution regression (3.13.7+)

2014-04-08 Thread Daniel J Blueman
Ville et al,

On 8 April 2014 16:02, Daniel Vetter  wrote:
> On Tue, Apr 8, 2014 at 9:32 AM, Daniel J Blueman  wrote:
>> I am using a dual-link DVI-D to DVI-D cable to this monitor, since I
>> previously couldn't get 2560x1440 via HDMI.
>>
>> If it isn't this commit, then it may be another commit in 3.13.7,
>> albeit it feels less likely.
>
> Before we go on a wild goose chase, can you please confirm your
> suspicion with a revert?

I built stock 3.13.7 to reproduce the issue (Ubuntu mainline config);
I reverted e3ea8fa6beaf55fee64bf816f3b8a80ad733b2c2 and rebuilt, and
don't see the issue. It's probably an idea if you have a panel that
requires dual-link DVI on the automated test matrix if not already.

Anyway, glad to have helped.

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the userns tree with Linus' tree

2014-04-08 Thread Stephen Rothwell
Hi Eric,

Today's linux-next merge of the userns tree got a conflict in
fs/namespace.c between commits from Linus' tree and commits from the
userns tree.

I fixed it up (I used the conflict resolution that you sent to Linus -
see below) and can carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc fs/namespace.c
index 2ffc5a2905d4,52f4174e294c..
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@@ -665,13 -632,47 +666,47 @@@ struct vfsmount *lookup_mnt(struct pat
return m;
  }
  
- static struct mountpoint *new_mountpoint(struct dentry *dentry)
+ /*
+  * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
+  * current mount namespace.
+  *
+  * The common case is dentries are not mountpoints at all and that
+  * test is handled inline.  For the slow case when we are actually
+  * dealing with a mountpoint of some kind, walk through all of the
+  * mounts in the current mount namespace and test to see if the dentry
+  * is a mountpoint.
+  *
+  * The mount_hashtable is not usable in the context because we
+  * need to identify all mounts that may be in the current mount
+  * namespace not just a mount that happens to have some specified
+  * parent mount.
+  */
+ bool __is_local_mountpoint(struct dentry *dentry)
+ {
+   struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+   struct mount *mnt;
+   bool is_covered = false;
+ 
+   if (!d_mountpoint(dentry))
+   goto out;
+ 
+   down_read(_sem);
+   list_for_each_entry(mnt, >list, mnt_list) {
+   is_covered = (mnt->mnt_mountpoint == dentry);
+   if (is_covered)
+   break;
+   }
+   up_read(_sem);
+ out:
+   return is_covered;
+ }
+ 
+ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
  {
 -  struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
 +  struct hlist_head *chain = mp_hash(dentry);
struct mountpoint *mp;
-   int ret;
  
 -  list_for_each_entry(mp, chain, m_hash) {
 +  hlist_for_each_entry(mp, chain, m_hash) {
if (mp->m_dentry == dentry) {
/* might be worth a WARN_ON() */
if (d_unlinked(dentry))
@@@ -680,6 -681,14 +715,14 @@@
return mp;
}
}
+   return NULL;
+ }
+ 
+ static struct mountpoint *new_mountpoint(struct dentry *dentry)
+ {
 -  struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
++  struct hlist_head *chain = mp_hash(dentry);
+   struct mountpoint *mp;
+   int ret;
  
mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
if (!mp)
@@@ -693,7 -702,8 +736,8 @@@
  
mp->m_dentry = dentry;
mp->m_count = 1;
 -  list_add(>m_hash, chain);
 +  hlist_add_head(>m_hash, chain);
+   INIT_LIST_HEAD(>m_list);
return mp;
  }
  
@@@ -746,7 -757,8 +791,8 @@@ static void detach_mnt(struct mount *mn
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(>mnt_child);
 -  list_del_init(>mnt_hash);
 +  hlist_del_init_rcu(>mnt_hash);
+   list_del_init(>mnt_mp_list);
put_mountpoint(mnt->mnt_mp);
mnt->mnt_mp = NULL;
  }



pgpJw7vBLHTVG.pgp
Description: PGP signature


hpsa driver bug crack kernel down!

2014-04-08 Thread Baoquan He
Hi,

The kernel is 3.14.0+ which is pulled just now. 


[   18.402695] systemd[1]: Set hostname to
.
[   18.408456] random: systemd urandom read with 70 bits of entropy
available
[   18md[1]: Expecting device
dev-mapper-rhel_hp\x2d\x2dsl4545g7\x2d\x2d01\x2droot.device...
 Expecting device
dev-mapper-rhel_hp\x2d\x2dsl4545g7\...droot.device...
[   18.860704] systemd[1]: Starting -.slice.
[  OK  ] Created slice -.slice.
[   18.866030] systemd[1]: Created slice -.slice.
[   18.869466] systemd[1]: Starting System Slice.
[  OK  ] Created slice System Sl   18.939116] systemd[1]: Created
slice System Slice.
[   18.976213] systemd[1]: Starting Slices.
[  OK  ] Reached target Slices.
[   18.981154] systemd[1]: Reached target Slices.
[   18.984183] systemd[1]: Starting Timers.
[  OK  ] Reached target Timers.
[   18.989161] systemd[1]: Reached target Timers.
[   18.992004] systemd[1]: Starting Journal Socket.
[  OK  ] Listening on Journal Socket.
[   18.997174] systemd[1]: Listening on Journal Socket.
[   19.000702] systemd[1]: Starting dracut cmdline hook...
 Starting dracut cmdline hook...
[   19.006697] systemd[1]: Started Load KernModules.
[   19.110408] systemd[1]: Starting Setup Virtual Console...
 Starting Setup Virtual Console...
[   19.116652] systemd[1]: Starting Journal Service...
 Starting Journal Service...
[  OK  ] Started Journal Service.
[   19.127172] systemd[1]: Started Journal Service.
[  OK  ] Listening on udev Kernel Socket.
[   19.141504] systemd-journald[281]: Vac[  OK  ] Listening on udev
Control Socket.
[  OK  ] Reached target Sockets.
 Starting Create list of required static device nodes...rrent
kernel...
 Starting Apply Kernel Variables...
[  OK  ] Reached target Swap.
[  OK  ] Reached target Local File Systems.
[  OK  ] Started dracut cmdline hook.
[  OK  ] Started Setup Virtual Console.
[  OK  ] Started Apply Kernel Variables.
[  OK  ] Started Create list of required static device nodes ...current
kernel.
 Starting Create static device nodes in /dev...
 Starting dracut pre-udev hook...
[  OK  ] Started Create static device nodes in /dev.
[   20.247819] device-mapper: uevent: version 1.0.3
[   20.251101] device-mapper: ioctl: 4.27.0-ioctl (2013-10-30)
initialised: dm-de...@redhat.com
[  OK  ] Started dracut pre-udev hook.
 Starting udev Kernel Device Manager...
[   20.322923] systemd-udevd[335]: starting version 208
[  OK  ] Started udev Kernel Device Manager.
 Starting udev Coldplug all Devices...
 Mounting Configuration File System...
[  OK  ] Mounted Configuration File System.
[  OK  ] Started udev Coldplug all Devices.
 Starting dracut initqueue hook...
[  OK  ][1] HP HPSA Driver (v 3.4.4-1)
[   20.832850] hpsa :05:00.0: can't disable ASPM; OS doesn't have
ASPM control
 Reached target System Initialization.
[   20.875178] ACPI: PCI Interrupt Link [I0C0] enabled at IRQ 36
[   20.909000] hpsa :05:00.0: MSIX
[   20.911586] hpsa :05:00.0: Logical aborts not supported
 [   20.916004] [drm] Initialized drm 1.1.0 20060810
[   20.936139] hpsa :05:00.0: hpsa0: <0x323b> at IRQ 73 using DAC
[   20.956967] BUG: unable to handle kernel NULL pointer dereference at
(null)
[   20.956997] IP: []
hpsa_enter_performant_mode+0x4ff/0x580 [hpsa]
[   20.957003] PGD 0 
[   20.957012] Oops: 0002 [#1] SMP 
[   20.957035] Modules linked in: drm(+) libata hpsa(+) i2c_core
dm_mirror dm_region_hash dm_log dm_mod
[   20.957046] CPU: 10 PID: 341 Comm: systemd-udevd Not tainted 3.14.0+
#28
[   20.957049] Hardware name: HP ProLiant SL4545 G7/, BIOS A31
12/08/2012
[   20.957055] task: 880824191b40 ti: 88082309c000 task.ti:
88082309c000
[   20.957078] RIP: 0010:[]  []
hpsa_enter_performant_mode+0x4ff/0x580 [hpsa]
[   20.957083] RSP: 0018:88082309da18  EFLAGS: 00010297
[   20.957088] RAX:  RBX: 7c000167 RCX:
0004
[   20.957091] RDX: 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the userns tree with Linus' tree

2014-04-08 Thread Stephen Rothwell
Hi Eric,

Today's linux-next merge of the userns tree got a conflict in fs/namei.c
between commits from Linus' tree and commits 3dd905eaa258 ("vfs: Don't
allow overwriting mounts in the current mount namespace") and
f43d102a391d ("vfs: Lazily remove mounts on unlinked files and
directories") from the userns tree.

I fixed it up (I used the conflict resolution that you sent to Linus -
see below) and can carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc fs/namei.c
index 88339f59efb5,384fcc6a5606..
--- a/fs/namei.c
+++ b/fs/namei.c
@@@ -4082,33 -4045,17 +4085,33 @@@ int vfs_rename(struct inode *old_dir, s
if (error)
return error;
  
 +  old_name = fsnotify_oldname_init(old_dentry->d_name.name);
dget(new_dentry);
 -  lock_two_nondirectories(source, target);
 +  if (!is_dir || (flags & RENAME_EXCHANGE))
 +  lock_two_nondirectories(source, target);
 +  else if (target)
 +  mutex_lock(>i_mutex);
  
error = -EBUSY;
-   if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
+   if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
goto out;
  
 -  error = try_break_deleg(source, delegated_inode);
 -  if (error)
 -  goto out;
 -  if (target) {
 +  if (max_links && new_dir != old_dir) {
 +  error = -EMLINK;
 +  if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
 +  goto out;
 +  if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
 +  old_dir->i_nlink >= max_links)
 +  goto out;
 +  }
 +  if (is_dir && !(flags & RENAME_EXCHANGE) && target)
 +  shrink_dcache_parent(new_dentry);
 +  if (!is_dir) {
 +  error = try_break_deleg(source, delegated_inode);
 +  if (error)
 +  goto out;
 +  }
 +  if (target && !new_is_dir) {
error = try_break_deleg(target, delegated_inode);
if (error)
goto out;
@@@ -4123,31 -4064,73 +4126,32 @@@
if (error)
goto out;
  
 -  if (target) {
 +  if (!(flags & RENAME_EXCHANGE) && target) {
 +  if (is_dir)
 +  target->i_flags |= S_DEAD;
dont_mount(new_dentry);
+   detach_mounts(new_dentry);
}
 -  if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 -  d_move(old_dentry, new_dentry);
 +  if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
 +  if (!(flags & RENAME_EXCHANGE))
 +  d_move(old_dentry, new_dentry);
 +  else
 +  d_exchange(old_dentry, new_dentry);
 +  }
  out:
 -  unlock_two_nondirectories(source, target);
 +  if (!is_dir || (flags & RENAME_EXCHANGE))
 +  unlock_two_nondirectories(source, target);
 +  else if (target)
 +  mutex_unlock(&target->i_mutex);
dput(new_dentry);
 -  return error;
 -}
 -
 -/**
 - * vfs_rename - rename a filesystem object
 - * @old_dir:  parent of source
 - * @old_dentry:   source
 - * @new_dir:  parent of destination
 - * @new_dentry:   destination
 - * @delegated_inode: returns an inode needing a delegation break
 - *
 - * The caller must hold multiple mutexes--see lock_rename()).
 - *
 - * If vfs_rename discovers a delegation in need of breaking at either
 - * the source or destination, it will return -EWOULDBLOCK and return a
 - * reference to the inode in delegated_inode.  The caller should then
 - * break the delegation and retry.  Because breaking a delegation may
 - * take a long time, the caller should drop all locks before doing
 - * so.
 - *
 - * Alternatively, a caller may pass NULL for delegated_inode.  This may
 - * be appropriate for callers that expect the underlying filesystem not
 - * to be NFS exported.
 - */
 -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 - struct inode *new_dir, struct dentry *new_dentry,
 - struct inode **delegated_inode)
 -{
 -  int error;
 -  int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
 -  const unsigned char *old_name;
 -
 -  if (old_dentry->d_inode == new_dentry->d_inode)
 -  return 0;
 - 
 -  error = may_delete(old_dir, old_dentry, is_dir);
 -  if (error)
 -  return error;
 -
 -  if (!new_dentry->d_inode)
 -  error = may_create(new_dir, new_dentry);
 -  else
 -  error = may_delete(new_dir, new_dentry, is_dir);
 -  if (error)
 -  return error;
 -
 -  if (!old_dir->i_op->rename)
 -  return -EPERM;
 -
 -  old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 -
 -  if (is_dir)
 -  error = 

linux-next: manual merge of the userns tree with Linus' tree

2014-04-08 Thread Stephen Rothwell
Hi Eric,

Today's linux-next merge of the userns tree got a conflict in fs/dcache.c
between commit da1ce0670c14 ("vfs: add cross-rename") from Linus' tree
and commit f43d102a391d ("vfs: Lazily remove mounts on unlinked files and
directories") from the userns tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).  This matches the conflict resolution you sent to Linus.

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au

diff --cc fs/dcache.c
index 40707d88a945,5b78bd98649c..
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@@ -2701,10 -2631,8 +2663,8 @@@ static struct dentry *__d_unalias(struc
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
  out_unalias:
-   if (likely(!d_mountpoint(alias))) {
-   __d_move(alias, dentry, false);
-   ret = alias;
-   }
 -  __d_move(alias, dentry);
++  __d_move(alias, dentry, false);
+   ret = alias;
  out_err:
spin_unlock(&inode->i_lock);
if (m2)


pgpVnxLJHQy6P.pgp
Description: PGP signature


Re: [GIT PULL] Detaching mounts on unlink for 3.15-rc1

2014-04-08 Thread Al Viro
On Wed, Apr 09, 2014 at 03:30:27AM +0100, Al Viro wrote:

> > When renaming or unlinking directory entries that are not mountpoints
> > no additional locks are taken so no performance differences can result,
> > and my benchmark reflected that.
> 
> It also means that d_invalidate() now might trigger fs shutdown.  Which
> has bloody huge stack footprint, for obvious reasons.  And d_invalidate()
> can be called with pretty deep stack - walk into wrong dentry while
> resolving a deeply nested symlink and there you go...

PS: I thought I actually replied with that point back a month or so ago,
but having checked sent-mail...  Looks like I had not.  My deep apologies.

FWIW, I think that overall this thing is a good idea, provided that we can
live with semantics changes.  The implementation is too optimistic, though -
at the very least, we want this work done upon namespace_unlock() held
back until we are not too deep in stack.  task_work_add() fodder, perhaps?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] cpufreq: highbank: fix ARM_HIGHBANK_CPUFREQ dependency warning

2014-04-08 Thread Kefeng Wang
When running "make ARCH=arm multi_v7_defconfig", we get the following warning:

warning: (ARM_HIGHBANK_CPUFREQ) selects GENERIC_CPUFREQ_CPU0 which has
unmet direct dependencies (ARCH_HAS_CPUFREQ && CPU_FREQ && HAVE_CLK
&& REGULATOR && OF && THERMAL && CPU_THERMAL)

To fix this, make ARM_HIGHBANK_CPUFREQ depend on ARCH_HAS_CPUFREQ and
REGULATOR instead of selecting them; PM_OPP will be selected by
ARCH_HAS_CPUFREQ.

Signed-off-by: Kefeng Wang 
---
 drivers/cpufreq/Kconfig.arm | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 1e2b9db..30dfaef 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -92,11 +92,7 @@ config ARM_EXYNOS_CPU_FREQ_BOOST_SW
 
 config ARM_HIGHBANK_CPUFREQ
tristate "Calxeda Highbank-based"
-   depends on ARCH_HIGHBANK
-   select GENERIC_CPUFREQ_CPU0
-   select PM_OPP
-   select REGULATOR
-
+   depends on ARCH_HIGHBANK && GENERIC_CPUFREQ_CPU0 && REGULATOR
default m
help
  This adds the CPUFreq driver for Calxeda Highbank SoC
-- 
1.8.2.2


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support"

2014-04-08 Thread Namhyung Kim
On Mon, 24 Mar 2014 15:34:33 -0400, Don Zickus wrote:
> This reverts commit 3090ffb5a2515990182f3f55b0688a7817325488.

It seems if you exchange the order of patch 2 and 3 it'd be less
confusing  ;-p

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT PULL] Detaching mounts on unlink for 3.15-rc1

2014-04-08 Thread Al Viro
On Tue, Apr 08, 2014 at 05:21:32PM -0700, Eric W. Biederman wrote:

> This set of changes has been reviewed and been sitting idle for the last
> 6 weeks.  In that time the vfs has slightly shifted under me the new
> version of rename and the mount hash list becoming a hlist.  None of
> those changes has changed the code in ways to invalidate these
> changes, but small conflicts do result and I have attached my conflict
> resolution at the end of this email in case it helps.
> 
> To recap these changes allow a file or a directory that is a mount point
> in one mount namespace to be unlinked/rmdired elsewhere where it is not
> a mount point (either a remote filesystem or another mount namespace).
> As has been agreed during review semantics when only a single mount
> namespace exists remain unchanged.
> 
> This removes a long standing need to lie to the vfs when a mount point
> has been removed behind its back.  This also removes a DOS attack where
> an unprivileged user could prevent root from renaming or deleting files
> and directories by using them as mountpoints in another mount namespace.
> 
> This change also fixes a few cases where because we were not lying to
> the vfs we could leak mount points.
> 
> When renaming or unlinking directory entries that are not mountpoints
> no additional locks are taken so no performance differences can result,
> and my benchmark reflected that.

It also means that d_invalidate() now might trigger fs shutdown.  Which
has bloody huge stack footprint, for obvious reasons.  And d_invalidate()
can be called with pretty deep stack - walk into wrong dentry while
resolving a deeply nested symlink and there you go...
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] iommu/vt-d: fix memory leakage caused by commit ea8ea46

2014-04-08 Thread Jiang Liu
Commit ea8ea46 "iommu/vt-d: Clean up and fix page table clear/free
behaviour" introduces possible leakage of DMA page tables due to:
for (pte = page_address(pg); !first_pte_in_page(pte); pte++) {
if (dma_pte_present(pte) && !dma_pte_superpage(pte))
freelist = dma_pte_list_pagetables(domain, level - 1,
   pte, freelist);
}

For the first pte in a page, first_pte_in_page(pte) will always be true,
thus dma_pte_list_pagetables() will never be called and leak DMA page
tables if level is bigger than 1.

Signed-off-by: Jiang Liu 
---
 drivers/iommu/intel-iommu.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 69fa7da..13dc231 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1009,11 +1009,13 @@ static struct page *dma_pte_list_pagetables(struct 
dmar_domain *domain,
if (level == 1)
return freelist;
 
-   for (pte = page_address(pg); !first_pte_in_page(pte); pte++) {
+   pte = page_address(pg);
+   do {
if (dma_pte_present(pte) && !dma_pte_superpage(pte))
freelist = dma_pte_list_pagetables(domain, level - 1,
   pte, freelist);
-   }
+   pte++;
+   } while (!first_pte_in_page(pte));
 
return freelist;
 }
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits

2014-04-08 Thread Namhyung Kim
On Wed, 09 Apr 2014 11:17:44 +0900, Namhyung Kim wrote:
> So you need to synthesize a PERF_RECORD_MMAP2 event then.  The
> mmap_event and mmap2_event share the same fields until ->pgoff only.  So
> copying to mmap.filename will overwrite other bits in mmap2.

Oops, missed patch 3/3 already does it.  Sorry for noise.

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] futex: avoid race between requeue and wake

2014-04-08 Thread Davidlohr Bueso
Adding Thomas to the thread.

Sorry for the late reply, I was out running errands all day just to get
home to find this futex jewel in my inbox.

On Tue, 2014-04-08 at 15:30 -0700, Linus Torvalds wrote:
> On Tue, Apr 8, 2014 at 2:02 PM, Jan Stancek  wrote:
> >
> > I ran reproducer with following change on s390x system, where this
> > can be reproduced usually within seconds:
> >
> > diff --git a/kernel/futex.c b/kernel/futex.c
> > index 67dacaf..9150ffd 100644
> > --- a/kernel/futex.c
> > +++ b/kernel/futex.c
> > @@ -1095,6 +1095,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 
> > uval)
> >  static inline void
> >  double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket 
> > *hb2)
> >  {
> > +   hb_waiters_inc(hb2);
> > if (hb1 <= hb2) {
> > spin_lock(&hb1->lock);
> > if (hb1 < hb2)
> > @@ -1111,6 +1112,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, 
> > struct futex_hash_bucket *hb2)
> > spin_unlock(&hb1->lock);
> > if (hb1 != hb2)
> > spin_unlock(&hb2->lock);
> > +   hb_waiters_dec(hb2);
> >  }
> >
> >  /*
> >
> > Reproducer is running without failures over an hour now and
> > made ~1.4 million iterations.
> 
> Ok, that's encouraging. That is the smallest patch I could come up
> with, but as mentioned, it's not optimal. We only need it for
> futex_requeue(), but if we do it there we'd have to handle all the
> different error cases (there's only one call to double_lock_hb(), but
> due to the error cases there's four calls to double_unlock_hb()).

For consistency and mental sanity, I definitely prefer this alternative
to adding back the spin_is_locked check.

Linus, from what I see from your approach in always adding and
decrementing the hb->waiters count in futex_requeue right before the
whole double_[un]lock_hb() calls, we're basically saying "let's not do
this pending waiters optimization" for futex requeuing, right?
Which is fine, requeuing isn't really that used or performance critical
in many cases. But I say it since the legitimate accounting is done
in requeue_futex(), which can obviously be bogus as we increment *after*
taking the hb->lock. Just want to make sure we're on the same page here.

> 
> I'm not sure how much we care. The simple patch basically adds two
> (unnecessary) atomics to the futex_wake_op() path. I don't know how
> critical that path is - not as critical as the regular "futex_wake()",
> I'd expect, but I guess pthread_cond_signal() is the main user.

Agreed, since the issue occurs because we're requeuing *waiters*, let's
keep it inside the requeueing only. In the case of futex_wake_op() it
doesn't matter as we don't need to account for them. It's more code, but
that's it. I'd rather add error housekeeping than add more unnecessary
logic to other paths of futexes.

> 
> So I'll have to leave this decision to the futex people. But the
> attached slightly more complex patch *may* be the better one.
> 
> May I bother you to test this one too? I really think that
> futex_requeue() is the only user that should need this, so doing it
> there rather than in double_[un]lock_hb() should be slightly more
> optimal, but who knows what I've missed. We clearly *all* missed this
> race back when the ordering rules were documented..

Yep, it's quite an obvious thing we overlooked here, and not even arch
specific... I'm surprised that the requeueing path isn't stressed more
often, and while the race window is quite small (I'm still running Jan's
program in a loop and cannot trigger it on my x86-64 80 core box), it
should have been seen earlier by some program/benchmark.

Thanks,
Davidlohr


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits

2014-04-08 Thread Namhyung Kim
On Mon, 24 Mar 2014 15:34:32 -0400, Don Zickus wrote:
> The kernel piece passes more info now.  Update the perf tool to reflect
> that and adjust the synthesized maps to play along.
>
> Signed-off-by: Don Zickus 
> ---
>  tools/perf/util/event.c   | 23 +--
>  tools/perf/util/event.h   |  2 ++
>  tools/perf/util/machine.c |  4 +++-
>  tools/perf/util/map.c |  4 +++-
>  tools/perf/util/map.h |  4 +++-
>  5 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index 9d12aa6..6b8646c 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -1,4 +1,5 @@
>  #include <linux/types.h>
> +#include <sys/mman.h>
>  #include "event.h"
>  #include "debug.h"
>  #include "hist.h"
> @@ -212,6 +213,21 @@ int perf_event__synthesize_mmap_events(struct perf_tool 
> *tool,
>   else
>   event->header.misc = PERF_RECORD_MISC_GUEST_USER;
>  
> + /* map protection and flags bits */
> + event->mmap2.prot = 0;
> + event->mmap2.flags = 0;
> + if (prot[0] == 'r')
> + event->mmap2.prot |= PROT_READ;
> + if (prot[1] == 'w')
> + event->mmap2.prot |= PROT_WRITE;
> + if (prot[2] == 'x')
> + event->mmap2.prot |= PROT_EXEC;
> +
> + if (prot[3] == 's')
> + event->mmap2.flags |= MAP_SHARED;
> + else
> + event->mmap2.flags |= MAP_PRIVATE;
> +

So you need to synthesize a PERF_RECORD_MMAP2 event then.  The
mmap_event and mmap2_event share the same fields until ->pgoff only.  So
copying to mmap.filename will overwrite other bits in mmap2.

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND] usb: ohci-exynos: Remove locks for 'ohci' in suspend callback

2014-04-08 Thread Jingoo Han
On Tuesday, April 08, 2014 11:41 PM, Vivek Gautam wrote:
> 
> Patch : 14982e3 USB: OHCI: Properly handle ohci-exynos suspend
> has already removed 'ohci_hcd' settings from exynos glue layer
> as a part of streamlining the ohci controller's suspend.
> So we don't need the locks for 'ohci_hcd' anymore.

Right, this spin_lock/unlock is unnecessary, because it is
already used in ohci_suspend().

Acked-by: Jingoo Han 

Best regards,
Jingoo Han

> 
> Signed-off-by: Vivek Gautam 
> Cc: Manjunath Goudar 
> Cc: Alan Stern 
> ---
>  drivers/usb/host/ohci-exynos.c |6 --
>  1 file changed, 6 deletions(-)
> 
> diff --git a/drivers/usb/host/ohci-exynos.c b/drivers/usb/host/ohci-exynos.c
> index 68588d8..9cf80cb 100644
> --- a/drivers/usb/host/ohci-exynos.c
> +++ b/drivers/usb/host/ohci-exynos.c
> @@ -190,17 +190,13 @@ static int exynos_ohci_suspend(struct device *dev)
>  {
>   struct usb_hcd *hcd = dev_get_drvdata(dev);
>   struct exynos_ohci_hcd *exynos_ohci = to_exynos_ohci(hcd);
> - struct ohci_hcd *ohci = hcd_to_ohci(hcd);
>   struct platform_device *pdev = to_platform_device(dev);
>   bool do_wakeup = device_may_wakeup(dev);
> - unsigned long flags;
>   int rc = ohci_suspend(hcd, do_wakeup);
> 
>   if (rc)
>   return rc;
> 
> - spin_lock_irqsave(&ohci->lock, flags);
> -
>   if (exynos_ohci->otg)
>   exynos_ohci->otg->set_host(exynos_ohci->otg, &hcd->self);
> 
> @@ -208,8 +204,6 @@ static int exynos_ohci_suspend(struct device *dev)
> 
>   clk_disable_unprepare(exynos_ohci->clk);
> 
> - spin_unlock_irqrestore(&ohci->lock, flags);
> -
>   return 0;
>  }
> 
> --
> 1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 07/15 V3] perf, c2c: Add in sort on physid

2014-04-08 Thread Don Zickus
On Wed, Apr 09, 2014 at 10:30:56AM +0900, Namhyung Kim wrote:
> On Tue, 8 Apr 2014 10:17:58 -0400, Don Zickus wrote:
> > On Tue, Apr 08, 2014 at 04:56:25PM +0900, Namhyung Kim wrote:
> >> On Mon, 24 Mar 2014 15:36:58 -0400, Don Zickus wrote:
> >> > +static int perf_c2c__init(struct perf_c2c *c2c)
> >> > +{
> >> > +sort__mode = SORT_MODE__PHYSID;
> >> > +sort__wants_unique = 1;
> >> > +sort_order = "daddr,iaddr,pid,tid";
> >> 
> >> Where are the SORT_MODE__PHYSID, sort__wants_unique and "daddr", "iaddr"
> >> sort keys defined?
> >
> > In a previous patchset that enables the mmap2 interface.
> 
> Ah, missed that.. will look at it soon.
> 
> >
> >> 
> >> Also, more importantly, I think the sort order should contain at least
> >> "mem" and "snoop" keys to group samples based on the exact hitm
> >> information.
> >
> > I can look into it, but after iaddr, pid and tid, sorting on snoop doesn't
> > really change anything if I recall.  The hitms are scattered across iaddr.
> 
> But it doesn't guarantee that all hitms are scattered, right?  Also if
> it's the case I guess adding more sort keys is not harmful since they
> don't even have a chance to test.
> 
> I think you can check hist_entry->stat.nr_events always being 1.
> 
> >
> >> 
> >> In my understanding, if two samples are captured at exactly same
> >> position with a same data access but different hitm info, they cannot be
> >> classified and just use the hitm info of first entry.
> >
> > Why?  If the first hitm access was local and the second one remote,
> > doesn't that indicate the accessed data is being pulled onto different
> > nodes?
> 
> But "hist_entry" won't have the information after
> __hists__add_entry() is called unless it has 'mem' and 'snoop' sort keys.
> Since two samples have same daddr, iaddr, pid and tid, it'd think those
> two samples are same and then add stats of second one to the first and
> finally discard the second.  So first one will have a double weight for
> the local hitm case only.
> 
> This is the case I worry about.  Am I missing something?

My patch 6/6 of the enable mmap2 support. :-)  It specifically forces all
the data to remain separate to avoid this issue.  We couldn't have the
data merged because it messed up our stats.

Cheers,
Don
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] driver core: platform: add device binding path 'driver_override'

2014-04-08 Thread Kim Phillips
Needed by platform device drivers, such as the vfio-platform driver [1],
in order to bypass the existing OF, ACPI, id_table and name string matches,
and successfully be able to be bound to any device, like so:

echo vfio-platform > /sys/bus/platform/devices/fff51000.ethernet/driver_override
echo fff51000.ethernet > 
/sys/bus/platform/devices/fff51000.ethernet/driver/unbind
echo fff51000.ethernet > /sys/bus/platform/drivers_probe

This mimics "PCI: Introduce new device binding path using
pci_dev.driver_override" [2], which is an interface enhancement
for more deterministic PCI device binding, e.g., when in the
presence of hotplug.

[1] http://lkml.iu.edu/hypermail/linux/kernel/1402.1/00177.html
[2] 
http://lists-archives.com/linux-kernel/28030441-pci-introduce-new-device-binding-path-using-pci_dev-driver_override.html

Suggested-by: Alex Williamson 
Signed-off-by: Kim Phillips 
---
changes since RFC:
- fixed message Subject to properly reflect a new platform device
patch (instead of leaving it as a reply to the PCI version)
- addressed Guenter Roeck's comment
- updated [2] with link to later (Apr.4th) revision of the PCI patch
- updated documentation to address Christoffer Dall's comments to
  previous version of [2]
- added a Suggested-by, and re-posted as a reply to the PCI patch,
  should they be applied together

 Documentation/ABI/testing/sysfs-bus-platform | 20 
 drivers/base/platform.c  | 46 
 include/linux/platform_device.h  |  1 +
 3 files changed, 67 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-platform

diff --git a/Documentation/ABI/testing/sysfs-bus-platform 
b/Documentation/ABI/testing/sysfs-bus-platform
new file mode 100644
index 000..5172a61
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform
@@ -0,0 +1,20 @@
+What:  /sys/bus/platform/devices/.../driver_override
+Date:  April 2014
+Contact:   Kim Phillips 
+Description:
+   This file allows the driver for a device to be specified which
+   will override standard OF, ACPI, ID table, and name matching.
+   When specified, only a driver with a name matching the value
+   written to driver_override will have an opportunity to bind
+   to the device.  The override is specified by writing a string
+   to the driver_override file (echo vfio-platform > \
+   driver_override) and may be cleared with an empty string
+   (echo > driver_override).  This returns the device to standard
+   matching rules binding.  Writing to driver_override does not
+   automatically unbind the device from its current driver or make
+   any attempt to automatically load the specified driver.  If no
+   driver with a matching name is currently loaded in the kernel,
+   the device will not bind to any driver.  This also allows
+   devices to opt-out of driver binding using a driver_override
+   name such as "none".  Only a single driver may be specified in
+   the override, there is no support for parsing delimiters.
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index e714709..a0909cb 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include <linux/limits.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -690,8 +691,49 @@ static ssize_t modalias_show(struct device *dev, struct 
device_attribute *a,
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t driver_override_store(struct device *dev,
+struct device_attribute *attr,
+const char *buf, size_t count)
+{
+   struct platform_device *pdev = to_platform_device(dev);
+   char *driver_override, *old = pdev->driver_override, *cp;
+
+   if (count > PATH_MAX)
+   return -EINVAL;
+
+   driver_override = kstrndup(buf, count, GFP_KERNEL);
+   if (!driver_override)
+   return -ENOMEM;
+
+   cp = strchr(driver_override, '\n');
+   if (cp)
+   *cp = '\0';
+
+   if (strlen(driver_override)) {
+   pdev->driver_override = driver_override;
+   } else {
+   kfree(driver_override);
+   pdev->driver_override = NULL;
+   }
+
+   kfree(old);
+
+   return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct platform_device *pdev = to_platform_device(dev);
+
+   return sprintf(buf, "%s\n", pdev->driver_override);
+}
+static DEVICE_ATTR_RW(driver_override);
+
+
 static struct attribute *platform_dev_attrs[] = {
&dev_attr_modalias.attr,
+   &dev_attr_driver_override.attr,
NULL,
 };
 ATTRIBUTE_GROUPS(platform_dev);
@@ -747,6 +789,10 @@ 

[PATCH] sym53c8xx_2: Set DID_REQUEUE return code when aborting squeue

2014-04-08 Thread Mikulas Patocka
[ the maintainer didn't respond, so I'm sending to Linus ]


This patch fixes I/O errors when the disk returns QUEUE FULL status.

When the controller encounters an error (including QUEUE FULL or BUSY status),
it aborts all not yet submitted requests in the function
sym_dequeue_from_squeue.

This function aborts them with DID_SOFT_ERROR.

If the disk has a full tag queue, the request that caused the overflow is 
aborted with QUEUE FULL status (and the scsi midlayer properly retries it 
until it is accepted by the disk), but the sym53c8xx_2 driver aborts the 
following requests with DID_SOFT_ERROR --- for them, the midlayer does 
just a few retries and then signals the error up to sd.

The result is that disk returning QUEUE FULL causes request failures.

The error was reproduced on 53c895 with COMPAQ BD03685A24 disk (rebranded
ST336607LC) with command queue 48 or 64 tags. The disk has 64 tags, but
under some access patterns it returns QUEUE FULL when there are fewer than
64 pending tags. The SCSI specification allows returning QUEUE FULL
anytime and it is up to the host to retry.

Signed-off-by: Mikulas Patocka 

---
 drivers/scsi/sym53c8xx_2/sym_hipd.c |4 
 1 file changed, 4 insertions(+)

Index: linux-2.6.36-rc5-fast/drivers/scsi/sym53c8xx_2/sym_hipd.c
===
--- linux-2.6.36-rc5-fast.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c  
2010-09-27 10:25:59.0 +0200
+++ linux-2.6.36-rc5-fast/drivers/scsi/sym53c8xx_2/sym_hipd.c   2010-09-27 
10:26:27.0 +0200
@@ -3000,7 +3000,11 @@ sym_dequeue_from_squeue(struct sym_hcb *
if ((target == -1 || cp->target == target) &&
(lun== -1 || cp->lun== lun)&&
(task   == -1 || cp->tag== task)) {
+#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING
sym_set_cam_status(cp->cmd, DID_SOFT_ERROR);
+#else
+   sym_set_cam_status(cp->cmd, DID_REQUEUE);
+#endif
sym_remque(&cp->link_ccbq);
sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, tools, stat: Initialize statistics correctly

2014-04-08 Thread Namhyung Kim
On Tue, 25 Mar 2014 10:31:38 -0700, Andi Kleen wrote:
> From: Andi Kleen 
>
> perf stat did initialize the stats structure used to compute
> stddev etc. incorrectly. It merely zeroes it. But one member
> (min) needs to be set to a non zero value. This causes min
> to be not computed at all. Call init_stats() correctly.
>
> It doesn't matter for stat currently because it doesn't use
> min, but it's still better to do it correctly.
>
> The other users of statistics are already correct.

Acked-by: Namhyung Kim 

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 1/2] mm: move FAULT_AROUND_ORDER to arch/

2014-04-08 Thread Madhavan Srinivasan
On Friday 04 April 2014 11:20 PM, David Miller wrote:
> From: Dave Hansen 
> Date: Fri, 04 Apr 2014 09:18:43 -0700
> 
>> On 04/03/2014 11:27 PM, Madhavan Srinivasan wrote:
>>> This patch creates infrastructure to move the FAULT_AROUND_ORDER
>>> to arch/ using Kconfig. This will enable architecture maintainers
>>> to decide on suitable FAULT_AROUND_ORDER value based on
>>> performance data for that architecture. Patch also adds
>>> FAULT_AROUND_ORDER Kconfig element in arch/X86.
>>
>> Please don't do it this way.
>>
>> In mm/Kconfig, put
>>
>>  config FAULT_AROUND_ORDER
>>  int
>>  default 1234 if POWERPC
>>  default 4
>>
>> The way you have it now, every single architecture that needs to enable
>> this has to go put that in their Kconfig.  That's madness.  This way,
>> you only put it in one place, and folks only have to care if they want
>> to change the default to be something other than 4.
> 
> It looks more like it's necessary only to change the default, not
> to enable it.  Unless I read his patch wrong...
> 
Yes. With current patch, you only need to change the default by which
you enable it.

With regards
Maddy
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 05/15 V3] perf, c2c: Rework setup code to prepare for features

2014-04-08 Thread Don Zickus
On Wed, Apr 09, 2014 at 10:12:32AM +0900, Namhyung Kim wrote:
> On Tue, 8 Apr 2014 10:11:07 -0400, Don Zickus wrote:
> > On Tue, Apr 08, 2014 at 04:41:29PM +0900, Namhyung Kim wrote:
> >> On Sat, 29 Mar 2014 18:10:18 +0100, Jiri Olsa wrote:
> >> > On Mon, Mar 24, 2014 at 03:36:56PM -0400, Don Zickus wrote:
> >> >
> >> > SNIP
> >> >
> >> >>  
> >> >>  static int perf_c2c__process_load_store(struct perf_c2c *c2c,
> >> >> +   struct addr_location *al,
> >> >> struct perf_sample *sample,
> >> >> -   struct addr_location *al)
> >> >> +   struct perf_evsel *evsel)
> >> >>  {
> >> >> -   if (c2c->raw_records)
> >> >> -   perf_sample__fprintf(sample, ' ', "raw input", al, 
> >> >> stdout);
> >> >> +   struct mem_info *mi;
> >> >> +
> >> >> +   mi = sample__resolve_mem(sample, al);
> >> >> +   if (!mi)
> >> >> +   return -ENOMEM;
> >> >
> >> > perhaps not directly related to this patchset, but I needed
> >> > attached patch to get resolved data in .bss (static), which
> >> > for some reason happened to be located in executable segment
> >> 
> >> Wasn't it a read-only/const data?
> >
> > I believe it had the 'x' bit set.  Or the kernel was not passing any
> > protection bits, so it defaulted to MAP_FUNCTION?
> 
> The perf treats a mapping as a data mapping (MAP_VARIABLE) by default if
> the 'x' bit is not set.  As Jiri said it's static data, I guessed it's
> a const data (set to 0?) and moved into .rodata section and then to the
> text segment.

Unfortunately, his patch will be needed eventually, if not for his reason,
I had a java JAR file example that was pulling data addresses out of a
shared memory region with the protection bits set to 'rwx' (in the
/proc/<pid>/maps area).  I was losing lots of samples until I came up with
a more complicated hack.

I will try to dig up my example, so you can see, so it doesn't sound like
I am making this up. :-)

Cheers,
Don
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT] kbuild/lto changes for 3.15-rc1

2014-04-08 Thread Andi Kleen
On Tue, Apr 08, 2014 at 03:44:25PM -0700, Linus Torvalds wrote:
> On Tue, Apr 8, 2014 at 1:49 PM,   wrote:
> >
> > In addition to making the kernel smaller and such (I'll leave the
> > specific stats there to Andi), here's the key awesomeness of LTO that
> > you, personally, should find useful and compelling: LTO will eliminate
> > the need to add many lower-level Kconfig symbols to compile out bits of
> > the kernel.
> 
> Actually that, to me, is a negative right now.
> 
> Since there's no way we'll make LTO the default in the foreseeable
> future, people starting to use it like that is just a bad bad thing.
> 
> So really, the main advantage of LTO would be any actual optimizations
> it can do. And call me anal, but I want *numbers* for that before I
> merge it. Not handwaving. I'm not actually aware of how well - if at
> all - code generation actually improves.

Well it looks very different if you look at the generated code. gcc becomes
a lot more aggressive.

But as I said there's currently no significant performance improvement known,
so if your only goal is better performance this patch (as currently
known) is not a big winner.  My suspicion is that's mostly because
the standard benchmarks we run are not too compiler sensitive.

However the users seem to care about the other benefits, like code size.

And there may well be loads that are compiler sensitive.
As Honza posted, for non kernel workloads LTO is known to have large benefits.

Besides at this point it's pretty much just some additions to the Makefiles.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [rfc]pwm: add BCM2835 PWM driver

2014-04-08 Thread Stephen Warren
On 04/08/2014 05:02 PM, Tim Kryger wrote:
> On Thu, Apr 3, 2014 at 6:44 AM, Bart Tanghe  wrote:
>> need some recommendation
>> the memory mapped io registers of the bcm2835 pwm hardware are spreaded
>> over the memory mapped io
>> gpio config 0x2024 - clk config 0x201010A0 - pwm configuration 0x2020C000
>> to handle this, I've used the base address of the memory mapped io
>> so I can use positive offsets
> 
> So the registers for this PWM are located in three distinct memory regions?
...
>> +struct bcm2835_pwm_chip {
>> +   struct pwm_chip chip;
>> +   struct device *dev;
>> +   int channel;
>> +   void __iomem *mmio;
> 
> One pointer isn't going to be enough.  You need three.
> 
> I suggest renaming the first and adding two more:
> 
> void __iomem *base_pwm;
> void __iomem *base_clk;
> void __iomem *base_alt;

Sorry, I forgot about this patch. One comment here; the PWM driver can't
touch the clock or alt registers; those should be owned by the clock
driver, and the driver for whatever alt is (pinmux - don't recall what
it's touching there).
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 1/2] mm: move FAULT_AROUND_ORDER to arch/

2014-04-08 Thread Madhavan Srinivasan
On Friday 04 April 2014 09:48 PM, Dave Hansen wrote:
> On 04/03/2014 11:27 PM, Madhavan Srinivasan wrote:
>> This patch creates infrastructure to move the FAULT_AROUND_ORDER
>> to arch/ using Kconfig. This will enable architecture maintainers
>> to decide on suitable FAULT_AROUND_ORDER value based on
>> performance data for that architecture. Patch also adds
>> FAULT_AROUND_ORDER Kconfig element in arch/X86.
> 
> Please don't do it this way.
> 
> In mm/Kconfig, put
> 
>   config FAULT_AROUND_ORDER
>   int
>   default 1234 if POWERPC
>   default 4
> 
> The way you have it now, every single architecture that needs to enable
> this has to go put that in their Kconfig.  That's madness.  This way,

I thought about it and decided not to do it this way because, in the future,
sub platforms of the architecture may decide to change the values. Also,
adding an if line for each architecture with different sub platforms
oring to it will look messy.

With regards
Maddy

> you only put it in one place, and folks only have to care if they want
> to change the default to be something other than 4.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 07/15 V3] perf, c2c: Add in sort on physid

2014-04-08 Thread Namhyung Kim
On Tue, 8 Apr 2014 10:17:58 -0400, Don Zickus wrote:
> On Tue, Apr 08, 2014 at 04:56:25PM +0900, Namhyung Kim wrote:
>> On Mon, 24 Mar 2014 15:36:58 -0400, Don Zickus wrote:
>> > +static int perf_c2c__init(struct perf_c2c *c2c)
>> > +{
>> > +  sort__mode = SORT_MODE__PHYSID;
>> > +  sort__wants_unique = 1;
>> > +  sort_order = "daddr,iaddr,pid,tid";
>> 
>> Where are the SORT_MODE__PHYSID, sort__wants_unique and "daddr", "iaddr"
>> sort keys defined?
>
> In a previous patchset that enables the mmap2 interface.

Ah, missed that.. will look at it soon.

>
>> 
>> Also, more importantly, I think the sort order should contain at least
>> "mem" and "snoop" keys to group samples based on the exact hitm
>> information.
>
> I can look into it, but after iaddr, pid and tid, sorting on snoop doesn't
> really change anything if I recall.  The hitms are scattered across iaddr.

But it doesn't guarantee that all hitms are scattered, right?  Also if
it's the case I guess adding more sort keys is not harmful since they
don't even have a chance to test.

I think you can check hist_entry->stat.nr_events always being 1.

>
>> 
>> In my understanding, if two samples are captured at exactly same
>> position with a same data access but different hitm info, they cannot be
>> classified and just use the hitm info of first entry.
>
> Why?  If the first hitm access was local and the second one remote,
> doesn't that indicate the accessed data is being pulled onto different
> nodes?

But "hist_entry" won't have the information after
__hists__add__entry() is called unless it has 'mem' and 'snoop' sort keys.
Since two samples have same daddr, iaddr, pid and tid, it'd think those
two samples are same and then add stats of second one to the first and
finally discard the second.  So first one will have a double weight for
the local hitm case only.

This is the case that I worry about.  Am I missing something?

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/15 V3] perf c2c: Shared data analyser

2014-04-08 Thread Andi Kleen
Namhyung Kim  writes:
>
> Well, I'm not sure ;-)  Yes, the c2c is a complex tool which might
> deserve an own command, but the functionality is very similar and I
> guess there's something to share between them.

They work very differently.  I don't see a lot of potential
for sharing.

perf mem is basically just a way to annotate normal samples slightly
with addresses, while c2c is fundamentally address driven.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] ARM: dts: msm: Add 8921 PMIC to ssbi bus

2014-04-08 Thread Stephen Boyd
Add the PMIC and the sub-devices that are currently supported in
the kernel to the DT.

Signed-off-by: Stephen Boyd 
---
 arch/arm/boot/dts/qcom-msm8960-cdp.dts | 16 +++
 arch/arm/boot/dts/qcom-msm8960.dtsi| 37 ++
 2 files changed, 53 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-msm8960-cdp.dts 
b/arch/arm/boot/dts/qcom-msm8960-cdp.dts
index a58fb88315f6..6f61c54a653e 100644
--- a/arch/arm/boot/dts/qcom-msm8960-cdp.dts
+++ b/arch/arm/boot/dts/qcom-msm8960-cdp.dts
@@ -1,6 +1,22 @@
+#include <dt-bindings/input/input.h>
+
 #include "qcom-msm8960.dtsi"
 
 / {
model = "Qualcomm MSM8960 CDP";
compatible = "qcom,msm8960-cdp", "qcom,msm8960";
+
+};
+
+&pmicintc {
+   keypad@148 {
+   linux,keymap = <
+   MATRIX_KEY(0, 0, KEY_VOLUMEUP)
+   MATRIX_KEY(0, 1, KEY_VOLUMEDOWN)
+   MATRIX_KEY(0, 2, KEY_CAMERA_FOCUS)
+   MATRIX_KEY(0, 3, KEY_CAMERA)
+   >;
+   keypad,num-rows = <1>;
+   keypad,num-columns = <5>;
+   };
 };
diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi 
b/arch/arm/boot/dts/qcom-msm8960.dtsi
index 997b7b94e117..fb7d3beece6d 100644
--- a/arch/arm/boot/dts/qcom-msm8960.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8960.dtsi
@@ -124,6 +124,43 @@
compatible = "qcom,ssbi";
reg = <0x50 0x1000>;
qcom,controller-type = "pmic-arbiter";
+
+   pmicintc: pmic@0 {
+   compatible = "qcom,pm8921";
+   interrupt-parent = <>;
+   interrupts = <104 8>;
+   #interrupt-cells = <2>;
+   interrupt-controller;
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   pwrkey@1c {
+   compatible = "qcom,pm8921-pwrkey";
+   reg = <0x1c>;
+   interrupt-parent = <>;
+   interrupts = <50 1>, <51 1>;
+   debounce = <15625>;
+   pull-up;
+   };
+
+   keypad@148 {
+   compatible = "qcom,pm8921-keypad";
+   reg = <0x148>;
+   interrupt-parent = <>;
+   interrupts = <74 1>, <75 1>;
+   debounce = <15>;
+   scan-delay = <32>;
+   row-hold = <91500>;
+   };
+
+   rtc@11d {
+   compatible = "qcom,pm8921-rtc";
+   interrupt-parent = <>;
+   interrupts = <39 1>;
+   reg = <0x11d>;
+   allow-set-time;
+   };
+   };
};
 
rng@1a50 {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] ARM: dts: msm: Add 8058 PMIC to ssbi bus

2014-04-08 Thread Stephen Boyd
Add the PMIC and the sub-devices that are currently supported in
the kernel to the DT.

Signed-off-by: Stephen Boyd 
---
 arch/arm/boot/dts/qcom-msm8660-surf.dts | 30 +++
 arch/arm/boot/dts/qcom-msm8660.dtsi | 42 +
 2 files changed, 72 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-msm8660-surf.dts 
b/arch/arm/boot/dts/qcom-msm8660-surf.dts
index 169bad90dac9..a27eedff62d3 100644
--- a/arch/arm/boot/dts/qcom-msm8660-surf.dts
+++ b/arch/arm/boot/dts/qcom-msm8660-surf.dts
@@ -1,6 +1,36 @@
+#include <dt-bindings/input/input.h>
+
 #include "qcom-msm8660.dtsi"
 
 / {
model = "Qualcomm MSM8660 SURF";
compatible = "qcom,msm8660-surf", "qcom,msm8660";
 };
+
+&pmicintc {
+   keypad@148 {
+   linux,keymap = <
+   MATRIX_KEY(0, 0, KEY_FN_F1)
+   MATRIX_KEY(0, 1, KEY_UP)
+   MATRIX_KEY(0, 2, KEY_LEFT)
+   MATRIX_KEY(0, 3, KEY_VOLUMEUP)
+   MATRIX_KEY(1, 0, KEY_FN_F2)
+   MATRIX_KEY(1, 1, KEY_RIGHT)
+   MATRIX_KEY(1, 2, KEY_DOWN)
+   MATRIX_KEY(1, 3, KEY_VOLUMEDOWN)
+   MATRIX_KEY(2, 3, KEY_ENTER)
+   MATRIX_KEY(4, 0, KEY_CAMERA_FOCUS)
+   MATRIX_KEY(4, 1, KEY_UP)
+   MATRIX_KEY(4, 2, KEY_LEFT)
+   MATRIX_KEY(4, 3, KEY_HOME)
+   MATRIX_KEY(4, 4, KEY_FN_F3)
+   MATRIX_KEY(5, 0, KEY_CAMERA)
+   MATRIX_KEY(5, 1, KEY_RIGHT)
+   MATRIX_KEY(5, 2, KEY_DOWN)
+   MATRIX_KEY(5, 3, KEY_BACK)
+   MATRIX_KEY(5, 4, KEY_MENU)
+   >;
+   keypad,num-rows = <6>;
+   keypad,num-columns = <5>;
+   };
+};
diff --git a/arch/arm/boot/dts/qcom-msm8660.dtsi 
b/arch/arm/boot/dts/qcom-msm8660.dtsi
index c52a9e964a44..985b4c16976a 100644
--- a/arch/arm/boot/dts/qcom-msm8660.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8660.dtsi
@@ -83,5 +83,47 @@
compatible = "qcom,ssbi";
reg = <0x50 0x1000>;
qcom,controller-type = "pmic-arbiter";
+
+   pmicintc: pmic@0 {
+   compatible = "qcom,pm8058";
+   interrupt-parent = <>;
+   interrupts = <88 8>;
+   #interrupt-cells = <2>;
+   interrupt-controller;
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   pwrkey@1c {
+   compatible = "qcom,pm8058-pwrkey";
+   reg = <0x1c>;
+   interrupt-parent = <>;
+   interrupts = <50 1>, <51 1>;
+   debounce = <15625>;
+   pull-up;
+   };
+
+   keypad@148 {
+   compatible = "qcom,pm8058-keypad";
+   reg = <0x148>;
+   interrupt-parent = <>;
+   interrupts = <74 1>, <75 1>;
+   debounce = <15>;
+   scan-delay = <32>;
+   row-hold = <91500>;
+   };
+
+   rtc@11d {
+   compatible = "qcom,pm8058-rtc";
+   interrupt-parent = <>;
+   interrupts = <39 1>;
+   reg = <0x11d>;
+   allow-set-time;
+   };
+
+   vibrator@4a {
+   compatible = "qcom,pm8058-vib";
+   reg = <0x4a>;
+   };
+   };
};
 };
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Grant Award!!!

2014-04-08 Thread Ford Foundation
Attention,

The Ford Foundation has awarded the sum of 1.5m USD as a Grant Donation to you 
please contacts Email: grantawa...@ford-foundation.org with your details.Thanks

Regards

Ford Foundation Orphanage
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT] kbuild/lto changes for 3.15-rc1

2014-04-08 Thread Andi Kleen
Thanks Honza. Just one comment:

> The runtime benefits are more visible on bigger, bloated and less
> optimized projects than on hand tuned video encoder implementation.
> I believe Kernel largely falls into hand tuned category despite its size.

In my experience there's a lot of badly tuned code in the kernel these days,
especially when you go outside the core code (i.e. into drivers/*)

Or code that used to be tuned, but isn't anymore after several years
of feature additions and bug fixes. The kernel code quality is also quite 
varying.

So anything the compiler can do helps.

> I would be curious about the results on Kernel.

We saw some upsides in performance with some standard tests, but nothing 
too significant.

-Andi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [clocksource] INFO: possible irq lock inversion dependency detected

2014-04-08 Thread Jet Chen

On 04/08/2014 01:21 PM, Viresh Kumar wrote:

On 8 April 2014 09:29, Jet Chen  wrote:

(Sorry for previous bad format email)
Your patch on my testing branch in LKP system:
git://bee.sh.intel.com/git/tchen37/linux.git timer_debug3 got the below
dmesg.
FYI, I applied your patch on the top of commit
6378cb51af5f4743db0dcb3cbcf862eac5908754 - timer: don't migrate pinned
timers.


Hi Jet,

Thanks for your efforts. It looks like we must use spin_lock_irqsave() here.
And that's all we need to fix this issue..



spin_lock_irqsave() does fix this issue.

Tested-by: Jet Chen 


To get the right order in which patches must be applied (obviously with the
updates I have), please pick this branch:

git://git.linaro.org/people/viresh.kumar/linux.git isolate-cpusets

I hope this fixes the issues you were getting.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 1/2] mm: move FAULT_AROUND_ORDER to arch/

2014-04-08 Thread Madhavan Srinivasan
On Friday 04 April 2014 06:47 PM, Kirill A. Shutemov wrote:
> On Fri, Apr 04, 2014 at 11:57:14AM +0530, Madhavan Srinivasan wrote:
>> Kirill A. Shutemov with faultaround patchset introduced
>> vm_ops->map_pages() for mapping easy accessible pages around
>> fault address in hope to reduce number of minor page faults.
>>
>> This patch creates infrastructure to move the FAULT_AROUND_ORDER
>> to arch/ using Kconfig. This will enable architecture maintainers
>> to decide on suitable FAULT_AROUND_ORDER value based on
>> performance data for that architecture. Patch also adds
>> FAULT_AROUND_ORDER Kconfig element in arch/X86.
>>
>> Signed-off-by: Madhavan Srinivasan 
>> ---
>>  arch/x86/Kconfig   |4 
>>  include/linux/mm.h |9 +
>>  mm/memory.c|   12 +---
>>  3 files changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index 9c0a657..5833f22 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -1177,6 +1177,10 @@ config DIRECT_GBPAGES
>>support it. This can improve the kernel's performance a tiny bit by
>>reducing TLB pressure. If in doubt, say "Y".
>>  
>> +config FAULT_AROUND_ORDER
>> +int
>> +default "4"
>> +
>>  # Common NUMA Features
>>  config NUMA
>>  bool "Numa Memory Allocation and Scheduler Support"
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 0bd4359..b93c1c3 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -26,6 +26,15 @@ struct file_ra_state;
>>  struct user_struct;
>>  struct writeback_control;
>>  
>> +/*
>> + * Fault around order is a control knob to decide the fault around pages.
>> + * Default value is set to 0UL (disabled), but the arch can override it as
>> + * desired.
>> + */
>> +#ifndef CONFIG_FAULT_AROUND_ORDER
>> +#define CONFIG_FAULT_AROUND_ORDER 0
>> +#endif
>> +
> 
> I don't think it should be in header file: nobody except mm/memory.c cares.
> Just put it instead '#define FAULT_AROUND_ORDER'.
> 

Ok. Will do this change.

>>  #ifndef CONFIG_NEED_MULTIPLE_NODES  /* Don't use mapnrs, do it properly */
>>  extern unsigned long max_mapnr;
>>  
>> diff --git a/mm/memory.c b/mm/memory.c
>> index b02c584..22a4a89 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -3358,10 +3358,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned 
>> long address,
>>  update_mmu_cache(vma, address, pte);
>>  }
>>  
>> -#define FAULT_AROUND_ORDER 4
>> -
>>  #ifdef CONFIG_DEBUG_FS
>> -static unsigned int fault_around_order = FAULT_AROUND_ORDER;
>> +static unsigned int fault_around_order = CONFIG_FAULT_AROUND_ORDER;
>>  
>>  static int fault_around_order_get(void *data, u64 *val)
>>  {
>> @@ -3371,7 +3369,7 @@ static int fault_around_order_get(void *data, u64 *val)
>>  
>>  static int fault_around_order_set(void *data, u64 val)
>>  {
>> -BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE);
>> +BUILD_BUG_ON((1UL << CONFIG_FAULT_AROUND_ORDER) > PTRS_PER_PTE);
>>  if (1UL << val > PTRS_PER_PTE)
>>  return -EINVAL;
>>  fault_around_order = val;
>> @@ -3406,14 +3404,14 @@ static inline unsigned long fault_around_pages(void)
>>  {
>>  unsigned long nr_pages;
>>  
>> -nr_pages = 1UL << FAULT_AROUND_ORDER;
>> +nr_pages = 1UL << CONFIG_FAULT_AROUND_ORDER;
>>  BUILD_BUG_ON(nr_pages > PTRS_PER_PTE);
>>  return nr_pages;
>>  }
>>  
>>  static inline unsigned long fault_around_mask(void)
>>  {
>> -return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1);
>> +return ~((1UL << (PAGE_SHIFT + CONFIG_FAULT_AROUND_ORDER)) - 1);
>>  }
>>  #endif
>>  
>> @@ -3471,7 +3469,7 @@ static int do_read_fault(struct mm_struct *mm, struct 
>> vm_area_struct *vma,
>>   * if page by the offset is not ready to be mapped (cold cache or
>>   * something).
>>   */
>> -if (vma->vm_ops->map_pages) {
>> +if ((vma->vm_ops->map_pages) && (fault_around_pages() > 1)) {
> 
>   if (vma->vm_ops->map_pages && fault_around_pages()) {
> 
For a fault around value of 0, fault_around_pages() will return 1 and
that is the reason for checking that it is greater than 1. Also, using debugfs,
the fault around value can be zeroed.

With regards
Maddy
>>  pte = pte_offset_map_lock(mm, pmd, address, &ptl);
>>  do_fault_around(vma, address, pte, pgoff, flags);
>>  if (!pte_same(*pte, orig_pte))
>> -- 
>> 1.7.10.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 05/15 V3] perf, c2c: Rework setup code to prepare for features

2014-04-08 Thread Namhyung Kim
On Tue, 8 Apr 2014 10:11:07 -0400, Don Zickus wrote:
> On Tue, Apr 08, 2014 at 04:41:29PM +0900, Namhyung Kim wrote:
>> On Sat, 29 Mar 2014 18:10:18 +0100, Jiri Olsa wrote:
>> > On Mon, Mar 24, 2014 at 03:36:56PM -0400, Don Zickus wrote:
>> >
>> > SNIP
>> >
>> >>  
>> >>  static int perf_c2c__process_load_store(struct perf_c2c *c2c,
>> >> + struct addr_location *al,
>> >>   struct perf_sample *sample,
>> >> - struct addr_location *al)
>> >> + struct perf_evsel *evsel)
>> >>  {
>> >> - if (c2c->raw_records)
>> >> - perf_sample__fprintf(sample, ' ', "raw input", al, stdout);
>> >> + struct mem_info *mi;
>> >> +
>> >> + mi = sample__resolve_mem(sample, al);
>> >> + if (!mi)
>> >> + return -ENOMEM;
>> >
>> > perhaps not directly related to this patchset, but I needed
>> > attached patch to get resolved data in .bss (static), which
>> > for some reason happened to be located in executable segment
>> 
>> Wasn't it a read-only/const data?
>
> I believe it had the 'x' bit set.  Or the kernel was not passing any
> protection bits, so it defaulted to MAP_FUNCTION?

The perf treats a mapping as a data mapping (MAP_VARIABLE) by default if
the 'x' bit is not set.  As Jiri said it's static data, I guessed it's
a const data (set to 0?) and moved into .rodata section and then to the
text segment.

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Bumping required kernels to 3.0 for Linux backports ?

2014-04-08 Thread Luis R. Rodriguez
Folks,

We have an age-old dance of random parties, in particular the embedded
folks, ending up with random ancient kernels on embedded devices. I've
tried to carefully document a few ideas on why and how I believe we
can make automatic kernel backporting scale [0] and part of this will
be to try to bring consensus about a unified front to persuade users,
partners, customers, whatever, to be at least on a kernel listed as
supported on kernel.org. Today we backport down to the last 30
kernels, from 2.6.24 up to 3.14 and while this is manageable right now
I expect the number of supported drivers and features to keep
increasing (I've stopped counting). I am very aware of the reasons to
support a slew of old kernels, but it's nothing but our own fault for
not educating enough about the importance of upgrading. I realize this
is an age-old issue, but since I think we need to scale backports and
wish to remove older kernels from it fast, I wanted to see if any
folks might have ideas on what can help here other than saying, 'if
you use Linux backports, your drivers will be automatically backported
and supported'.

To start off -- what's the *last* kernel you realistically need for
your users to use backports right now? Is it really 2.6.25? Would
anyone kick and scream if, for the backports-3.15 release, we try to take
things up to support only down to at least 3.0 *right now*?

[0] 
http://www.do-not-panic.com/2014/04/automatic-linux-kernel-backporting-with-coccinelle.html

  Luis
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/15 V3] perf c2c: Shared data analyser

2014-04-08 Thread Namhyung Kim
On Tue, 8 Apr 2014 10:22:26 -0400, Don Zickus wrote:
> On Tue, Apr 08, 2014 at 03:59:15PM +0900, Namhyung Kim wrote:
>> Hi Don,
>> 
>> On Mon, 24 Mar 2014 15:36:54 -0400, Don Zickus wrote:
>> > From: Arnaldo Carvalho de Melo 
>> >
>> > This is the start of a new perf tool that will collect information about
>> > memory accesses and analyse it to find things like hot cachelines, etc.
>> 
>> So why not integrating this into existing 'perf mem' command if it's all
>> about analyzing memory accesses?
>
> Our expectations were different.  We expected to do system-wide analysis
> with loads and stores.  With 'perf mem' you didn't have the ability to
> analyze both loads and stores at the same time.

But it's very simple to change perf mem to work with the both IMHO.

>
> In all my private conversations with Stephane, Arnaldo and Jiri, it was
> never brought up.  We had just assumed that it made more sense to keep it
> separate.

Well, I'm not sure ;-)  Yes, the c2c is a complex tool which might
deserve its own command, but the functionality is very similar and I
guess there's something to share between them.

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Patch v3 6/6] iio: Added ABI description for quaternion

2014-04-08 Thread Srinivas Pandruvada
Added documentation for reading quaternion components for 3D rotations.

Signed-off-by: Srinivas Pandruvada 
---
 Documentation/ABI/testing/sysfs-bus-iio | 9 +
 1 file changed, 9 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-iio 
b/Documentation/ABI/testing/sysfs-bus-iio
index 6e02c50..d1cad9a 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -891,3 +891,12 @@ Contact:   linux-...@vger.kernel.org
 Description:
This attribute is used to get/set the integration time in
seconds.
+
+What:  /sys/bus/iio/devices/iio:deviceX/in_rot_quaternion_raw
+KernelVersion: 3.15
+Contact:   linux-...@vger.kernel.org
+Description:
+   Raw value of quaternion components using a format
+   x y z w. Here x, y, and z component represents the axis about
+   which a rotation will occur and w component represents the
+   amount of rotation.
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Patch v3 5/6] iio: hid-sensors: Added device rotation support

2014-04-08 Thread Srinivas Pandruvada
Added usage id processing for device rotation. This uses IIO
interfaces for triggered buffer to present data to user
mode. This uses HID sensor framework for registering callback
events from the sensor hub.
Data is exported to user space in the form of quaternion rotation
format.

Signed-off-by: Srinivas Pandruvada 
---
 drivers/iio/orientation/Kconfig   |  12 +
 drivers/iio/orientation/Makefile  |   1 +
 drivers/iio/orientation/hid-sensor-rotation.c | 359 ++
 include/linux/hid-sensor-ids.h|   1 +
 4 files changed, 373 insertions(+)
 create mode 100644 drivers/iio/orientation/hid-sensor-rotation.c

diff --git a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig
index 58c62c8..e3aa1e5 100644
--- a/drivers/iio/orientation/Kconfig
+++ b/drivers/iio/orientation/Kconfig
@@ -16,4 +16,16 @@ config HID_SENSOR_INCLINOMETER_3D
  Say yes here to build support for the HID SENSOR
  Inclinometer 3D.
 
+config HID_SENSOR_DEVICE_ROTATION
+   depends on HID_SENSOR_HUB
+   select IIO_BUFFER
+   select IIO_TRIGGERED_BUFFER
+   select HID_SENSOR_IIO_COMMON
+   select HID_SENSOR_IIO_TRIGGER
+   tristate "HID Device Rotation"
+   help
+ Say yes here to build support for the HID SENSOR
+ device rotation. The output of a device rotation sensor
+ is presented using quaternion format.
+
 endmenu
diff --git a/drivers/iio/orientation/Makefile b/drivers/iio/orientation/Makefile
index 2c97572..4734dab 100644
--- a/drivers/iio/orientation/Makefile
+++ b/drivers/iio/orientation/Makefile
@@ -4,3 +4,4 @@
 
 # When adding new entries keep the list in alphabetical order
 obj-$(CONFIG_HID_SENSOR_INCLINOMETER_3D) += hid-sensor-incl-3d.o
+obj-$(CONFIG_HID_SENSOR_DEVICE_ROTATION) += hid-sensor-rotation.o
diff --git a/drivers/iio/orientation/hid-sensor-rotation.c 
b/drivers/iio/orientation/hid-sensor-rotation.c
new file mode 100644
index 000..5c7d558
--- /dev/null
+++ b/drivers/iio/orientation/hid-sensor-rotation.c
@@ -0,0 +1,359 @@
+/*
+ * HID Sensors Driver
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "../common/hid-sensors/hid-sensor-trigger.h"
+
+struct dev_rot_state {
+   struct hid_sensor_hub_callbacks callbacks;
+   struct hid_sensor_common common_attributes;
+   struct hid_sensor_hub_attribute_info quaternion;
+   u32 sampled_vals[4];
+};
+
+/* Channel definitions */
+static const struct iio_chan_spec dev_rot_channels[] = {
+   {
+   .type = IIO_ROT,
+   .modified = 1,
+   .channel2 = IIO_MOD_QUATERNION,
+   .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
+   .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) |
+   BIT(IIO_CHAN_INFO_SAMP_FREQ) |
+   BIT(IIO_CHAN_INFO_HYSTERESIS) |
+   BIT(IIO_CHAN_INFO_RAW),
+   }
+};
+
+/* Adjust channel real bits based on report descriptor */
+static void dev_rot_adjust_channel_bit_mask(struct iio_chan_spec *chan,
+   int size)
+{
+   chan->scan_type.sign = 's';
+   /* Real storage bits will change based on the report desc. */
+   chan->scan_type.realbits = size * 8;
+   /* Maximum size of a sample to capture is u32 */
+   chan->scan_type.storagebits = sizeof(u32) * 8;
+   chan->scan_type.repeat = 4;
+}
+
+/* Channel read_raw handler */
+static int dev_rot_read_raw(struct iio_dev *indio_dev,
+   struct iio_chan_spec const *chan,
+   int size, int *vals, int *val_len,
+   long mask)
+{
+   struct dev_rot_state *rot_state = iio_priv(indio_dev);
+   int ret_type;
+   int i;
+
+   vals[0] = 0;
+   vals[1] = 0;
+
+   switch (mask) {
+   case IIO_CHAN_INFO_RAW:
+   if (size >= 4) {
+   for (i = 0; i < 4; ++i)
+   vals[i] = rot_state->sampled_vals[i];
+   ret_type = IIO_VAL_INT_MULTIPLE;
+   *val_len =  4;
+   } else
+   ret_type = -EINVAL;
+   break;
+   case IIO_CHAN_INFO_SCALE:
+   vals[0] = rot_state->quaternion.units;
+   

Re: [Ocfs2-devel] [PATCH 1/1] OCFS2: Remove NULL assignments on static

2014-04-08 Thread Wengang

Hi Fabian,

What's the badness if we do the assignments?

thanks,
wengang

于 2014年04月09日 04:13, Fabian Frederick 写道:

static values are automatically initialized to NULL.

Cc: Andrew Morton 
Cc: ocfs2-de...@oss.oracle.com
Signed-off-by: Fabian Frederick 
---
  fs/ocfs2/cluster/tcp.c   | 2 +-
  fs/ocfs2/dlm/dlmdebug.c  | 2 +-
  fs/ocfs2/dlm/dlmlock.c   | 2 +-
  fs/ocfs2/dlm/dlmmaster.c | 6 +++---
  fs/ocfs2/stackglue.c | 2 +-
  fs/ocfs2/super.c | 4 ++--
  fs/ocfs2/uptodate.c  | 2 +-
  7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eb649d2..161fb1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -108,7 +108,7 @@ static struct rb_root o2net_handler_tree = RB_ROOT;
  static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
  
  /* XXX someday we'll need better accounting */

-static struct socket *o2net_listen_sock = NULL;
+static struct socket *o2net_listen_sock;
  
  /*

   * listen work is only queued by the listening socket callbacks on the
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index e33cd7a..18f13c2 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -338,7 +338,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
  
  #ifdef CONFIG_DEBUG_FS
  
-static struct dentry *dlm_debugfs_root = NULL;

+static struct dentry *dlm_debugfs_root;
  
  #define DLM_DEBUGFS_DIR"o2dlm"

  #define DLM_DEBUGFS_DLM_STATE "dlm_state"
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5d32f75..66c2a49 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -52,7 +52,7 @@
  #define MLOG_MASK_PREFIX ML_DLM
  #include "cluster/masklog.h"
  
-static struct kmem_cache *dlm_lock_cache = NULL;

+static struct kmem_cache *dlm_lock_cache;
  
  static DEFINE_SPINLOCK(dlm_cookie_lock);

  static u64 dlm_next_cookie = 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index af3f7aa..fd07ef9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -82,9 +82,9 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
return 1;
  }
  
-static struct kmem_cache *dlm_lockres_cache = NULL;

-static struct kmem_cache *dlm_lockname_cache = NULL;
-static struct kmem_cache *dlm_mle_cache = NULL;
+static struct kmem_cache *dlm_lockres_cache;
+static struct kmem_cache *dlm_lockname_cache;
+static struct kmem_cache *dlm_mle_cache;
  
  static void dlm_mle_release(struct kref *kref);

  static void dlm_init_mle(struct dlm_master_list_entry *mle,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 83f1a66..5d965e8 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -709,7 +709,7 @@ static struct ctl_table ocfs2_root_table[] = {
{ }
  };
  
-static struct ctl_table_header *ocfs2_table_header = NULL;

+static struct ctl_table_header *ocfs2_table_header;
  
  
  /*

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a7cdd56..9027729 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -75,7 +75,7 @@
  
  #include "buffer_head_io.h"
  
-static struct kmem_cache *ocfs2_inode_cachep = NULL;

+static struct kmem_cache *ocfs2_inode_cachep;
  struct kmem_cache *ocfs2_dquot_cachep;
  struct kmem_cache *ocfs2_qf_chunk_cachep;
  
@@ -85,7 +85,7 @@ struct kmem_cache *ocfs2_qf_chunk_cachep;

   * workqueue and schedule on our own. */
  struct workqueue_struct *ocfs2_wq = NULL;
  
-static struct dentry *ocfs2_debugfs_root = NULL;

+static struct dentry *ocfs2_debugfs_root;
  
  MODULE_AUTHOR("Oracle");

  MODULE_LICENSE("GPL");
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 52eaf33..82e17b0 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -67,7 +67,7 @@ struct ocfs2_meta_cache_item {
sector_tc_block;
  };
  
-static struct kmem_cache *ocfs2_uptodate_cachep = NULL;

+static struct kmem_cache *ocfs2_uptodate_cachep;
  
  u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)

  {


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Patch v3 3/6] IIO: core: Modify scan element type

2014-04-08 Thread Srinivas Pandruvada
The current scan element type uses the following format:
  [be|le]:[s|u]bits/storagebits[>>shift].
To specify multiple elements in this type, added a repeat value.
So new format is:
  [be|le]:[s|u]bits/storagebits{X[repeat]}[>>shift].
Here X specifies how many times the real/storage bits are repeated.

When the value of X is 0 or 1, the repeat value is not used in the format,
and it will be the same as the existing format.

Signed-off-by: Srinivas Pandruvada 
---
 drivers/iio/industrialio-buffer.c | 41 +--
 include/linux/iio/iio.h   |  9 +
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/drivers/iio/industrialio-buffer.c 
b/drivers/iio/industrialio-buffer.c
index e108f2a..afc81b5 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -150,7 +150,16 @@ static ssize_t iio_show_fixed_type(struct device *dev,
type = IIO_BE;
 #endif
}
-   return sprintf(buf, "%s:%c%d/%d>>%u\n",
+   if (this_attr->c->scan_type.repeat > 1)
+   return sprintf(buf, "%s:%c%d/%d{%d[repeat]}>>%u\n",
+  iio_endian_prefix[type],
+  this_attr->c->scan_type.sign,
+  this_attr->c->scan_type.realbits,
+  this_attr->c->scan_type.storagebits,
+  this_attr->c->scan_type.repeat,
+  this_attr->c->scan_type.shift);
+   else
+   return sprintf(buf, "%s:%c%d/%d>>%u\n",
   iio_endian_prefix[type],
   this_attr->c->scan_type.sign,
   this_attr->c->scan_type.realbits,
@@ -474,14 +483,22 @@ static int iio_compute_scan_bytes(struct iio_dev 
*indio_dev,
for_each_set_bit(i, mask,
 indio_dev->masklength) {
ch = iio_find_channel_from_si(indio_dev, i);
-   length = ch->scan_type.storagebits / 8;
+   if (ch->scan_type.repeat > 1)
+   length = ch->scan_type.storagebits / 8 *
+   ch->scan_type.repeat;
+   else
+   length = ch->scan_type.storagebits / 8;
bytes = ALIGN(bytes, length);
bytes += length;
}
if (timestamp) {
ch = iio_find_channel_from_si(indio_dev,
  indio_dev->scan_index_timestamp);
-   length = ch->scan_type.storagebits / 8;
+   if (ch->scan_type.repeat > 1)
+   length = ch->scan_type.storagebits / 8 *
+   ch->scan_type.repeat;
+   else
+   length = ch->scan_type.storagebits / 8;
bytes = ALIGN(bytes, length);
bytes += length;
}
@@ -957,7 +974,11 @@ static int iio_buffer_update_demux(struct iio_dev 
*indio_dev,
   indio_dev->masklength,
   in_ind + 1);
ch = iio_find_channel_from_si(indio_dev, in_ind);
-   length = ch->scan_type.storagebits/8;
+   if (ch->scan_type.repeat > 1)
+   length = ch->scan_type.storagebits / 8 *
+   ch->scan_type.repeat;
+   else
+   length = ch->scan_type.storagebits / 8;
/* Make sure we are aligned */
in_loc += length;
if (in_loc % length)
@@ -969,7 +990,11 @@ static int iio_buffer_update_demux(struct iio_dev 
*indio_dev,
goto error_clear_mux_table;
}
ch = iio_find_channel_from_si(indio_dev, in_ind);
-   length = ch->scan_type.storagebits/8;
+   if (ch->scan_type.repeat > 1)
+   length = ch->scan_type.storagebits / 8 *
+   ch->scan_type.repeat;
+   else
+   length = ch->scan_type.storagebits / 8;
if (out_loc % length)
out_loc += length - out_loc % length;
if (in_loc % length)
@@ -990,7 +1015,11 @@ static int iio_buffer_update_demux(struct iio_dev 
*indio_dev,
}
ch = iio_find_channel_from_si(indio_dev,
indio_dev->scan_index_timestamp);
-   length = ch->scan_type.storagebits/8;
+   if (ch->scan_type.repeat > 1)
+   length = ch->scan_type.storagebits / 8 *
+   ch->scan_type.repeat;
+   else
+   length = ch->scan_type.storagebits / 8;
if (out_loc % length)
out_loc += length - out_loc % length;
if (in_loc % length)
diff --git a/include/linux/iio/iio.h 

[Patch v3 4/6] IIO: core: Add quaternion modifier

2014-04-08 Thread Srinivas Pandruvada
Added quaternion in the list of supported modifiers.

Signed-off-by: Srinivas Pandruvada 
---
 drivers/iio/industrialio-core.c | 1 +
 include/linux/iio/types.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 3bd565c..1d907eb 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -84,6 +84,7 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_LIGHT_RED] = "red",
[IIO_MOD_LIGHT_GREEN] = "green",
[IIO_MOD_LIGHT_BLUE] = "blue",
+   [IIO_MOD_QUATERNION] = "quaternion",
 };
 
 /* relies on pairs of these shared then separate */
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index a13c224..4fdab2e 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -53,6 +53,7 @@ enum iio_modifier {
IIO_MOD_LIGHT_RED,
IIO_MOD_LIGHT_GREEN,
IIO_MOD_LIGHT_BLUE,
+   IIO_MOD_QUATERNION,
 };
 
 enum iio_event_type {
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Patch v3 1/6] devres: introduce API "devm_kmemdup"

2014-04-08 Thread Srinivas Pandruvada
Introduce devm_kmemdup, which uses resource managed kmalloc.
There are several requests from maintainers to add this instead
of using kmemdup.

Signed-off-by: Srinivas Pandruvada 
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/base/devres.c | 21 +
 include/linux/device.h|  2 ++
 3 files changed, 24 insertions(+)

diff --git a/Documentation/driver-model/devres.txt 
b/Documentation/driver-model/devres.txt
index 4f7897e..4999518 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -236,6 +236,7 @@ certainly invest a bit more effort into libata core layer).
 MEM
   devm_kzalloc()
   devm_kfree()
+  devm_kmemdup()
 
 IIO
   devm_iio_device_alloc()
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index db4e264..d0914cb 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -831,3 +831,24 @@ void devm_kfree(struct device *dev, void *p)
WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_kfree);
+
+/**
+ * devm_kmemdup - Resource-managed kmemdup
+ * @dev: Device this memory belongs to
+ * @src: Memory region to duplicate
+ * @len: Memory region length
+ * @gfp: GFP mask to use
+ *
+ * Duplicate region of a memory using resource managed kmalloc
+ */
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
+{
+   void *p;
+
+   p = devm_kmalloc(dev, len, gfp);
+   if (p)
+   memcpy(p, src, len);
+
+   return p;
+}
+EXPORT_SYMBOL_GPL(devm_kmemdup);
diff --git a/include/linux/device.h b/include/linux/device.h
index 233bbbe..0b3117a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -629,6 +629,8 @@ static inline void *devm_kcalloc(struct device *dev,
 }
 extern void devm_kfree(struct device *dev, void *p);
 extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp);
+extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
+   gfp_t gfp);
 
 void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
 void __iomem *devm_request_and_ioremap(struct device *dev,
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Patch v3 2/6] IIO: core: Introduce read_raw_multi

2014-04-08 Thread Srinivas Pandruvada
This callback is introduced to overcome some limitations of existing
read_raw callback. The functionality of both existing read_raw and
read_raw_multi is similar, both are used to request values from the
device. The current read_raw callback allows only two return values.
The new read_raw_multi allows returning multiple values. Instead of
passing just address of val and val2, it passes length and pointer
to values. Depending on the type and length of passed buffer, iio
client drivers can return multiple values.

Signed-off-by: Srinivas Pandruvada 
---
 drivers/iio/iio_core.h   |  2 +-
 drivers/iio/industrialio-core.c  | 65 ++--
 drivers/iio/industrialio-event.c |  6 ++--
 drivers/iio/inkern.c | 16 --
 include/linux/iio/iio.h  | 17 +++
 include/linux/iio/types.h|  1 +
 6 files changed, 80 insertions(+), 27 deletions(-)

diff --git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h
index f6db6af..30327ad 100644
--- a/drivers/iio/iio_core.h
+++ b/drivers/iio/iio_core.h
@@ -35,7 +35,7 @@ int __iio_add_chan_devattr(const char *postfix,
   struct list_head *attr_list);
 void iio_free_chan_devattr_list(struct list_head *attr_list);
 
-ssize_t iio_format_value(char *buf, unsigned int type, int val, int val2);
+ssize_t iio_format_value(char *buf, unsigned int type, int size, int *val);
 
 /* Event interface flags */
 #define IIO_BUSY_BIT_POS 1
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index ede16aec..3bd565c 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -373,41 +373,53 @@ EXPORT_SYMBOL_GPL(iio_enum_write);
  * @buf: The buffer to which the formated value gets written
  * @type: One of the IIO_VAL_... constants. This decides how the val and val2
  *parameters are formatted.
- * @val: First part of the value, exact meaning depends on the type parameter.
- * @val2: Second part of the value, exact meaning depends on the type 
parameter.
+ * @vals: pointer to the values, exact meaning depends on the type parameter.
  */
-ssize_t iio_format_value(char *buf, unsigned int type, int val, int val2)
+ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
 {
unsigned long long tmp;
bool scale_db = false;
 
switch (type) {
case IIO_VAL_INT:
-   return sprintf(buf, "%d\n", val);
+   return sprintf(buf, "%d\n", vals[0]);
case IIO_VAL_INT_PLUS_MICRO_DB:
scale_db = true;
case IIO_VAL_INT_PLUS_MICRO:
-   if (val2 < 0)
-   return sprintf(buf, "-%ld.%06u%s\n", abs(val), -val2,
+   if (vals[1] < 0)
+   return sprintf(buf, "-%ld.%06u%s\n", abs(vals[0]),
+   -vals[1],
scale_db ? " dB" : "");
else
-   return sprintf(buf, "%d.%06u%s\n", val, val2,
+   return sprintf(buf, "%d.%06u%s\n", vals[0], vals[1],
scale_db ? " dB" : "");
case IIO_VAL_INT_PLUS_NANO:
-   if (val2 < 0)
-   return sprintf(buf, "-%ld.%09u\n", abs(val), -val2);
+   if (vals[1] < 0)
+   return sprintf(buf, "-%ld.%09u\n", abs(vals[0]),
+   -vals[1]);
else
-   return sprintf(buf, "%d.%09u\n", val, val2);
+   return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
case IIO_VAL_FRACTIONAL:
-   tmp = div_s64((s64)val * 10LL, val2);
-   val2 = do_div(tmp, 10LL);
-   val = tmp;
-   return sprintf(buf, "%d.%09u\n", val, val2);
+   tmp = div_s64((s64)vals[0] * 10LL, vals[1]);
+   vals[1] = do_div(tmp, 10LL);
+   vals[0] = tmp;
+   return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
case IIO_VAL_FRACTIONAL_LOG2:
-   tmp = (s64)val * 10LL >> val2;
-   val2 = do_div(tmp, 10LL);
-   val = tmp;
-   return sprintf(buf, "%d.%09u\n", val, val2);
+   tmp = (s64)vals[0] * 10LL >> vals[1];
+   vals[1] = do_div(tmp, 10LL);
+   vals[0] = tmp;
+   return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+   case IIO_VAL_INT_MULTIPLE:
+   {
+   int i;
+   int len = 0;
+
+   for (i = 0; i < size; ++i)
+   len += sprintf([len], "%d ", vals[i]);
+   buf[len++] = '\n';
+   buf[len++] = '\0';
+   return len;
+   }
default:
return 0;
}
@@ -419,14 +431,23 @@ static ssize_t iio_read_channel_info(struct device *dev,
 {
struct 

[PATCH 2/2] Drivers: hv: vmbus: Implement per-CPU mapping of relid to channel

2014-04-08 Thread K. Y. Srinivasan
Currently the mapping of the relID to channel is done under the protection of a
single spin lock. Starting with ws2012, each channel is bound to a specific VCPU
in the guest. Use this binding to eliminate the spin lock by setting up
per-cpu state for mapping relId to the channel.

Signed-off-by: K. Y. Srinivasan 
Reviewed-by: Haiyang Zhang 
---
 drivers/hv/channel_mgmt.c |   41 -
 drivers/hv/connection.c   |   24 +++-
 drivers/hv/hv.c   |2 ++
 drivers/hv/hyperv_vmbus.h |5 +
 include/linux/hyperv.h|5 +
 5 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6f7fdd9..6c8b032c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -149,6 +149,7 @@ static struct vmbus_channel *alloc_channel(void)
spin_lock_init(>sc_lock);
 
INIT_LIST_HEAD(>sc_list);
+   INIT_LIST_HEAD(>percpu_list);
 
channel->controlwq = create_workqueue("hv_vmbus_ctl");
if (!channel->controlwq) {
@@ -188,7 +189,20 @@ static void free_channel(struct vmbus_channel *channel)
queue_work(vmbus_connection.work_queue, >work);
 }
 
+static void percpu_channel_enq(void *arg)
+{
+   struct vmbus_channel *channel = arg;
+   int cpu = smp_processor_id();
+
+   list_add_tail(>percpu_list, _context.percpu_list[cpu]);
+}
 
+static void percpu_channel_deq(void *arg)
+{
+   struct vmbus_channel *channel = arg;
+
+   list_del(>percpu_list);
+}
 
 /*
  * vmbus_process_rescind_offer -
@@ -210,6 +224,12 @@ static void vmbus_process_rescind_offer(struct work_struct 
*work)
msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
vmbus_post_msg(, sizeof(struct vmbus_channel_relid_released));
 
+   if (channel->target_cpu != smp_processor_id())
+   smp_call_function_single(channel->target_cpu,
+percpu_channel_deq, channel, true);
+   else
+   percpu_channel_deq(channel);
+
if (channel->primary_channel == NULL) {
spin_lock_irqsave(_connection.channel_lock, flags);
list_del(>listentry);
@@ -245,6 +265,7 @@ static void vmbus_process_offer(struct work_struct *work)
work);
struct vmbus_channel *channel;
bool fnew = true;
+   bool enq = false;
int ret;
unsigned long flags;
 
@@ -264,12 +285,22 @@ static void vmbus_process_offer(struct work_struct *work)
}
}
 
-   if (fnew)
+   if (fnew) {
list_add_tail(>listentry,
  _connection.chn_list);
+   enq = true;
+   }
 
spin_unlock_irqrestore(_connection.channel_lock, flags);
 
+   if (enq) {
+   if (newchannel->target_cpu != smp_processor_id())
+   smp_call_function_single(newchannel->target_cpu,
+percpu_channel_enq,
+newchannel, true);
+   else
+   percpu_channel_enq(newchannel);
+   }
if (!fnew) {
/*
 * Check to see if this is a sub-channel.
@@ -282,6 +313,14 @@ static void vmbus_process_offer(struct work_struct *work)
spin_lock_irqsave(>sc_lock, flags);
list_add_tail(>sc_list, >sc_list);
spin_unlock_irqrestore(>sc_lock, flags);
+
+   if (newchannel->target_cpu != smp_processor_id())
+   smp_call_function_single(newchannel->target_cpu,
+percpu_channel_enq,
+newchannel, true);
+   else
+   percpu_channel_enq(newchannel);
+
newchannel->state = CHANNEL_OPEN_STATE;
if (channel->sc_creation_callback != NULL)
channel->sc_creation_callback(newchannel);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index d484bad..3589236 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -231,6 +231,28 @@ cleanup:
return ret;
 }
 
+/*
+ * Map the given relid to the corresponding channel based on the
+ * per-cpu list of channels that have been affinitized to this CPU.
+ * This will be used in the channel callback path as we can do this
+ * mapping in a lock-free fashion.
+ */
+static struct vmbus_channel *pcpu_relid2channel(u32 relid)
+{
+   struct vmbus_channel *channel;
+   struct vmbus_channel *found_channel  = NULL;
+   int cpu = smp_processor_id();
+   struct list_head *pcpu_head = _context.percpu_list[cpu];
+
+   list_for_each_entry(channel, pcpu_head, percpu_list) {
+ 

[PATCH 1/2] Drivers: hv: Eliminate the channel spinlock in the callback path

2014-04-08 Thread K. Y. Srinivasan
By ensuring that we set the callback handler to NULL in the channel close
path on the same CPU that the channel is bound to, we can eliminate this lock
acquisition and release in a performance critical path.


Signed-off-by: K. Y. Srinivasan 
Reviewed-by: Haiyang Zhang 
---
 drivers/hv/channel.c  |   16 
 drivers/hv/channel_mgmt.c |   11 +++
 drivers/hv/connection.c   |   11 ---
 include/linux/hyperv.h|2 ++
 4 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 602ca86..740edec 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -471,18 +471,26 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, 
u32 gpadl_handle)
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
 
+static void reset_channel_cb(void *arg)
+{
+   struct vmbus_channel *channel = arg;
+
+   channel->onchannel_callback = NULL;
+}
+
 static void vmbus_close_internal(struct vmbus_channel *channel)
 {
struct vmbus_channel_close_channel *msg;
int ret;
-   unsigned long flags;
 
channel->state = CHANNEL_OPEN_STATE;
channel->sc_creation_callback = NULL;
/* Stop callback and cancel the timer asap */
-   spin_lock_irqsave(>inbound_lock, flags);
-   channel->onchannel_callback = NULL;
-   spin_unlock_irqrestore(>inbound_lock, flags);
+   if (channel->target_cpu != smp_processor_id())
+   smp_call_function_single(channel->target_cpu, reset_channel_cb,
+channel, true);
+   else
+   reset_channel_cb(channel);
 
/* Send a closing message */
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index fa92046..6f7fdd9 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -365,7 +365,7 @@ static u32  next_vp;
  * performance critical channels (IDE, SCSI and Network) will be uniformly
  * distributed across all available CPUs.
  */
-static u32 get_vp_index(uuid_le *type_guid)
+static void init_vp_index(struct vmbus_channel *channel, uuid_le *type_guid)
 {
u32 cur_cpu;
int i;
@@ -387,10 +387,13 @@ static u32 get_vp_index(uuid_le *type_guid)
 * Also if the channel is not a performance critical
 * channel, bind it to cpu 0.
 */
-   return 0;
+   channel->target_cpu = 0;
+   channel->target_vp = 0;
+   return;
}
cur_cpu = (++next_vp % max_cpus);
-   return hv_context.vp_index[cur_cpu];
+   channel->target_cpu = cur_cpu;
+   channel->target_vp = hv_context.vp_index[cur_cpu];
 }
 
 /*
@@ -438,7 +441,7 @@ static void vmbus_onoffer(struct 
vmbus_channel_message_header *hdr)
offer->connection_id;
}
 
-   newchannel->target_vp = get_vp_index(>offer.if_type);
+   init_vp_index(newchannel, >offer.if_type);
 
memcpy(>offermsg, offer,
   sizeof(struct vmbus_channel_offer_channel));
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index f2d7bf9..d484bad 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -274,7 +274,6 @@ struct vmbus_channel *relid2channel(u32 relid)
 static void process_chn_event(u32 relid)
 {
struct vmbus_channel *channel;
-   unsigned long flags;
void *arg;
bool read_state;
u32 bytes_to_read;
@@ -293,13 +292,12 @@ static void process_chn_event(u32 relid)
/*
 * A channel once created is persistent even when there
 * is no driver handling the device. An unloading driver
-* sets the onchannel_callback to NULL under the
-* protection of the channel inbound_lock. Thus, checking
-* and invoking the driver specific callback takes care of
-* orderly unloading of the driver.
+* sets the onchannel_callback to NULL on the same CPU
+* as where this interrupt is handled (in an interrupt context).
+* Thus, checking and invoking the driver specific callback takes
+* care of orderly unloading of the driver.
 */
 
-   spin_lock_irqsave(>inbound_lock, flags);
if (channel->onchannel_callback != NULL) {
arg = channel->channel_callback_context;
read_state = channel->batched_reading;
@@ -324,7 +322,6 @@ static void process_chn_event(u32 relid)
pr_err("no channel callback for relid - %u\n", relid);
}
 
-   spin_unlock_irqrestore(>inbound_lock, flags);
 }
 
 /*
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ab7359f..8b41570 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -694,6 +694,8 @@ struct vmbus_channel {
 * preserve the earlier behavior.
 */
u32 target_vp;
+   /* The corresponding CPUID in the guest */
+   u32 target_cpu;
/*
 * Support for sub-channels. For 

[PATCH 0/2] Eliminate spin locks in the vmbus channel callback path

2014-04-08 Thread K. Y. Srinivasan
Currently we map the channel handle to the channel under the protection of
a spin lock. Additionally, we dispatch the channel callback function under the
protection of the channel inbound lock (another spin lock). In some recent
network performance runs, the time spent acquiring and releasing these locks
was identified as a potential bottleneck. This patch-set gets rid of these
locks by leveraging the interrupt bindings that we support starting with win8.

K. Y. Srinivasan (2):
  Drivers: hv: Eliminate the channel spinlock in the callback path
  Drivers: hv: vmbus: Implement per-CPU mapping of relid to channel

 drivers/hv/channel.c  |   16 ++---
 drivers/hv/channel_mgmt.c |   52 
 drivers/hv/connection.c   |   35 +++---
 drivers/hv/hv.c   |2 +
 drivers/hv/hyperv_vmbus.h |5 
 include/linux/hyperv.h|7 ++
 6 files changed, 100 insertions(+), 17 deletions(-)

-- 
1.7.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] openvswitch: supply a dummy err_handler of gre_cisco_protocol to prevent kernel crash

2014-04-08 Thread Jesse Gross
On Fri, Apr 4, 2014 at 9:20 PM, wei zhang  wrote:
> At 2014-04-05 07:05:59,"Jesse Gross"  wrote:
>>On Tue, Apr 1, 2014 at 5:23 PM, Wei Zhang  wrote:
>>>
>>> v2 -> v1: use the same logic of the gre_rcv() to distinguish which packet is
>>> intended to us!
>>
>>As a tip on kernel process: if you put the version information after
>>three dashes below the signed-off-by line then git will automatically
>>remove it when the final patch is applied.
>
> Thanks, should I modify it and send a v3 patch?
>
>>
>>> diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
>>> index a3d6951..f391df1 100644
>>> --- a/net/openvswitch/vport-gre.c
>>> +++ b/net/openvswitch/vport-gre.c
>>> @@ -110,6 +110,21 @@ static int gre_rcv(struct sk_buff *skb,
>>> return PACKET_RCVD;
>>>  }
>>>
>>> +/* Called with rcu_read_lock and BH disabled. */
>>> +static int gre_err(struct sk_buff *skb, u32 info,
>>> +  const struct tnl_ptk_info *tpi)
>>> +{
>>> +   struct ovs_net *ovs_net;
>>> +   struct vport *vport;
>>> +
>>> +   ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
>>> +   vport = rcu_dereference(ovs_net->vport_net.gre_vport);
>>> +   if (unlikely(!vport))
>>> +   return PACKET_REJECT;
>>> +   else
>>> +   return PACKET_RCVD;
>>
>>Sorry, I forgot to say this before - if we receive the packet then we
>>should also call consume_skb() on it.
>
> Maybe there is no need to call consume_skb()? The icmp_rcv() would
> call kfree_skb() for us. I also checked the ipgre_err(), it return
> PACKET_RCVD without call consume_skb() too.

Thanks, you are right. I applied your patch as is.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/5] hugetlb: add support for gigantic page allocation at runtime

2014-04-08 Thread Yasuaki Ishimatsu
(2014/04/09 4:02), Luiz Capitulino wrote:
> HugeTLB is limited to allocating hugepages whose size are less than
> MAX_ORDER order. This is so because HugeTLB allocates hugepages via
> the buddy allocator. Gigantic pages (that is, pages whose size is
> greater than MAX_ORDER order) have to be allocated at boottime.
> 
> However, boottime allocation has at least two serious problems. First,
> it doesn't support NUMA and second, gigantic pages allocated at
> boottime can't be freed.
> 
> This commit solves both issues by adding support for allocating gigantic
> pages during runtime. It works just like regular sized hugepages,
> meaning that the interface in sysfs is the same, it supports NUMA,
> and gigantic pages can be freed.
> 
> For example, on x86_64 gigantic pages are 1GB big. To allocate two 1G
> gigantic pages on node 1, one can do:
> 
>   # echo 2 > \
> /sys/devices/system/node/node1/hugepages/hugepages-1048576kB/nr_hugepages
> 
> And to free them all:
> 
>   # echo 0 > \
> /sys/devices/system/node/node1/hugepages/hugepages-1048576kB/nr_hugepages
> 
> The one problem with gigantic page allocation at runtime is that it
> can't be serviced by the buddy allocator. To overcome that problem, this
> commit scans all zones from a node looking for a large enough contiguous
> region. When one is found, it's allocated by using CMA, that is, we call
> alloc_contig_range() to do the actual allocation. For example, on x86_64
> we scan all zones looking for a 1GB contiguous region. When one is found,
> it's allocated by alloc_contig_range().
> 
> One expected issue with that approach is that such gigantic contiguous
> regions tend to vanish as runtime goes by. The best way to avoid this for
> now is to make gigantic page allocations very early during system boot, say
> from a init script. Other possible optimization include using compaction,
> which is supported by CMA but is not explicitly used by this commit.
> 
> It's also important to note the following:
> 
>   1. Gigantic pages allocated at boottime by the hugepages= command-line
>  option can be freed at runtime just fine
> 
>   2. This commit adds support for gigantic pages only to x86_64. The
>  reason is that I don't have access to nor experience with other archs.
>  The code is arch independent though, so it should be simple to add
>  support to different archs
> 
>   3. I didn't add support for hugepage overcommit, that is allocating
>  a gigantic page on demand when
> /proc/sys/vm/nr_overcommit_hugepages > 0. The reason is that I don't
> think it's reasonable to do the hard and long work required for
> allocating a gigantic page at fault time. But it should be simple
> to add this if wanted
> 
> Signed-off-by: Luiz Capitulino 
> ---
>   mm/hugetlb.c | 158 
> ++-
>   1 file changed, 147 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 9dded98..2258045 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -679,11 +679,141 @@ static int hstate_next_node_to_free(struct hstate *h, 
> nodemask_t *nodes_allowed)
>   ((node = hstate_next_node_to_free(hs, mask)) || 1); \
>   nr_nodes--)
>   
> +#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
> +static void destroy_compound_gigantic_page(struct page *page,
> + unsigned long order)
> +{
> + int i;
> + int nr_pages = 1 << order;
> + struct page *p = page + 1;
> +
> + for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
> + __ClearPageTail(p);
> + set_page_refcounted(p);
> + p->first_page = NULL;
> + }
> +
> + set_compound_order(page, 0);
> + __ClearPageHead(page);
> +}
> +
> +static void free_gigantic_page(struct page *page, unsigned order)
> +{
> + free_contig_range(page_to_pfn(page), 1 << order);
> +}
> +
> +static int __alloc_gigantic_page(unsigned long start_pfn, unsigned long 
> count)
> +{
> + unsigned long end_pfn = start_pfn + count;
> + return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
> +}
> +
> +static bool pfn_range_valid_gigantic(unsigned long start_pfn,
> + unsigned long nr_pages)
> +{
> + unsigned long i, end_pfn = start_pfn + nr_pages;
> + struct page *page;
> +
> + for (i = start_pfn; i < end_pfn; i++) {
> + if (!pfn_valid(i))
> + return false;
> +
> + page = pfn_to_page(i);
> +
> + if (PageReserved(page))
> + return false;
> +
> + if (page_count(page) > 0)
> + return false;
> +
> + if (PageHuge(page))
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static struct page *alloc_gigantic_page(int nid, unsigned order)
> +{
> + unsigned long nr_pages = 1 << order;
> + unsigned long ret, pfn, flags;
> + 

Re: [PATCH 1/1 v11] Tracepoint: register/unregister struct tracepoint

2014-04-08 Thread Mathieu Desnoyers
- Original Message -
> From: "Steven Rostedt" 
> To: "Mathieu Desnoyers" 
> Cc: linux-kernel@vger.kernel.org, "Ingo Molnar" , "Frederic 
> Weisbecker" ,
> "Andrew Morton" , "Frank Ch. Eigler" 
> , "Johannes Berg"
> 
> Sent: Tuesday, April 8, 2014 8:17:05 PM
> Subject: Re: [PATCH 1/1 v11] Tracepoint: register/unregister struct tracepoint
> 
> OK, I kicked off my tests to test this patch. If all goes well, I'll
> push it to linux-next and then off to Linus.
> 
> But I also added a patch on top. This shouldn't cause any issues for
> you, does it?

It's all right by me. You can add my

Acked-by: Mathieu Desnoyers 

Thanks!

Mathieu

> 
> -- Steve
> 
> From a22720af7d7d2e06284fbed68bd93827f75381b1 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (Red Hat)" 
> Date: Tue, 8 Apr 2014 20:09:40 -0400
> Subject: [PATCH] tracepoint: Simplify tracepoint module search
> 
> Instead of copying the num_tracepoints and tracepoints_ptrs from
> the module structure to the tp_mod structure, which only uses it to
> find the module associated to tracepoints of modules that are coming
> and going, simply copy the pointer to the module struct to the tracepoint
> tp_module structure.
> 
> Also removed un-needed brackets around an if statement.
> 
> Cc: Mathieu Desnoyers 
> Signed-off-by: Steven Rostedt 
> ---
>  include/linux/tracepoint.h | 3 +--
>  kernel/tracepoint.c| 9 -
>  2 files changed, 5 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 08150e2..69a298b 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -47,8 +47,7 @@ for_each_kernel_tracepoint(void (*fct)(struct tracepoint
> *tp, void *priv),
>  #ifdef CONFIG_MODULES
>  struct tp_module {
>   struct list_head list;
> - unsigned int num_tracepoints;
> - struct tracepoint * const *tracepoints_ptrs;
> + struct module *mod;
>  };
>  
>  bool trace_module_has_bad_taint(struct module *mod);
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index 8c4f2f4..c115ec4 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -374,8 +374,7 @@ static int tracepoint_module_coming(struct module *mod)
>   ret = -ENOMEM;
>   goto end;
>   }
> - tp_mod->num_tracepoints = mod->num_tracepoints;
> - tp_mod->tracepoints_ptrs = mod->tracepoints_ptrs;
> + tp_mod->mod = mod;
>   list_add_tail(_mod->list, _module_list);
>   blocking_notifier_call_chain(_notify_list,
>   MODULE_STATE_COMING, tp_mod);
> @@ -393,7 +392,7 @@ static void tracepoint_module_going(struct module *mod)
>  
>   mutex_lock(_module_list_mutex);
>   list_for_each_entry(tp_mod, _module_list, list) {
> - if (tp_mod->tracepoints_ptrs == mod->tracepoints_ptrs) {
> + if (tp_mod->mod == mod) {
>   blocking_notifier_call_chain(_notify_list,
>   MODULE_STATE_GOING, tp_mod);
>   list_del(_mod->list);
> @@ -447,9 +446,9 @@ static __init int init_tracepoints(void)
>   int ret;
>  
>   ret = register_module_notifier(_module_nb);
> - if (ret) {
> + if (ret)
>   pr_warning("Failed to register tracepoint module enter 
> notifier\n");
> - }
> +
>   return ret;
>  }
>  __initcall(init_tracepoints);
> --
> 1.8.5.3
> 
> 
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: post 3.14 serial regression

2014-04-08 Thread Greg Kroah-Hartman
On Tue, Apr 08, 2014 at 02:38:31PM -0700, Dave Hansen wrote:
> On 04/08/2014 02:03 PM, Dave Hansen wrote:
> > On 04/08/2014 04:27 AM, One Thousand Gnomes wrote:
> >>> At the end, you can see that init is somehow dying.  If I revert this
> >>> patch, init is happy again and doesn't die, and the serial console works
> >>> like before.
> >>
> >> Can you check if init is getting a SIGHUP - possibly its opening the
> >> device and when it goes away gets a hangup which it isn't catching ?
> > 
> > I do see plenty of SIGCHLDs and a heap of SIGTERMs to 'systemd-udevd',
> > but no SIGHUP.  I do see a "Warning: unable to open an initial console."
> > now, though.  (details far below)
> > 
> > I instrumented uart_remove_one_port().  It *looks* like while searching
> > for a uart_port for 0x1008 (my actual port),
> > serial8250_find_match_or_unused() finds 0x3e8 since 0x3e8 is
> > PORT_UNKNOWN.  The new code unregisters the 0x1008 console since it
> > _thinks_ it is about to re-register it.
> 
> 
> 
> Looks like this just changed the detection order so my device went from
> ttyS2 to ttyS4:
> 
> # cat /proc/tty/driver/serial
> serinfo:1.0 driver revision:
> 0: uart:16550A port:03F8 irq:4 tx:0 rx:0 CTS|DSR|CD
> 1: uart:16550A port:02F8 irq:3 tx:0 rx:0
> 2: uart:unknown port:03E8 irq:4
> 3: uart:unknown port:02E8 irq:3
> 4: uart:ST16650V2 port:1008 irq:18 tx:0 rx:0
> 5: uart:ST16650V2 port:1000 irq:19 tx:0 rx:0

That's not good.

Geert, any idea how to fix this?  Or should I just revert your change to
get back to the "working" behavior?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] sched_clock: Remove deprecated setup_sched_clock() API

2014-04-08 Thread Stephen Boyd
Remove the 32-bit only setup_sched_clock() API now that all users
have been converted to the 64-bit friendly sched_clock_register().

Signed-off-by: Stephen Boyd 
---

This is based on Linus' tree at a7963eb7f4c4 (Merge branch 'for_linus' of
git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs, 2014-04-07)
but technically this only needs to be based on 2d1eb87ae1e6 (Merge branch
'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm, 2014-04-05) because
the last user of setup_sched_clock() was in rmk's tree.

 include/linux/sched_clock.h |  1 -
 kernel/time/sched_clock.c   | 13 -
 2 files changed, 14 deletions(-)

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index cddf0c2940b6..efa931c5cef1 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -14,7 +14,6 @@ extern void sched_clock_postinit(void);
 static inline void sched_clock_postinit(void) { }
 #endif
 
-extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
 extern void sched_clock_register(u64 (*read)(void), int bits,
 unsigned long rate);
 
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 4d23dc4d8139..445106d2c729 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -49,13 +49,6 @@ static u64 notrace jiffy_sched_clock_read(void)
return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u32 __read_mostly (*read_sched_clock_32)(void);
-
-static u64 notrace read_sched_clock_32_wrapper(void)
-{
-   return read_sched_clock_32();
-}
-
 static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
@@ -176,12 +169,6 @@ void __init sched_clock_register(u64 (*read)(void), int 
bits,
pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
-void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
-{
-   read_sched_clock_32 = read;
-   sched_clock_register(read_sched_clock_32_wrapper, bits, rate);
-}
-
 void __init sched_clock_postinit(void)
 {
/*
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/4 v2] perf: Make some functions generic

2014-04-08 Thread Namhyung Kim
On Mon,  7 Apr 2014 14:55:20 -0400, Don Zickus wrote:
> This patch just converts some private functions into global ones
> that can be used by other tools like the c2c tool I am trying to merge.

For the series,

Reviewed-by: Namhyung Kim 

Thanks,
Namhyung

>
> Don Zickus (4):
>   perf: Allow ability to map cpus to nodes easily
>   perf: Use cpu/possible instead of cpu/kernel_max
>   perf, kmem: Utilize the new generic cpunode_map
>   perf, callchain: Add generic report parse callchain callback function
>
>  tools/perf/builtin-kmem.c   |  78 +--
>  tools/perf/builtin-report.c |  81 +---
>  tools/perf/util/callchain.c |  82 
>  tools/perf/util/callchain.h |   1 +
>  tools/perf/util/cpumap.c| 149 
> 
>  tools/perf/util/cpumap.h|  35 +++
>  6 files changed, 273 insertions(+), 153 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] hugetlb: add support for gigantic page allocation at runtime

2014-04-08 Thread Luiz Capitulino
On Tue, 8 Apr 2014 15:51:02 -0700
Andrew Morton  wrote:

> On Mon, 7 Apr 2014 14:49:35 -0400 Luiz Capitulino  
> wrote:
> 
> > > > ---
> > > >  arch/x86/include/asm/hugetlb.h |  10 +++
> > > >  mm/hugetlb.c   | 177 
> > > > ++---
> > > >  2 files changed, 176 insertions(+), 11 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/include/asm/hugetlb.h 
> > > > b/arch/x86/include/asm/hugetlb.h
> > > > index a809121..2b262f7 100644
> > > > --- a/arch/x86/include/asm/hugetlb.h
> > > > +++ b/arch/x86/include/asm/hugetlb.h
> > > > @@ -91,6 +91,16 @@ static inline void arch_release_hugepage(struct page 
> > > > *page)
> > > >  {
> > > >  }
> > > >  
> > > > +static inline int arch_prepare_gigantic_page(struct page *page)
> > > > +{
> > > > +   return 0;
> > > > +}
> > > > +
> > > > +static inline void arch_release_gigantic_page(struct page *page)
> > > > +{
> > > > +}
> > > > +
> > > > +
> > > >  static inline void arch_clear_hugepage_flags(struct page *page)
> > > >  {
> > > >  }
> > > 
> > > These are defined only on arch/x86, but called in generic code.
> > > Does it cause build failure on other archs?
> > 
> > Hmm, probably. The problem here is that I'm unable to test this
> > code in other archs. So I think the best solution for the first
> > merge is to make the build of this feature conditional to x86_64?
> > Then the first person interested in making this work in other
> > archs add the generic code. Sounds reasonable?
> 
> These functions don't actually do anything so if and when other
> architectures come along to implement this feature, their developers
> won't know what you were thinking when you added them.  So how about
> some code comments to explain their roles and responsibilities?
> 
> Or just delete them altogether and let people add them (or something
> similar) if and when the need arises.  It's hard to tell when one lacks
> telepathic powers, sigh.

That's exactly what I did for v2 (already posted).
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] Detaching mounts on unlink for 3.15-rc1

2014-04-08 Thread Eric W. Biederman

Linus,

Please pull the for-linus branch from the git tree:

   git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git 
for-linus

   HEAD: 0d7d90f86f83f29a442b37c78172870f8ee28c58 proc: Update 
proc_flush_task_mnt to use d_invalidate

My apologies for sending this pull request late.  I thought these
changes were going to come through Al's tree, but that doesn't look like it
is going to happen, and it is long past time for these changes to be
merged.

This set of changes has been reviewed and been sitting idle for the last
6 weeks.  In that time the vfs has slightly shifted under me, with the new
version of rename and the mount hash list becoming a hlist.  None of
those changes has changed the code in ways that invalidate these
changes, but small conflicts do result and I have attached my conflict
resolution at the end of this email in case it helps.

To recap these changes allow a file or a directory that is a mount point
in one mount namespace to be unlinked/rmdired elsewhere where it is not
a mount point (either a remote filesystem or another mount namespace).
As has been agreed during review, the semantics when only a single mount
namespace exists remain unchanged.

This removes a long standing need to lie to the vfs when a mount point
has been removed behind its back.  This also removes a DoS attack where
an unprivileged user could prevent root from renaming or deleting files
and directories by using them as mountpoints in another mount namespace.

This change also fixes a few cases where because we were not lying to
the vfs we could leak mount points.

When renaming or unlinking directory entries that are not mountpoints
no additional locks are taken so no performance differences can result,
and my benchmark reflected that.

To verify that nothing significant has changed from the time this code was
written until now, I have performed a test merge.  I successfully
performed an allyesconfig build (skipping a broken wireless driver in 
staging) and tested to make certain that the code still functions
as expected.

Eric W. Biederman (12):
  vfs: Document the effect of d_revalidate on d_find_alias
  vfs: More precise tests in d_invalidate
  vfs: Don't allow overwriting mounts in the current mount namespace
  vfs: Keep a list of mounts on a mount point
  vfs: factor out lookup_mountpoint from new_mountpoint
  vfs: Add a function to lazily unmount all mounts from any dentry.
  vfs: Lazily remove mounts on unlinked files and directories.
  vfs: Remove unnecessary calls of check_submounts_and_drop
  vfs: Merge check_submounts_and_drop and d_invalidate
  vfs: Make d_invalidate return void
  vfs: Remove d_drop calls from d_revalidate implementations
  proc: Update proc_flush_task_mnt to use d_invalidate

 fs/afs/dir.c   |5 --
 fs/btrfs/ioctl.c   |5 +--
 fs/ceph/dir.c  |1 -
 fs/cifs/readdir.c  |6 +--
 fs/dcache.c|  140 +---
 fs/fuse/dir.c  |7 +--
 fs/gfs2/dentry.c   |3 -
 fs/kernfs/dir.c|   11 
 fs/mount.h |   20 +++
 fs/namei.c |   28 ++
 fs/namespace.c |   87 +-
 fs/nfs/dir.c   |7 +--
 fs/proc/base.c |   10 +---
 fs/proc/fd.c   |2 -
 include/linux/dcache.h |3 +-
 15 files changed, 178 insertions(+), 157 deletions(-)

Eric

--- 

My merge conflict resolution.

__d_move gained an argument I took it out of a conditional.

m_hash went from a struct list to a struct hlist changing nearby code,
and affecting my factoring out of lookup_mountpoint from new_mountpoint.

There was a major refactoring of rename.  As long as d_mountpoint
becomes is_local_mountpoint and detach_mount is added after dont_mount
all is well.


 dcache.c|6 ++
 mount.h |1 +
 namei.c |3 ++-
 namespace.c |   48 ++--
 4 files changed, 51 insertions(+), 7 deletions(-)

diff --cc fs/dcache.c
index 66cba5a8a346,5b78bd98649c..d4a1e55d65a9
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@@ -2701,10 -2631,8 +2663,8 @@@ static struct dentry *__d_unalias(struc
goto out_err;
m2 = >d_parent->d_inode->i_mutex;
  out_unalias:
-   if (likely(!d_mountpoint(alias))) {
-   __d_move(alias, dentry, false);
-   ret = alias;
-   }
 -  __d_move(alias, dentry);
++  __d_move(alias, dentry, false);
+   ret = alias;
  out_err:
spin_unlock(>i_lock);
if (m2)
diff --cc fs/mount.h
index b29e42f05f34,c5e717542bbc..aa3c0aa473df
--- a/fs/mount.h
+++ b/fs/mount.h
@@@ -19,8 -19,9 +19,9 @@@ struct mnt_pcp 
  };
  
  struct mountpoint {
 -  struct list_head m_hash;
 +  struct hlist_node m_hash;
struct dentry *m_dentry;
+   struct list_head m_list;
int m_count;
  };
  
diff --cc fs/namei.c
index 88339f59efb5,384fcc6a5606..0e438b395e28

Re: [PATCH 06/15 V3] perf, c2c: Add in new options to configure latency and stores

2014-04-08 Thread Namhyung Kim
On Mon, 7 Apr 2014 14:16:18 -0400, Don Zickus wrote:
> On Sun, Apr 06, 2014 at 03:14:22PM +0200, Jiri Olsa wrote:
>> On Mon, Mar 31, 2014 at 10:55:35PM -0400, Don Zickus wrote:
>> > On Sat, Mar 29, 2014 at 06:11:38PM +0100, Jiri Olsa wrote:
>> > > > @@ -316,6 +369,12 @@ int cmd_c2c(int argc, const char **argv, const 
>> > > > char *prefix __maybe_unused)
>> > > >};
>> > > >const struct option c2c_options[] = {
>> > > >OPT_BOOLEAN('r', "raw_records", _records, "dump raw 
>> > > > events"),
>> > > > +  OPT_INTEGER('l', "latency-level", _level,
>> > > > +   "specify the latency threshold for loads 
>> > > > [default=30]"),
>> > > > +  OPT_INTEGER('p', "precision-level", _level,
>> > > > +   "specify the precision level of events (0,1,2,3) 
>> > > > [default=1]"),
>> > > 
>> > > could we get also option for user space modifier?
>> > 
>> > You mean the 'u' modifier, ie cpu/mem-loads/u ?  If so, then I can do that
>> > but will that work with the -a option (which is hardcoded in the c2c
>> > tool [system-wide mode])?
>> 
>> right, forgot about that.. could the -a option be optional as well?
>> probably the same way as for record would be the best:
>> 
>>   perf c2c record ./foo   # workload specific
>>   perf c2c record -a sleep 3  # system wide
>
> I understand what you are saying, but our tool was written to find cache
> contention across the system, so -a is usually implied.  Most of our
> profiling is done system-wide.

Hmm.. any chance it can be used for non-system-wide analysis?  I think
tool should provide a way to do it if it's a valid usecase.  And adding
-a option when recording doesn't look too hard. :)


> I would rather implement the opposite option --no-system-wide, if that
> is ok.

Please just add -a/--system-wide (maybe you can make it default if no
argument/workload is given like perf top does) then tool will provide
the --no-* option automatically.

Thanks,
Namhyung
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT] kbuild/lto changes for 3.15-rc1

2014-04-08 Thread Jan Hubicka
> Hi Linus,
> 
> > So right now, I see several reasons not to merge it ("It's so
> > experimental that we don't even want to encourage people to test it"
> 
> I don't want them to enable it during allyesconfig because they
> might need more than 4GB of RAM to build it (especially with gcc 
> 4.8, 4.9 is better). But allyesconfig is a special case. More standard
> kernels with smaller vmlinux don't have this problem, but build
> somewhat slower.
> 
> > to "it's not fully fleshed out yet and makes compile times _much_
> > longer").
> 
> It's functionally stable, I have a number of users who
> don't report any problems.
> 
> > 
> > And yet nobody has actually talked about why I *should* merge it.
> > 
> > Which - I think understandably - makes me less than enthusiastic.
> > 
> > So I think I'll let this wait a bit longer, _unless_ people start
> > talking about the upsides. How much smaller is the end result? How
> > much faster is it? How much more beautiful is it? Does it make new
> 
> The smaller part is mainly visible with small kernels, because
> it's very good at throwing out unused code there.  All the
> stuff in kernel etc. that is not used.
> 
> For example Tim Bird saw ~11% binary reduction on ARM with his 
> configs [1]. We also see some reduction in small configs.
> 
> Some of the static measures look nice, for example
> a LTO kernel has ~4% less calls.
> 
> We did some performance tests, but at least in the standard
> macro benchmarks we do there wasn't a clear performance
> win.  LKP had a small win, but nothing dramatic.
> But I would like others to test it on their workloads.
> 
> In principle LTO can do cool optimizations, like propagating
> constants into functions (e.g. generate specialized versions
> of some code). I experimented a bit with this, however
> it currently seems to bloat the code quite a bit.
> 
> There are some other possible future optimizations
> that can be enabled by a global optimizer.
> 
> Honza may have more reasons for LTO.

My basic understanding of LTO benefits is about the following.

 1) Today LTO will quite reliably reduce code size. 
Andi mentioned 11% of kernel. It is not that unusual to get over 30% code
size reduction.

Generally it is a lot easier to fine tune hot spots than to throw away all
unnecessary code in all possible configurations your project might have
(+ optimize by hand code layout).  So even well hand optimized programs
benefit from LTO in code size.

 2) If build machinery is well structured, code size reduction also translates
to compile time improvements (GCC spends a lot of time in codegen).
This holds only for full rebuild (not compile/edit cycle) and only for
projects that do not rebuild one binary many times (like both kernel and
GCC does) or do not link many times large LTO library (like Firefox).

For SPEC2k6, the LTO build time is faster, for Firefox it is about the same
For GCC the bootstrap time is slower since our build system needs reorg (we
use old libtool that requires fat LTO files and we rebuild every binary
twice just to get a checksum, we also link whole backend as static library
to every frontend binary) 

I hope that not in too distant future we will be able to build majority of
distro with LTO and get LTO build times better than non-LTO.
I also have some longer term plans for compile-edit development model that
won't trigger reoptimization of whole binary, but that is bit more of
research stage right now.

 3) On really large projects, LTO may need a lot of memory.
This is basically a problem of kernel/firefox and chromium. Nothing else
on my installation has similarly large binary.  We are improving this from
release to release and I believe 4.9 is doing pretty well so I can build
those things on my 8GB laptop w/o swap storms.

 4) LTO brings noticeable performance wins on average, but it is largely 
benchmark
dependent; some see huge improvements, others no improvements at all.

Basic observation is there is not much that LTO can do that can not be done
by hand.  A careful developer can just identify the important spot and
restructure the sources.
The runtime benefits are more visible on bigger, bloated and less
optimized projects than on hand tuned video encoder implementation.
I believe Kernel largely falls into hand tuned category despite its size.

I am in progress of trying to benchmark GCC 4.9 LTO for Firefox/Libreoffice
and Chromium and will publish once it is done.
Just as very quick data point (not too serious) I just run Dromaeo
benchmarks on firefox comparing default and LTO build, the overall
difference is 7% http://dromaeo.com/?id=219677,219678,219672,219676
(first two tests are default build, second two are LTO).
This is a lot more than I expected given that dromaeo tests largely JIT
generated code and I am sure it is a common benchmark well hand optimized.

Re: [PATCH 1/2] watchdog: add Intel MID watchdog driver support

2014-04-08 Thread Guenter Roeck

On 04/08/2014 01:59 PM, David Cohen wrote:

Add initial Intel MID watchdog driver support.

This driver is an initial implementation of generic Intel MID watchdog
driver. Currently it supports Intel Merrifield platform.


How does this compare with the existing SCU watchdog driver
(intel_scu_watchdog) ? The watchdog IPC message (0xf8) seems
to be the same, so there must be at least some overlap.
Would it be possible to have just one watchdog driver
serving the different SCU based devices ?



Signed-off-by: Eric Ernst 
Signed-off-by: David Cohen 
---
  drivers/watchdog/Kconfig |  12 +++
  drivers/watchdog/Makefile|   1 +
  drivers/watchdog/intel-mid_wdt.c | 209 +++
  3 files changed, 222 insertions(+)
  create mode 100644 drivers/watchdog/intel-mid_wdt.c

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 79d25894343a..4da09b8b2f11 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -643,6 +643,18 @@ config INTEL_SCU_WATCHDOG

  To compile this driver as a module, choose M here.

+config INTEL_MID_WATCHDOG
+   bool "Intel MID SCU Watchdog Mobile Platforms"
+   depends on X86_INTEL_MID && WATCHDOG_CORE


All other watchdog drivers using the watchdog core use
select WATCHDOG_CORE

so this one should do the same.

Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [tip:x86/urgent] x86 idle: Repair large-server 50-watt idle-power regression

2014-04-08 Thread Brown, Len
Davidlohr,

Thanks for the note.

Ideally (on Linux in general, and on servers, in particular) we strive
for the performance impact of power saving features to be small enough
to be considered in "measurement noise".

Your report for 160 core Westmere AIM numbers being hit at 10-25%
shows 15% measurement noise?  But even if true, this looks bad.

Any chance you can re-run, with the following two tweaks,
one at a time?

I'd be curious if you can wrap the invocation in turbostat -v
and capture that output to show what states we are seeing
during the benchmark run.

thanks,
-Len



#1: skip flush for C1

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f80b700..6027d06 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -377,7 +377,7 @@ static int intel_idle(struct cpuidle_device *dev,
 
if (!current_set_polling_and_test()) {
 
-   if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+   if ((eax > 0) && this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
 
__monitor((void *)&current_thread_info()->flags, 0, 0);


#2: skip flush for C1 and C1E

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f80b700..6027d06 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -377,7 +377,7 @@ static int intel_idle(struct cpuidle_device *dev,
 
if (!current_set_polling_and_test()) {
 
-   if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+   if ((eax > 1) && this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
 
__monitor((void *)&current_thread_info()->flags, 0, 0);




Re: [PATCH 1/1 v11] Tracepoint: register/unregister struct tracepoint

2014-04-08 Thread Steven Rostedt
OK, I kicked off my tests to test this patch. If all goes well, I'll
push it to linux-next and then off to Linus.

But I also added a patch on top. This shouldn't cause any issues for
you, does it?

-- Steve

>From a22720af7d7d2e06284fbed68bd93827f75381b1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" 
Date: Tue, 8 Apr 2014 20:09:40 -0400
Subject: [PATCH] tracepoint: Simplify tracepoint module search

Instead of copying the num_tracepoints and tracepoints_ptrs from
the module structure to the tp_mod structure, which only uses it to
find the module associated to tracepoints of modules that are coming
and going, simply copy the pointer to the module struct to the tracepoint
tp_module structure.

Also removed un-needed brackets around an if statement.

Cc: Mathieu Desnoyers 
Signed-off-by: Steven Rostedt 
---
 include/linux/tracepoint.h | 3 +--
 kernel/tracepoint.c| 9 -
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 08150e2..69a298b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -47,8 +47,7 @@ for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, 
void *priv),
 #ifdef CONFIG_MODULES
 struct tp_module {
struct list_head list;
-   unsigned int num_tracepoints;
-   struct tracepoint * const *tracepoints_ptrs;
+   struct module *mod;
 };
 
 bool trace_module_has_bad_taint(struct module *mod);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8c4f2f4..c115ec4 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -374,8 +374,7 @@ static int tracepoint_module_coming(struct module *mod)
ret = -ENOMEM;
goto end;
}
-   tp_mod->num_tracepoints = mod->num_tracepoints;
-   tp_mod->tracepoints_ptrs = mod->tracepoints_ptrs;
+   tp_mod->mod = mod;
list_add_tail(_mod->list, _module_list);
blocking_notifier_call_chain(_notify_list,
MODULE_STATE_COMING, tp_mod);
@@ -393,7 +392,7 @@ static void tracepoint_module_going(struct module *mod)
 
mutex_lock(_module_list_mutex);
list_for_each_entry(tp_mod, _module_list, list) {
-   if (tp_mod->tracepoints_ptrs == mod->tracepoints_ptrs) {
+   if (tp_mod->mod == mod) {
blocking_notifier_call_chain(_notify_list,
MODULE_STATE_GOING, tp_mod);
list_del(_mod->list);
@@ -447,9 +446,9 @@ static __init int init_tracepoints(void)
int ret;
 
ret = register_module_notifier(_module_nb);
-   if (ret) {
+   if (ret)
pr_warning("Failed to register tracepoint module enter 
notifier\n");
-   }
+
return ret;
 }
 __initcall(init_tracepoints);
-- 
1.8.5.3


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT] kbuild/lto changes for 3.15-rc1

2014-04-08 Thread Tim Bird
FWIW, I'd really like to see this go in as an experimental feature.

Andi has already quoted my size results, which I thought were pretty
good, as well as given a pointer to my size optimization presentation.

Some of what follows is in the presentation, but here is a summary:
There are other automated reductions that I experimented with, that
are made more effective (or are only possible) with LTO.  Examples of
these include:
 - elimination of unused kernel command line handlers,
 - automated elimination of unused syscalls (through whole-system analysis), and
 - an experimental system I developed for doing structure reduction.

LTO shows promise for allowing more automation in configuration
handling (that is, requiring less CONFIG options).

People should definitely be warned off using this in any production
setting, but I think it's valuable for developers experimenting with
tiny-size systems to have this easily available in mainline.

On Tue, Apr 8, 2014 at 3:49 PM, Andi Kleen  wrote:
> Hi Linus,
>
>> So right now, I see several reasons not to merge it ("It's so
>> experimental that we don't even want to encourage people to test it"
>
> I don't want them to enable it during allyesconfig because they
> might need more than 4GB of RAM to build it (especially with gcc
> 4.8, 4.9 is better). But allyesconfig is a special case. More standard
> kernels with smaller vmlinux don't have this problem, but build
> somewhat slower.
>
>> to "it's not fully fleshed out yet and makes compile times _much_
>> longer").
>
> It's functionally stable, I have a number of users who
> don't report any problems.
>
>>
>> And yet nobody has actually talked about why I *should* merge it.
>>
>> Which - I think understandably - makes me less than enthusiastic.
>>
>> So I think I'll let this wait a bit longer, _unless_ people start
>> talking about the upsides. How much smaller is the end result? How
>> much faster is it? How much more beautiful is it? Does it make new
>
> The smaller part is mainly visible with small kernels, because
> it's very good at throwing out unused code there.  All the
> stuff in kernel etc. that is not used.
>
> For example Tim Bird saw ~11% binary reduction on ARM with his
> configs [1]. We also see some reduction in small configs.
>
> Some of the static measures look nice, for example
> a LTO kernel has ~4% less calls.
>
> We did some performance tests, but at least in the standard
> macro benchmarks we do there wasn't a clear performance
> win.  LKP had a small win, but nothing dramatic.
> But I would like others to test it on their workloads.
>
> In principle LTO can do cool optimizations, like propagating
> constants into functions (e.g. generate specialized versions
> of some code). I experimented a bit with this, however
> it currently seems to bloat the code quite a bit.
>
> There are some other possible future optimizations
> that can be enabled by a global optimizer.
>
> Honza may have more reasons for LTO.
>
> Other benefits are global warnings and some additional
> type checking. The LTO log files are really useful
> to do global call graph analysis and similar.
>
> -Andi
>
> [1] http://elinux.org/images/9/9e/Bird-Kernel-Size-Optimization-LCJ-2013.pdf
>
> --
> a...@linux.intel.com -- Speaking for myself only
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



-- 
 -- Tim Bird
Senior Software Engineer, Sony Mobile
Architecture Group Chair, CE Workgroup, Linux Foundation
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] mfd: pm8921: Remove pm8xxx API now that sub-devices use regmap

2014-04-08 Thread Stephen Boyd
The pm8xxx read/write wrappers are no longer necessary now that
all the sub-device drivers are using the regmap API. Remove it.

Signed-off-by: Stephen Boyd 
---

Based on Linus' tip as of a7963eb7f4c4 (Merge branch 'for_linus' of
git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs, 2014-04-07)

 drivers/mfd/pm8921-core.c   | 123 +---
 include/linux/mfd/pm8xxx/core.h |  81 --
 2 files changed, 2 insertions(+), 202 deletions(-)
 delete mode 100644 include/linux/mfd/pm8xxx/core.h

diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c
index b97a97187ae9..959513803542 100644
--- a/drivers/mfd/pm8921-core.c
+++ b/drivers/mfd/pm8921-core.c
@@ -26,7 +26,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #defineSSBI_REG_ADDR_IRQ_BASE  0x1BB
 
@@ -57,7 +56,6 @@
 #define PM8921_NR_IRQS 256
 
 struct pm_irq_chip {
-   struct device   *dev;
struct regmap   *regmap;
spinlock_t  pm_irq_lock;
struct irq_domain   *irqdomain;
@@ -67,11 +65,6 @@ struct pm_irq_chip {
u8  config[0];
 };
 
-struct pm8921 {
-   struct device   *dev;
-   struct pm_irq_chip  *irq_chip;
-};
-
 static int pm8xxx_read_block_irq(struct pm_irq_chip *chip, unsigned int bp,
 unsigned int *ip)
 {
@@ -255,55 +248,6 @@ static struct irq_chip pm8xxx_irq_chip = {
.flags  = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE,
 };
 
-/**
- * pm8xxx_get_irq_stat - get the status of the irq line
- * @chip: pointer to identify a pmic irq controller
- * @irq: the irq number
- *
- * The pm8xxx gpio and mpp rely on the interrupt block to read
- * the values on their pins. This function is to facilitate reading
- * the status of a gpio or an mpp line. The caller has to convert the
- * gpio number to irq number.
- *
- * RETURNS:
- * an int indicating the value read on that line
- */
-static int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq)
-{
-   int pmirq, rc;
-   unsigned int  block, bits, bit;
-   unsigned long flags;
-   struct irq_data *irq_data = irq_get_irq_data(irq);
-
-   pmirq = irq_data->hwirq;
-
-   block = pmirq / 8;
-   bit = pmirq % 8;
-
-   spin_lock_irqsave(>pm_irq_lock, flags);
-
-   rc = regmap_write(chip->regmap, SSBI_REG_ADDR_IRQ_BLK_SEL, block);
-   if (rc) {
-   pr_err("Failed Selecting block irq=%d pmirq=%d blk=%d rc=%d\n",
-   irq, pmirq, block, rc);
-   goto bail_out;
-   }
-
-   rc = regmap_read(chip->regmap, SSBI_REG_ADDR_IRQ_RT_STATUS, );
-   if (rc) {
-   pr_err("Failed Configuring irq=%d pmirq=%d blk=%d rc=%d\n",
-   irq, pmirq, block, rc);
-   goto bail_out;
-   }
-
-   rc = (bits & (1 << bit)) ? 1 : 0;
-
-bail_out:
-   spin_unlock_irqrestore(>pm_irq_lock, flags);
-
-   return rc;
-}
-
 static int pm8xxx_irq_domain_map(struct irq_domain *d, unsigned int irq,
   irq_hw_number_t hwirq)
 {
@@ -324,56 +268,6 @@ static const struct irq_domain_ops pm8xxx_irq_domain_ops = 
{
.map = pm8xxx_irq_domain_map,
 };
 
-static int pm8921_readb(const struct device *dev, u16 addr, u8 *val)
-{
-   const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-   const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-   return ssbi_read(pmic->dev->parent, addr, val, 1);
-}
-
-static int pm8921_writeb(const struct device *dev, u16 addr, u8 val)
-{
-   const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-   const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-   return ssbi_write(pmic->dev->parent, addr, , 1);
-}
-
-static int pm8921_read_buf(const struct device *dev, u16 addr, u8 *buf,
-   int cnt)
-{
-   const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-   const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-   return ssbi_read(pmic->dev->parent, addr, buf, cnt);
-}
-
-static int pm8921_write_buf(const struct device *dev, u16 addr, u8 *buf,
-   int cnt)
-{
-   const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-   const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-   return ssbi_write(pmic->dev->parent, addr, buf, cnt);
-}
-
-static int pm8921_read_irq_stat(const struct device *dev, int irq)
-{
-   const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-   const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-   return pm8xxx_get_irq_stat(pmic->irq_chip, irq);
-}
-
-static struct pm8xxx_drvdata pm8921_drvdata = {
-   .pmic_readb = pm8921_readb,
-   .pmic_writeb  

Re: post 3.14 serial regression

2014-04-08 Thread Dave Hansen
On 04/08/2014 02:03 PM, Dave Hansen wrote:
> On 04/08/2014 04:27 AM, One Thousand Gnomes wrote:
>>> At the end, you can see that init is somehow dying.  If I revert this
>>> patch, init is happy again and doesn't die, and the serial console works
>>> like before.
>>
>> Can you check if init is getting a SIGHUP - possibly its opening the
>> device and when it goes away gets a hangup which it isn't catching ?
> 
> I do see plenty of SIGCHLDs and a heap of SIGTERMs to 'systemd-udevd',
> but no SIGHUP.  I do see a "Warning: unable to open an initial console."
> now, though.  (details far below)
> 
> I instrumented uart_remove_one_port().  It *looks* like while searching
> for a uart_port for 0x1008 (my actual port),
> serial8250_find_match_or_unused() finds 0x3e8 since 0x3e8 is
> PORT_UNKNOWN.  The new code unregisters the 0x1008 console since it
> _thinks_ it is about to re-register it.



Looks like this just changed the detection order so my device went from
ttyS2 to ttyS4:

# cat /proc/tty/driver/serial
serinfo:1.0 driver revision:
0: uart:16550A port:03F8 irq:4 tx:0 rx:0 CTS|DSR|CD
1: uart:16550A port:02F8 irq:3 tx:0 rx:0
2: uart:unknown port:03E8 irq:4
3: uart:unknown port:02E8 irq:3
4: uart:ST16650V2 port:1008 irq:18 tx:0 rx:0
5: uart:ST16650V2 port:1000 irq:19 tx:0 rx:0


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/9] crypto: qce: Adds sha and hmac transforms

2014-04-08 Thread Stephen Boyd
On 04/03, Stanimir Varbanov wrote:
> +static void qce_ahash_dma_done(void *data)
> +{
> + struct crypto_async_request *async_req = data;
> + struct ahash_request *req = ahash_request_cast(async_req);
> + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
> + struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
> + struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm);
> + struct qce_device *qce = tmpl->qce;
> + struct qce_result_dump *result = qce->dma.result_buf;
> + unsigned int digestsize = crypto_ahash_digestsize(ahash);
> + int error;
> + u32 status;
> +
> + qce_dma_terminate_all(>dma);
> +
> + qce_unmapsg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE,
> +  rctx->src_chained);
> + qce_unmapsg(qce->dev, >result_sg, 1, DMA_FROM_DEVICE, 0);
> +
> + memcpy(rctx->digest, result->auth_iv, digestsize);
> + if (req->result)
> + memcpy(req->result, result->auth_iv, digestsize);
> +
> + rctx->byte_count[0] = cpu_to_be32(result->auth_byte_count[0]);
> + rctx->byte_count[1] = cpu_to_be32(result->auth_byte_count[1]);

Does rctx->byte_count need to be marked __be32?

> +
> + error = qce_check_status(qce, );
> + if (error < 0)
> + dev_err(qce->dev, "ahash operation error (%x)\n", status);
> +
> + req->src = rctx->src;
> + req->nbytes = rctx->nbytes;
> +
> + rctx->last_blk = false;
> + rctx->first_blk = false;
> +
> + tmpl->async_req_done(tmpl->qce, error);
> +}
> +
[...]
> +static int qce_import_common(struct ahash_request *req, u64 in_count,
> +  u32 *state, u8 *buffer, bool hmac)
> +{
> + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
> + struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
> + u64 count = in_count;
> + unsigned int digestsize = crypto_ahash_digestsize(ahash);
> + unsigned int blocksize;
> +
> + blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash));
> + rctx->count = in_count;
> + memcpy(rctx->trailing_buf, buffer, blocksize);
> +
> + if (in_count <= blocksize) {
> + rctx->first_blk = 1;
> + } else {
> + rctx->first_blk = 0;
> + /*
> +  * For HMAC, there is a hardware padding done when first block
> +  * is set. Therefore the byte_count must be incremened by 64
> +  * after the first block operation.
> +  */
> + if (hmac)
> + count += SHA_PADDING;
> + }
> +
> + rctx->byte_count[0] = (u32)(count & ~SHA_PADDING_MASK);
> + rctx->byte_count[1] = (u32)(count >> 32);
> + qce_cpu_to_be32p_array((__be32 *)rctx->digest, (const u8 *)state,
> +digestsize);
> + rctx->trailing_buf_len = (unsigned int)(in_count & (blocksize - 1));

Is this a way to say

(unsigned int)clamp_t(u64, in_count, blocksize - 1)

?

> +
> + return 0;
> +}
> +
> +static int qce_ahash_import(struct ahash_request *req, const void *in)
> +{
> + struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
> + u32 flags = rctx->flags;
> + bool hmac = IS_SHA_HMAC(flags);
> + int ret;
> +
> + if (IS_SHA1(flags) || IS_SHA1_HMAC(flags)) {
> + struct sha1_state *state = (struct sha1_state *)in;

Unnecessary cast from void *.

> +
> + ret = qce_import_common(req, state->count, state->state,
> + state->buffer, hmac);
> + } else if (IS_SHA256(flags) || IS_SHA256_HMAC(flags)) {
> + struct sha256_state *state = (struct sha256_state *)in;

Ditto.

> +
> + ret = qce_import_common(req, state->count, state->state,
> + state->buf, hmac);
> + } else {
> + ret = -EINVAL;
> + }
> +
> + return ret;
> +}
> +
> +static int qce_ahash_update(struct ahash_request *req)
> +{
> + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> + struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
> + struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm);
> + unsigned int total, len;
> + int nents;
> + struct scatterlist *sg_last;
> + u8 *buf;

> + u32 pad_len;
> + u32 trailing_buf_len;
> + u32 nbytes;
> + u32 offset;
> + u32 bytes;

size_t for these?

> + u8 *staging;
> + bool chained;
> + unsigned int blocksize;
> +
> + blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
> + rctx->count += req->nbytes;
> +
> + /* check for trailing buffer from previous updates and append it */
> + total = req->nbytes + rctx->trailing_buf_len;
> + len = req->nbytes;
[...]
> +
> +struct qce_ahash_def {
> + u32 flags;

unsigned long?

> + const char *name;
> + const char *drv_name;
> + unsigned int digestsize;
> + unsigned int blocksize;
> + unsigned int statesize;
> + const __be32 *std_iv;
> +};
[..]
> +

  1   2   3   4   5   6   7   8   9   10   >