[PATCH] PowerNV/PCI: Fix NULL PCI controller

2013-04-16 Thread Mike Qiu
In pnv_pci_read_config() and pnv_pci_write_config(), we dereference the
PCI controller to get the platform-dependent one before checking that it
is valid, which is very dangerous.

To avoid this potential risk, the patch checks the PCI controller
before using it.

Signed-off-by: Mike Qiu 
---
 arch/powerpc/platforms/powernv/pci.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index b8b8e0b..e7b7f1a 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -286,11 +286,11 @@ static int pnv_pci_read_config(struct pci_bus *bus,
   int where, int size, u32 *val)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
-   struct pnv_phb *phb = hose->private_data;
+   struct pnv_phb *phb = hose ? hose->private_data : NULL;
u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
s64 rc;
 
-   if (hose == NULL)
+   if (!phb)
return PCIBIOS_DEVICE_NOT_FOUND;
 
switch (size) {
@@ -330,10 +330,10 @@ static int pnv_pci_write_config(struct pci_bus *bus,
int where, int size, u32 val)
 {
struct pci_controller *hose = pci_bus_to_host(bus);
-   struct pnv_phb *phb = hose->private_data;
+   struct pnv_phb *phb = hose ? hose->private_data : NULL;
u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
 
-   if (hose == NULL)
+   if (!phb)
return PCIBIOS_DEVICE_NOT_FOUND;
 
cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n",
-- 
1.7.10.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bug fix PATCH v3] Reusing a resource structure allocated by bootmem

2013-04-16 Thread David Rientjes
On Wed, 17 Apr 2013, Yasuaki Ishimatsu wrote:

> > How much memory are we talking about?
> 
> Hmm. I don't know correctly.
> 
> Here is kernel message of my system. The message is shown by mem_init().
> 

Do you have an estimate on the amount of struct resource memory that will 
be leaked if entire pages won't be freed?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux 3.8.8

2013-04-16 Thread Greg KH
diff --git a/Makefile b/Makefile
index 85204da..7684f95 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 8
-SUBLEVEL = 7
+SUBLEVEL = 8
 EXTRAVERSION =
 NAME = Displaced Humerus Anterior
 
diff --git a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts 
b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts
index 93c3afb..3694e94 100644
--- a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts
+++ b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts
@@ -96,11 +96,11 @@
marvell,function = "gpio";
};
pmx_led_rebuild_brt_ctrl_1: pmx-led-rebuild-brt-ctrl-1 {
-   marvell,pins = "mpp44";
+   marvell,pins = "mpp46";
marvell,function = "gpio";
};
pmx_led_rebuild_brt_ctrl_2: pmx-led-rebuild-brt-ctrl-2 {
-   marvell,pins = "mpp45";
+   marvell,pins = "mpp47";
marvell,function = "gpio";
};
 
@@ -157,14 +157,14 @@
gpios = <&gpio0 16 0>;
linux,default-trigger = "default-on";
};
-   health_led1 {
+   rebuild_led {
+   label = "status:white:rebuild_led";
+   gpios = <&gpio1 4 0>;
+   };
+   health_led {
label = "status:red:health_led";
gpios = <&gpio1 5 0>;
};
-   health_led2 {
-   label = "status:white:health_led";
-   gpios = <&gpio1 4 0>;
-   };
backup_led {
label = "status:blue:backup_led";
gpios = <&gpio0 15 0>;
diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c
index f0727e8..0edce4b 100644
--- a/arch/arm/mach-imx/clk-imx35.c
+++ b/arch/arm/mach-imx/clk-imx35.c
@@ -257,6 +257,7 @@ int __init mx35_clocks_init()
clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0");
clk_register_clkdev(clk[nfc_div], NULL, "imx25-nand.0");
clk_register_clkdev(clk[csi_gate], NULL, "mx3-camera.0");
+   clk_register_clkdev(clk[admux_gate], "audmux", NULL);
 
clk_prepare_enable(clk[spba_gate]);
clk_prepare_enable(clk[gpio1_gate]);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 5edd174..7361e47 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -703,7 +703,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
 }
 
-void arch_flush_lazy_mmu_mode(void);
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+   PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+}
 
 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 142236e..b3b0ec1 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,6 +91,7 @@ struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
+   void (*flush)(void);
 };
 
 struct pv_time_ops {
@@ -679,6 +680,7 @@ void paravirt_end_context_switch(struct task_struct *next);
 
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
+void paravirt_flush_lazy_mmu(void);
 
 void _paravirt_nop(void);
 u32 _paravirt_ident_32(u32);
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 4fef207..c779730 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -7,7 +7,7 @@
 
 #define tlb_flush(tlb) \
 {  \
-   if (tlb->fullmm == 0)   \
+   if (!tlb->fullmm && !tlb->need_flush_all)   \
flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \
else\
flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL);   \
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 17fff18..8bfb335 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -263,6 +263,18 @@ void paravirt_leave_lazy_mmu(void)
leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
+void paravirt_flush_lazy_mmu(void)
+{
+   preempt_disable();
+
+   if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+   arch_leave_lazy_mmu_mode();
+   arch_enter_lazy_mmu_mode();
+   }
+
+   preempt_enable();
+}
+
 void paravirt

Linux 3.8.8

2013-04-16 Thread Greg KH
I'm announcing the release of the 3.8.8 kernel.

All users of the 3.8 kernel series must upgrade.

The updated 3.8.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-3.8.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile |2 
 arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts|   14 
 arch/arm/mach-imx/clk-imx35.c|1 
 arch/x86/include/asm/paravirt.h  |5 
 arch/x86/include/asm/paravirt_types.h|2 
 arch/x86/include/asm/tlb.h   |2 
 arch/x86/kernel/paravirt.c   |   25 -
 arch/x86/lguest/boot.c   |1 
 arch/x86/mm/fault.c  |6 
 arch/x86/mm/pgtable.c|7 
 arch/x86/xen/mmu.c   |1 
 drivers/dma/omap-dma.c   |   20 
 drivers/gpio/gpiolib-of.c|5 
 drivers/gpu/drm/udl/udl_connector.c  |4 
 drivers/net/wireless/brcm80211/brcmsmac/phy/phy_lcn.c|  369 +--
 drivers/net/wireless/brcm80211/brcmsmac/phy/phytbl_lcn.c |   64 +-
 drivers/scsi/libsas/sas_expander.c   |   12 
 drivers/target/target_core_alua.c|3 
 drivers/tty/tty_ldisc.c  |   10 
 fs/cifs/connect.c|   16 
 fs/gfs2/file.c   |5 
 fs/gfs2/rgrp.c   |2 
 fs/inode.c   |2 
 include/asm-generic/tlb.h|7 
 include/linux/ftrace.h   |3 
 ipc/msg.c|1 
 kernel/sched/clock.c |   26 +
 kernel/sys.c |3 
 kernel/trace/ftrace.c|   37 -
 kernel/trace/trace_stack.c   |2 
 lib/kobject.c|9 
 mm/memory.c  |1 
 sound/soc/codecs/wm5102.c|2 
 sound/soc/codecs/wm8903.c|2 
 sound/soc/soc-core.c |2 
 sound/usb/mixer_quirks.c |4 
 sound/usb/quirks.c   |2 
 37 files changed, 335 insertions(+), 344 deletions(-)

Alban Bedel (1):
  ASoC: wm8903: Fix the bypass to HP/LINEOUT when no DAC or ADC is running

Boris Ostrovsky (1):
  x86, mm: Patch out arch_flush_lazy_mmu_mode() when running on bare metal

Dave Airlie (1):
  udl: handle EDID failure properly.

Dave Hansen (1):
  x86-32: Fix possible incomplete TLB invalidate with PAE pagetables

Eldad Zack (1):
  ALSA: usb-audio: fix endianness bug in snd_nativeinstruments_*

Greg Kroah-Hartman (1):
  Linux 3.8.8

Haojian Zhuang (1):
  gpio: fix wrong checking condition for gpio range

Huacai Chen (1):
  PM / reboot: call syscore_shutdown() after disable_nonboot_cpus()

John W. Linville (1):
  Revert "brcmsmac: support 4313iPA"

Joonyoung Shim (1):
  ASoC: core: Fix to check return value of snd_soc_update_bits_locked()

Linus Torvalds (1):
  kobject: fix kset_find_obj() race with concurrent last kobject_put()

Lukasz Dorau (1):
  SCSI: libsas: fix handling vacant phy in sas_set_ex_phy()

Mark Brown (1):
  ASoC: wm5102: Correct lookup of arizona struct in SYSCLK event

Markus Pargmann (1):
  ARM: imx35 Bugfix admux clock

Namhyung Kim (2):
  tracing: Fix double free when function profile init failed
  tracing: Fix possible NULL pointer dereferences

Nicholas Bellinger (1):
  target: Fix incorrect fallthrough of ALUA Standby/Offline/Transition CDBs

Nigel Roberts (1):
  ARM: Kirkwood: Fix typo in the definition of ix2-200 rebuild LED

Peter Ujfalusi (1):
  dmaengine: omap-dma: Start DMA without delay for cyclic channels

Sachin Prabhu (1):
  cifs: Allow passwords which begin with a delimitor

Samu Kallio (1):
  x86, mm, paravirt: Fix vmalloc_fault oops during lazy MMU updates

Sebastian Andrzej Siewior (1):
  tty: don't deadlock while flushing workqueue

Stanislav Kinsbursky (1):
  ipc: set msg back to -EAGAIN if copy wasn't performed

Steven Rostedt (Red Hat) (1):
  ftrace: Move ftrace_filter_lseek out of CONFIG_DYNAMIC_FTRACE section

Steven Whitehouse (1):
  GFS2: Fix unlock of fcntl locks during w

Re: Linux 3.4.41

2013-04-16 Thread Greg KH

diff --git a/Makefile b/Makefile
index 3efde3d..90c3a6f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 4
-SUBLEVEL = 40
+SUBLEVEL = 41
 EXTRAVERSION =
 NAME = Saber-toothed Squirrel
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index aa0f913..25e9734 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -741,7 +741,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
 }
 
-void arch_flush_lazy_mmu_mode(void);
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+   PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+}
 
 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4..faf2c04 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,6 +91,7 @@ struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
+   void (*flush)(void);
 };
 
 struct pv_time_ops {
@@ -680,6 +681,7 @@ void paravirt_end_context_switch(struct task_struct *next);
 
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
+void paravirt_flush_lazy_mmu(void);
 
 void _paravirt_nop(void);
 u32 _paravirt_ident_32(u32);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ab13760..128323e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -263,6 +263,18 @@ void paravirt_leave_lazy_mmu(void)
leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
+void paravirt_flush_lazy_mmu(void)
+{
+   preempt_disable();
+
+   if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+   arch_leave_lazy_mmu_mode();
+   arch_enter_lazy_mmu_mode();
+   }
+
+   preempt_enable();
+}
+
 void paravirt_start_context_switch(struct task_struct *prev)
 {
BUG_ON(preemptible());
@@ -292,18 +304,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return percpu_read(paravirt_lazy_mode);
 }
 
-void arch_flush_lazy_mmu_mode(void)
-{
-   preempt_disable();
-
-   if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-   arch_leave_lazy_mmu_mode();
-   arch_enter_lazy_mmu_mode();
-   }
-
-   preempt_enable();
-}
-
 struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -477,6 +477,7 @@ struct pv_mmu_ops pv_mmu_ops = {
.lazy_mode = {
.enter = paravirt_nop,
.leave = paravirt_nop,
+   .flush = paravirt_nop,
},
 
.set_fixmap = native_set_fixmap,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 642d880..53272bd 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1333,6 +1333,7 @@ __init void lguest_init(void)
pv_mmu_ops.read_cr3 = lguest_read_cr3;
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
+   pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
pv_mmu_ops.pte_update_defer = lguest_pte_update;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e922e01..4a0a2e8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -377,10 +377,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long 
address)
if (pgd_none(*pgd_ref))
return -1;
 
-   if (pgd_none(*pgd))
+   if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
-   else
+   arch_flush_lazy_mmu_mode();
+   } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+   }
 
/*
 * Below here mismatches are bugs because these lower tables
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5cb8e27..cf7fe36 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2076,6 +2076,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.lazy_mode = {
.enter = paravirt_enter_lazy_mmu,
.leave = xen_leave_lazy_mmu,
+   .flush = paravirt_flush_lazy_mmu,
},
 
.set_fixmap = xen_set_fixmap,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a135c61..99a7855 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -72,7 +72,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
 * entries. For aliasing ppgtt support we just steal them at the end for
 * now. */
-   first_pd_entry_in_global_pt = 512*1024 - I915_PPGTT_PD_ENTRIES;
+   first_pd_en

Re: [PATCH 1/3] fsfreeze: wait in killable state in __sb_start_write

2013-04-16 Thread Marco Stornelli
Resend due to mail client problem.

Marco

2013/4/17 Marco Stornelli 
>
> Hi,
>
>
> 2013/4/15 Jan Kara 
>>
>> On Sat 13-04-13 12:35:54, Marco Stornelli wrote:
>> > Added a new enum to decide if we want to sleep in uninterruptible or
>> > killable state or we want simply to return immediately.
>>   I like the patch. You can add:
>> Reviewed-by: Jan Kara 
>>
>> Honza
>>
>

I'm happy if we can include the patches. Here is an update on
the ongoing and additional work: the submitted patches can be
applied as-is; however, I'm still working on extending the killable
path in mnt_want_write/mnt_want_write_file and I'm seeing if it's
possible to change even the page_mkwrite path. In the first case, as
Al said, there are three clear "hot" points, do_last, kern_path_create
and mq_open. However I modified the code carefully in these code paths
and I did some basic tests and it works. I'm going to submit the patch
next week for a review if I'm able to do more tests.
About the page_mkwrite path and the return value VM_FAULT_RETRY: the
return value of get_user_pages can be 0 in case of VM_FAULT_RETRY. The
caller set the nonblocking flag and it can manage the situation.
Blocking callers, instead, have a BUG_ON on the return value of 0.
They want either an error code or the number of pages gotten. A little
modification of __get_user_pages can do the work. However,
page_mkwrite could return VM_FAULT_RETRY even if the flag
FAULT_FLAG_ALLOW_RETRY is not set; I don't know if that is correct, and
in any case this flag is, at the moment, not visible in
page_mkwrite. In addition, I need to understand whether a skip can be
useful, or whether the process will in any case go to sleep in
uninterruptible state at a later step. Any comments are welcome.

Marco
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Linux 3.4.41

2013-04-16 Thread Greg KH
I'm announcing the release of the 3.4.41 kernel.

All users of the 3.4 kernel series must upgrade.

The updated 3.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-3.4.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile  |2 -
 arch/x86/include/asm/paravirt.h   |5 ++-
 arch/x86/include/asm/paravirt_types.h |2 +
 arch/x86/kernel/paravirt.c|   25 +++
 arch/x86/lguest/boot.c|1 
 arch/x86/mm/fault.c   |6 ++-
 arch/x86/xen/mmu.c|1 
 drivers/gpu/drm/i915/i915_gem_gtt.c   |2 -
 drivers/gpu/drm/udl/udl_connector.c   |4 ++
 drivers/mtd/mtdchar.c |   56 +-
 drivers/net/ethernet/realtek/r8169.c  |   28 +++--
 drivers/scsi/libsas/sas_expander.c|   12 +++
 drivers/target/target_core_alua.c |3 +
 fs/cifs/connect.c |   16 +++--
 fs/inode.c|2 -
 include/linux/kref.h  |   21 
 kernel/sched/clock.c  |   26 +++
 kernel/sys.c  |3 +
 kernel/trace/ftrace.c |1 
 lib/kobject.c |9 -
 sound/soc/codecs/wm8903.c |2 +
 sound/usb/mixer_quirks.c  |4 +-
 sound/usb/quirks.c|2 -
 23 files changed, 195 insertions(+), 38 deletions(-)

Alban Bedel (1):
  ASoC: wm8903: Fix the bypass to HP/LINEOUT when no DAC or ADC is running

Boris Ostrovsky (1):
  x86, mm: Patch out arch_flush_lazy_mmu_mode() when running on bare metal

Chris Wilson (1):
  drm/i915: Use the correct size of the GTT for placing the per-process 
entries

Dave Airlie (1):
  udl: handle EDID failure properly.

David Woodhouse (1):
  mtd: Disable mtdchar mmap on MMU systems

Eldad Zack (1):
  ALSA: usb-audio: fix endianness bug in snd_nativeinstruments_*

Greg Kroah-Hartman (1):
  Linux 3.4.41

Hayes Wang (1):
  r8169: fix auto speed down issue

Huacai Chen (1):
  PM / reboot: call syscore_shutdown() after disable_nonboot_cpus()

Linus Torvalds (2):
  mtdchar: fix offset overflow detection
  kobject: fix kset_find_obj() race with concurrent last kobject_put()

Lukasz Dorau (1):
  SCSI: libsas: fix handling vacant phy in sas_set_ex_phy()

Namhyung Kim (1):
  tracing: Fix double free when function profile init failed

Nicholas Bellinger (1):
  target: Fix incorrect fallthrough of ALUA Standby/Offline/Transition CDBs

Sachin Prabhu (1):
  cifs: Allow passwords which begin with a delimitor

Samu Kallio (1):
  x86, mm, paravirt: Fix vmalloc_fault oops during lazy MMU updates

Suleiman Souhlal (1):
  vfs: Revert spurious fix to spinning prevention in prune_icache_sb

Thomas Gleixner (1):
  sched_clock: Prevent 64bit inatomicity on 32bit systems

Thomas Hellstrom (1):
  kref: Implement kref_get_unless_zero v3



pgpNRNyGaTMUi.pgp
Description: PGP signature


Re: regulator: tps80031: question about LDO2 TRACK_MODE_ENABLE of TPS80031 or TPS80032-ES1.0

2013-04-16 Thread Laxman Dewangan

On Wednesday 17 April 2013 08:54 AM, Axel Lin wrote:

hi Laxman,

Currently we have below code in tps80031_ldo_set_voltage_sel():

 /* Check for valid setting for TPS80031 or TPS80032-ES1.0 */
 if ((ri->rinfo->desc.id == TPS80031_REGULATOR_LDO2) &&
 (ri->device_flags & TRACK_MODE_ENABLE)) {
 unsigned nvsel = (sel) & 0x1F;
 if (((tps80031_get_chip_info(parent) == TPS80031) ||
 ((tps80031_get_chip_info(parent) == TPS80032) &&
 (tps80031_get_pmu_version(parent) == 0x0))) &&
 ((nvsel == 0x0) || (nvsel >= 0x19 && nvsel <= 0x1F))) {
 dev_err(ri->dev,
 "Invalid sel %d in track mode LDO2\n",
 nvsel);
 return -EINVAL;
 }
 }

However, list_voltage() still shows these invalid selectors as having a
supported voltage.
Besides, the code "nvsel = (sel) & 0x1F" looks a bit odd, because currently
n_voltages is set to 57 when TRACK_MODE_ENABLE is set.

I'm wondering if below comment is still true for LDO2 TRACK_MODE_ENABLE of
"TPS80031/TPS80032-ES1.0"?

/* TRACK mode the ldo2 varies from 600mV to 1300mV */

What is the register value && voltage mapping for LDO2 TRACK_MODE_ENABLE of
TPS80031 or TPS80032-ES1.0?


Hi Axel,

Following is an excerpt of the mail we got from TI on this issue:

There is a bug on LDO2 tracking on TPS80031 and TPS80032 ES1.0 but you 
should be able to have the functionality working.


Issue description:
- LDO2 tracking mode is enabled
- LDO2 tracks SMPS2 voltage.
- LDO2 automatically switches off when LDO2_CFG_VOLTAGE is changed to some
discrete values (non-exhaustive list):

00011001, 00011010, 00011011, 00011100, ...
- LDO2 switches on again when LDO2_CFG_VOLTAGE is changed to other values
(non-exhaustive list):

00011000, 00010111, ...

LDOs have reserved codes. For these codes, the LDO is switched off.

In tracking, LDO2 ref comes from SMPS2.
However LDO2 enable is still gated by LDO2 VSEL decoding.
As a result, in tracking mode LDO2 will be disabled for the following codes
(SMPS VSEL format):

000000 & 100000 (MSB not decoded)
011001 & 111001 (MSB not decoded)
011010 & 111010 (MSB not decoded)
011100 & 111100 (MSB not decoded)
011101 & 111101 (MSB not decoded)
011110 & 111110 (MSB not decoded)





Therefore, we made this voltage to be invalid on above case.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] No need to call irq_domain_legacy_revmap() for twice

2013-04-16 Thread Mike
在 2012-12-11二的 08:30 +0000,Grant Likely写道:
> On Tue, 27 Nov 2012 09:41:46 +0800, Mike  wrote:
> > 在 2012-11-26一的 20:17 +0000,Grant Likely写道:
> > > On Mon, 24 Sep 2012 17:37:55 +0800, Mike Qiu  
> > > wrote:
> > > > > Function irq_create_mapping() calls irq_find_mapping(). The latter
> > > > function has checked if the indicated IRQ domain has hw IRQ mapped to
> > > > virtual IRQ through legacy mode or not and return the value of the
> > > > legacy irq number by call irq_domain_legacy_revmap(). We needn't
> > > > to call irq_domain_legacy_revmap() to do same check in
> > > > irq_create_mapping() again.
> > > > 
> > > > The patch removes the duplicate call.
> > > > 
> > > > Signed-off-by: Mike Qiu 
> > > > ---
> > > > >  kernel/irq/irqdomain.c |    7 +++++--
> > > >  1 files changed, 5 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> > > > index 49a7772..286d672 100644
> > > > --- a/kernel/irq/irqdomain.c
> > > > +++ b/kernel/irq/irqdomain.c
> > > > @@ -547,9 +547,12 @@ unsigned int irq_create_mapping(struct irq_domain 
> > > > *domain,
> > > > return virq;
> > > > }
> > > >  
> > > > -   /* Get a virtual interrupt number */
> > > > +   /*
> > > > +* For IRQ domain with type of IRQ_DOMAIN_MAP_LEGACY, we needn't
> > > > +* create the IRQ mapping for non-existing one, so just return 
> > > > 0.
> > > > +*/
> > > > if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
> > > > -   return irq_domain_legacy_revmap(domain, hwirq);
> > > > +   return 0;
> > > 
> > > But it does need to return the virq assigned to the hwirq. That is why
> > > it has to call the revmap function.
> > > 
> > Yes, thanks
> > 
> > this judgment has been done in
> >  /*Check if mapping already exists*/
> > virq = irq_find_mapping(domain, hwirq);
> > 
> > if the virq equals none zero, the func irq_create_mapping will 
> > return the virq value directly(already exists).
> > 
> > otherwise, that means virq equals zero, this has two meanings, one is
> > haven't find the already exist mapping, the other one is in
> > irq_find_mapping()
> > case IRQ_DOMAIN_MAP_LEGACY:
> > return irq_domain_legacy_revmap(domain, hwirq); 
> > this may return zero.
> > 
> > So, when we check whether it is IRQ_DOMAIN_MAP_LEGACY, just returning zero
> > is OK, because if it is non-zero, it will be returned after the check for
> > an already existing mapping, and never get here. Also, we never try to assign
> > the virq for the legacy map.
> > 
> > I don't know if you have understood my explanation.
> 
> Yes, your explanation makes sense and you are correct. I'm not going to
> apply this for v3.8 (I've left it too late before the merge window), but
> I'll look again after the merge window closes.
> 
Hi Grant 

I don't know if you have forgotten this patch ...

I haven't seen this patch in 3.9-rc7.

This is just a reminder.

Thanks
Mike
> g.
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux 3.0.74

2013-04-16 Thread Greg KH

diff --git a/Makefile b/Makefile
index f82a70f..71e8efa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 0
-SUBLEVEL = 73
+SUBLEVEL = 74
 EXTRAVERSION =
 NAME = Sneaky Weasel
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ebbc4d8..2fdfe31 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,7 +731,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
 }
 
-void arch_flush_lazy_mmu_mode(void);
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+   PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+}
 
 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 8288509..4b67ec9 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -85,6 +85,7 @@ struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
+   void (*flush)(void);
 };
 
 struct pv_time_ops {
@@ -673,6 +674,7 @@ void paravirt_end_context_switch(struct task_struct *next);
 
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
+void paravirt_flush_lazy_mmu(void);
 
 void _paravirt_nop(void);
 u32 _paravirt_ident_32(u32);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1ae..704faba 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -253,6 +253,18 @@ void paravirt_leave_lazy_mmu(void)
leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
+void paravirt_flush_lazy_mmu(void)
+{
+   preempt_disable();
+
+   if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+   arch_leave_lazy_mmu_mode();
+   arch_enter_lazy_mmu_mode();
+   }
+
+   preempt_enable();
+}
+
 void paravirt_start_context_switch(struct task_struct *prev)
 {
BUG_ON(preemptible());
@@ -282,18 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return percpu_read(paravirt_lazy_mode);
 }
 
-void arch_flush_lazy_mmu_mode(void)
-{
-   preempt_disable();
-
-   if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-   arch_leave_lazy_mmu_mode();
-   arch_enter_lazy_mmu_mode();
-   }
-
-   preempt_enable();
-}
-
 struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -462,6 +462,7 @@ struct pv_mmu_ops pv_mmu_ops = {
.lazy_mode = {
.enter = paravirt_nop,
.leave = paravirt_nop,
+   .flush = paravirt_nop,
},
 
.set_fixmap = native_set_fixmap,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index db832fd..2d45247 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1309,6 +1309,7 @@ __init void lguest_init(void)
pv_mmu_ops.read_cr3 = lguest_read_cr3;
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
+   pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
pv_mmu_ops.pte_update_defer = lguest_pte_update;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3b2ad91..7653f14 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -376,10 +376,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long 
address)
if (pgd_none(*pgd_ref))
return -1;
 
-   if (pgd_none(*pgd))
+   if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
-   else
+   arch_flush_lazy_mmu_mode();
+   } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+   }
 
/*
 * Below here mismatches are bugs because these lower tables
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 849a975..025d469 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long 
start_pfn,
 
 extern unsigned long highend_pfn, highstart_pfn;
 
-#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
-
-static void *node_remap_start_vaddr[MAX_NUMNODES];
-void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-/*
- * Remap memory allocator
- */
-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
-/**
- * alloc_remap - Allocate remapped memory
- * @nid: NUMA node to allocate memory from
- * @size: The size of allocation
- *
- * Allocate @size bytes from the remap area of NUMA node @nid.  The
- * size of the remap area is predetermined by init_alloc_remap() and
- * only the callers considered there should 

Linux 3.0.74

2013-04-16 Thread Greg KH
I'm announcing the release of the 3.0.74 kernel.

All users of the 3.0 kernel series must upgrade.

The updated 3.0.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-3.0.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile  |2 
 arch/x86/include/asm/paravirt.h   |5 -
 arch/x86/include/asm/paravirt_types.h |2 
 arch/x86/kernel/paravirt.c|   25 ++---
 arch/x86/lguest/boot.c|1 
 arch/x86/mm/fault.c   |6 -
 arch/x86/mm/numa_32.c |  161 --
 arch/x86/xen/mmu.c|1 
 drivers/mtd/mtdchar.c |   56 ++-
 drivers/net/r8169.c   |   30 +-
 drivers/target/target_core_alua.c |3 
 kernel/sched_clock.c  |   26 +
 kernel/sys.c  |3 
 kernel/trace/ftrace.c |1 
 sound/soc/codecs/wm8903.c |2 
 sound/usb/mixer_quirks.c  |4 
 sound/usb/quirks.c|2 
 17 files changed, 136 insertions(+), 194 deletions(-)

Alban Bedel (1):
  ASoC: wm8903: Fix the bypass to HP/LINEOUT when no DAC or ADC is running

Boris Ostrovsky (1):
  x86, mm: Patch out arch_flush_lazy_mmu_mode() when running on bare metal

Dave Hansen (1):
  x86-32, mm: Rip out x86_32 NUMA remapping code

David Woodhouse (1):
  mtd: Disable mtdchar mmap on MMU systems

Eldad Zack (1):
  ALSA: usb-audio: fix endianness bug in snd_nativeinstruments_*

Greg Kroah-Hartman (1):
  Linux 3.0.74

Hayes Wang (1):
  r8169: fix auto speed down issue

Huacai Chen (1):
  PM / reboot: call syscore_shutdown() after disable_nonboot_cpus()

Linus Torvalds (1):
  mtdchar: fix offset overflow detection

Namhyung Kim (1):
  tracing: Fix double free when function profile init failed

Nicholas Bellinger (1):
  target: Fix incorrect fallthrough of ALUA Standby/Offline/Transition CDBs

Samu Kallio (1):
  x86, mm, paravirt: Fix vmalloc_fault oops during lazy MMU updates

Thomas Gleixner (1):
  sched_clock: Prevent 64bit inatomicity on 32bit systems



pgpXVlIb8pyT3.pgp
Description: PGP signature


Re: [Bug fix PATCH v3] Reusing a resource structure allocated by bootmem

2013-04-16 Thread Yasuaki Ishimatsu

Hi David,

2013/04/17 13:47, David Rientjes wrote:

On Wed, 17 Apr 2013, Yasuaki Ishimatsu wrote:


Why not simply do what generic sparsemem support does by testing
PageSlab(virt_to_head_page(res)) and calling kfree() if true and freeing
back to bootmem if false?  This should be like a five line patch.


Is your explanation about free_section_usemap()?
If so, I don't think we can release resource structure like
free_section_usemap().





Right, you can't release it like free_section_usemap(), but you're free to
test for PageSlab(virt_to_head_page(res)) in kernel/resource.c.


O.K. I'll update it.




In the case you describe, memmap can be released by put_page_bootmem() in
free_map_bootmem() since all pages of memmap are used only for memmap.
But if my understanding is correct, a page holding a released resource structure
also contains objects allocated by bootmem for other purposes. So we cannot
release the resource structure like free_section_usemap().



I'm thinking it would be much easier to just suppress the kfree() if
!PageSlab.  If you can free an entire page with free_bootmem_late(),
that would be great,



but I'm thinking that will take more work than it's
worth.  It seems fine to just do free_bootmem() and leave those pages as
reserved.


I think so, too.


How much memory are we talking about?


Hmm. I don't know exactly.

Here is kernel message of my system. The message is shown by mem_init().

--
Memory: 30491076k/33554432k available (5570k kernel code, 2274228k absent, 
789128k reserved, 5667k data, 1784k init)
---

Reserved memory size is 789128k. So part of it is freed after system boot
by memory hotplug et al.

Thanks,
Yasuaki Ishimatsu


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC Patch 0/2] mm: Add parameters to make kernel behavior at memory error on dirty cache selectable

2013-04-16 Thread Simon Jeons
Hi Naoya,
On 04/11/2013 11:23 PM, Naoya Horiguchi wrote:
> On Thu, Apr 11, 2013 at 03:49:16PM +0200, Andi Kleen wrote:
>>> As a result, if the dirty cache includes user data, the data is lost,
>>> and data corruption occurs if an application uses old data.
>> The application cannot use old data, the kernel code kills it if it
>> would do that. And if it's IO data there is an EIO triggered.
>>
>> iirc the only concern in the past was that the application may miss
>> the asynchronous EIO because it's cleared on any fd access. 
>>
>> This is a general problem not specific to memory error handling, 
>> as these asynchronous IO errors can happen due to other reason
>> (bad disk etc.) 
>>
>> If you're really concerned about this case I think the solution
>> is to make the EIO more sticky so that there is a higher chance
>> than it gets returned.  This will make your data much more safe,
>> as it will cover all kinds of IO errors, not just the obscure memory
>> errors.
> I'm interested in this topic, and in a previous discussion, what I was told
> is that we can't expect user applications to change their behaviors when
> they get EIO, so globally changing EIO's stickiness is not a great approach.

Will the user applications get EIO first, or SIGKILL first?

> I'm working on a new pagecache tag based mechanism to solve this.
> But it needs time and more discussions.
> So I guess Tanino-san suggests giving up on dirty pagecache errors
> as a quick solution.
>
> Thanks,
> Naoya
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"d...@kvack.org"> em...@kvack.org </a>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] regulator: ab8500: Fix set voltage for AB8540_LDO_AUX3

2013-04-16 Thread Bengt Jönsson

On 04/16/2013 04:46 PM, Axel Lin wrote:

When setting voltage for AB8540_LDO_AUX3, current code only updates one of
info->voltage_reg and info->expand_register registers which is wrong.
To ensure we set to correct voltage, it always needs to clear or set
expand_register.voltage_mask bit of expand_register.

The function of the expand register bit is the following (from the user manual):
0: VAUX3 output voltage is determined by Vaux3Sel bit settings in register
VldoCVaux3Sel
1: VAUX3 output voltage is set to 3.05V regardless of Vaux3Sel settings in
register VldoCVaux3Sel (VldoCVaux3Sel is the register at 0x0421)

So when going to 3.05V, set the expand register bit.
When leaving 3.05V for another voltage, set the target voltage before clearing
the expand register bit.

Signed-off-by: Axel Lin 

Looks fine. Thanks!

Acked-by: Bengt Jonsson 

---
  drivers/regulator/ab8500.c |   69 
  1 file changed, 51 insertions(+), 18 deletions(-)

diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index 0a62ef9..84e064b 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c
@@ -612,32 +612,65 @@ static int ab8540_aux3_regulator_set_voltage_sel(struct 
regulator_dev *rdev,
return -EINVAL;
}
  
+	/* Set the expand register bit for 3.05V.

+  Once expand register bit is set, VAUX3 output voltage is set to 3.05V
+  regardless of Vaux3Sel settings in register VldoCVaux3Sel.
+*/
if (selector >= info->expand_register.voltage_limit) {
-   /* Vaux3 bit4 has different layout */
-   regval = (u8)selector << info->expand_register.voltage_shift;
-   ret = abx500_mask_and_set_register_interruptible(info->dev,
-   info->expand_register.voltage_bank,
-   info->expand_register.voltage_reg,
-   info->expand_register.voltage_mask,
-   regval);
-   } else {
-   /* set the registers for the request */
-   regval = (u8)selector << info->voltage_shift;
ret = abx500_mask_and_set_register_interruptible(info->dev,
-   info->voltage_bank, info->voltage_reg,
-   info->voltage_mask, regval);
+   info->expand_register.voltage_bank,
+   info->expand_register.voltage_reg,
+   info->expand_register.voltage_mask,
+   info->expand_register.voltage_mask);
+   if (ret < 0) {
+   dev_err(rdev_get_dev(rdev),
+   "couldn't set expand voltage reg for 
regulator\n");
+   return ret;
+   }
+
+   dev_vdbg(rdev_get_dev(rdev),
+"%s-set_voltage expand (bank, reg, mask, value): 0x%x, 
0x%x, 0x%x, 0x%x\n",
+info->desc.name, info->expand_register.voltage_bank,
+info->expand_register.voltage_reg,
+info->expand_register.voltage_mask,
+info->expand_register.voltage_mask);
+
+   return 0;
}
-   if (ret < 0)
+
+   /* Set target voltage before clearing the expand register bit */
+   regval = (u8)selector << info->voltage_shift;
+   ret = abx500_mask_and_set_register_interruptible(info->dev,
+   info->voltage_bank, info->voltage_reg,
+   info->voltage_mask, regval);
+   if (ret < 0) {
dev_err(rdev_get_dev(rdev),
"couldn't set voltage reg for regulator\n");
+   return ret;
+   }
  
  	dev_vdbg(rdev_get_dev(rdev),

-   "%s-set_voltage (bank, reg, mask, value): 0x%x, 0x%x, 
0x%x,"
-   " 0x%x\n",
-   info->desc.name, info->voltage_bank, info->voltage_reg,
-   info->voltage_mask, regval);
+"%s-set_voltage (bank, reg, mask, value): 0x%x, 0x%x, 0x%x, 
0x%x\n",
+info->desc.name, info->voltage_bank, info->voltage_reg,
+info->voltage_mask, regval);
  
-	return ret;

+   ret = abx500_mask_and_set_register_interruptible(info->dev,
+   info->expand_register.voltage_bank,
+   info->expand_register.voltage_reg,
+   info->expand_register.voltage_mask, 0);
+   if (ret < 0) {
+   dev_err(rdev_get_dev(rdev),
+   "couldn't clear expand voltage reg for regulator\n");
+   return ret;
+   }
+
+   dev_vdbg(rdev_get_dev(rdev),
+"%s-set_voltage expand (bank, reg, mask, value): 0x%x, 0x%x, 0x%x, 
0x%x\n",
+info->desc.name, info->expand_register.voltage

Re: linux-next: manual merge of the gen-gpio tree with the usb tree

2013-04-16 Thread Felipe Balbi
Hi,

On Wed, Apr 17, 2013 at 03:47:15PM +1000, Stephen Rothwell wrote:
> Hi Alexandre,
> 
> Today's linux-next merge of the gen-gpio tree got a conflict in
> drivers/usb/otg/Kconfig between commit a0e631235a04 ("usb: phy: move all
> PHY drivers to drivers/usb/phy/") from the usb tree and commit
> 76ec9d18b897 ("Convert selectors of GENERIC_GPIO to GPIOLIB") from the
> gen-gpio tree.
> 
> The former patch removed the file, so I did that and then applied the
> following patch and can carry the fix as necessary (no action is
> required).

Thanks Stephen, looks alright :-)

-- 
balbi


signature.asc
Description: Digital signature


linux-next: manual merge of the renesas tree with the gen-gpio tree

2013-04-16 Thread Stephen Rothwell
Hi Simon,

Today's linux-next merge of the renesas tree got a conflict in
drivers/pinctrl/sh-pfc/Kconfig between commit 76ec9d18b897 ("Convert
selectors of GENERIC_GPIO to GPIOLIB") from the gen-gpio tree and commit
809e5fce84f4 ("sh-pfc: Remove dependency on GPIOLIB") from the renesas
tree.

I fixed it up (using the latter) and can carry the fix as necessary (no
action is required).

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpgfrsu0j8Ks.pgp
Description: PGP signature


Re: [RFC Patch 0/2] mm: Add parameters to make kernel behavior at memory error on dirty cache selectable

2013-04-16 Thread Simon Jeons

Hi Mitsuhiro,
On 04/12/2013 09:43 PM, Mitsuhiro Tanino wrote:

(2013/04/11 22:00), Ric Mason wrote:

Hi Mitsuhiro,
On 04/11/2013 08:51 PM, Mitsuhiro Tanino wrote:

(2013/04/11 12:53), Simon Jeons wrote:

One question against mce instead of the patchset. ;-)

When is memory checked for errors? Before memory access? Is there a process that
scans it periodically?

Hi Simon-san,

Yes, there is a process to scan memory periodically.

At Intel Nehalem-EX and CPUs after Nehalem-EX generation, MCA recovery
is supported. MCA recovery provides error detection and isolation
features to work together with OS.
One of the MCA Recovery features is Memory Scrubbing. It periodically
checks memory in the background of OS.

Is Memory Scrubbing a kernel thread? Where is the code for memory scrubbing?

Hi Ric,

No. One of the MCA Recovery features is Memory Scrubbing.


Is Memory Scrubbing a process in the CPU?


And Memory Scrubbing is a hardware feature of Intel CPU.

OS has a hwpoison feature which is included at mm/memory-failure.c.
A main function is memory_failure().

If Memory Scrubbing finds a memory error, MCA recovery notifies SRAO error
into OS and OS handles the SRAO error using hwpoison function.



If Memory Scrubbing find an uncorrectable error on a memory before
OS accesses the memory bit, MCA recovery notifies SRAO error into OS

It may not find a memory error in time, since it may be sleeping when the memory
error occurs. Can this case happen?

Memory Scrubbing seems to operate periodically, but I don't have
information about how often it is executed.


If Memory Scrubbing doesn't catch a memory error in time, who will send
SRAR to the OS?




Regards,
Mitsuhiro Tanino

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majord...@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"d...@kvack.org"> em...@kvack.org </a>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the gen-gpio tree with the usb tree

2013-04-16 Thread Stephen Rothwell
Hi Alexandre,

Today's linux-next merge of the gen-gpio tree got a conflict in
drivers/usb/otg/Kconfig between commit a0e631235a04 ("usb: phy: move all
PHY drivers to drivers/usb/phy/") from the usb tree and commit
76ec9d18b897 ("Convert selectors of GENERIC_GPIO to GPIOLIB") from the
gen-gpio tree.

The former patch removed the file, so I did that and then applied the
following patch and can carry the fix as necessary (no action is
required).

From: Stephen Rothwell 
Date: Wed, 17 Apr 2013 15:44:53 +1000
Subject: [PATCH] gen-gpio: GPIOLIB merge fix up

Signed-off-by: Stephen Rothwell 
---
 drivers/usb/phy/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 3a7fec9..21153d1 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -128,7 +128,7 @@ config TWL6030_USB
 
 config USB_GPIO_VBUS
tristate "GPIO based peripheral-only VBUS sensing 'transceiver'"
-   depends on GENERIC_GPIO
+   depends on GPIOLIB
help
  Provides simple GPIO VBUS sensing for controllers with an
  internal transceiver via the usb_phy interface, and
-- 
1.8.1

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpUXrbjrDyvj.pgp
Description: PGP signature


[PATCH] usb: storage: Add usb_stor_dbg, reduce object size

2013-04-16 Thread Joe Perches
Reduce the size of the objects by consolidating
the duplicated USB_STORAGE into a single function.

Add function usb_stor_dbg to emit debugging messages.
Always validate the format and arguments.
Reduce the number of uses of CONFIG_USB_STORAGE_DEBUG.

Reduces size of objects ~7KB when CONFIG_USB_STORAGE_DEBUG
is set.

$ size drivers/usb/storage/built-in.o*
   textdata bss dec hex filename
 140133   55296   70312  265741   40e0d drivers/usb/storage/built-in.o.new
 147494   55248   70296  273038   42a8e drivers/usb/storage/built-in.o.old

Signed-off-by: Joe Perches 
---
 drivers/usb/storage/debug.c  | 20 +++-
 drivers/usb/storage/debug.h  | 14 +-
 drivers/usb/storage/isd200.c | 12 ++--
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/storage/debug.c b/drivers/usb/storage/debug.c
index a2b5526..b428129 100644
--- a/drivers/usb/storage/debug.c
+++ b/drivers/usb/storage/debug.c
@@ -150,7 +150,7 @@ void usb_stor_show_command(struct scsi_cmnd *srb)
default: what = "(unknown command)"; break;
}
US_DEBUGP("Command %s (%d bytes)\n", what, srb->cmd_len);
-   US_DEBUGP("");
+   US_DEBUGP("bytes: ");
for (i = 0; i < srb->cmd_len && i < 16; i++)
US_DEBUGPX(" %02x", srb->cmnd[i]);
US_DEBUGPX("\n");
@@ -175,3 +175,21 @@ void usb_stor_show_sense(
US_DEBUGPX(what, ascq);
US_DEBUGPX("\n");
 }
+
+int usb_stor_dbg(const char *fmt, ...)
+{
+   struct va_format vaf;
+   va_list args;
+   int r;
+
+   va_start(args, fmt);
+
+   vaf.fmt = fmt;
+   vaf.va = &args;
+
+   r = printk(KERN_DEBUG USB_STORAGE "%pV", &vaf);
+
+   va_end(args);
+
+   return r;
+}
diff --git a/drivers/usb/storage/debug.h b/drivers/usb/storage/debug.h
index dbb985d..d4280e1 100644
--- a/drivers/usb/storage/debug.h
+++ b/drivers/usb/storage/debug.h
@@ -50,12 +50,16 @@
 void usb_stor_show_command(struct scsi_cmnd *srb);
 void usb_stor_show_sense( unsigned char key,
unsigned char asc, unsigned char ascq );
-#define US_DEBUGP(x...) printk( KERN_DEBUG USB_STORAGE x )
-#define US_DEBUGPX(x...) printk( x )
-#define US_DEBUG(x) x 
+__printf(1, 2) int usb_stor_dbg(const char *fmt, ...);
+
+#define US_DEBUGP(fmt, ...)usb_stor_dbg(fmt, ##__VA_ARGS__)
+#define US_DEBUGPX(fmt, ...)   printk(fmt, ##__VA_ARGS__)
+#define US_DEBUG(x)x
 #else
-#define US_DEBUGP(x...)
-#define US_DEBUGPX(x...)
+#define US_DEBUGP(fmt, ...)\
+   do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
+#define US_DEBUGPX(fmt, ...)   \
+   do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
 #define US_DEBUG(x)
 #endif
 
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index 06a3d22..55571ae 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -926,10 +926,6 @@ static int isd200_try_enum(struct us_data *us, unsigned 
char master_slave,
 
/* loop until we detect !BSY or timeout */
while(1) {
-#ifdef CONFIG_USB_STORAGE_DEBUG
-   char* mstr = master_slave == ATA_ADDRESS_DEVHEAD_STD ?
-   "Master" : "Slave";
-#endif
 
status = isd200_action( us, ACTION_ENUM, NULL, master_slave );
if ( status != ISD200_GOOD )
@@ -942,9 +938,13 @@ static int isd200_try_enum(struct us_data *us, unsigned 
char master_slave,
 
if (!detect) {
if (regs[ATA_REG_STATUS_OFFSET] & ATA_BUSY) {
-   US_DEBUGP("   %s status is still BSY, try 
again...\n",mstr);
+   US_DEBUGP("   %s status is still BSY, try 
again...\n",
+ master_slave == 
ATA_ADDRESS_DEVHEAD_STD ?
+ "Master" : "Slave");
} else {
-   US_DEBUGP("   %s status !BSY, continue with 
next operation\n",mstr);
+   US_DEBUGP("   %s status !BSY, continue with 
next operation\n",
+ master_slave == 
ATA_ADDRESS_DEVHEAD_STD ?
+ "Master" : "Slave");
break;
}
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT] Networking

2013-04-16 Thread David Miller

1) Fix erroneous netfilter drop of SIP packets generated by some Cisco
   phones, from Patrick McHardy.

2) Fix netfilter IPSET refcounting in list_set_add(), from Jozsef
   Kadlecsik.

3) Fix TCP syncookies route lookup key, we don't use the same values
   we would use for the usual SYN receive processing, from Dmitry
   Popov.

4) Fix NULL deref in bond_slave_netdev_event(), from Nikolay
   Aleksandrov.

5) When bonding enslave fails, we can forget to clear the IFF_BONDING
   bit, fix also from Nikolay Aleksandrov.

6) skb->csum_start is 16-bits, which is almost always just fine.
   But if we reallocate the headroom of an SKB this can push the
   skb->csum_start value outside of its valid range.  This can
   easily happen when collapsing multiple SKBs from the retransmit
   queue together.

   Fix from Thomas Graf.

7) Fix NULL deref in be2net driver due to missing check of
   __vlan_put_tag() return value, from Ivan Vecera.

8) tun_set_iff() returns zero instead of error code on failure,
   fix from Wei Yongjun.

9) Like GARP, 802 MRP needs to hold the app->lock when adding MAD
   events and queueing PDUs.  Fix from David Ward.

10) Build fix, MVMDIO needs PHYLIB, from Thomas Petazzoni.

11) Fix mac80211 static with ipv6 modular build, from Cong Wang.

12) If userland specifies a path cost explicitly, do not override
it when the carrier state changes.  From Stephen Hemminger.

13) mvneta calculates the TX queue to use incorrectly, resulting in
garbage pointer derefs and crashes, fix from Willy Tarreau.

14) cdc_mbim does erroneous sizeof(ETH_HLEN).  Fix from Bjorn Mork.

15) IP fragmentation can leak a refcount-less route out from an
RCU protected section.  This results in crashes and all sorts
of hard to diagnose behavior.  Fix from Eric Dumazet.

Please pull, thanks a lot!

The following changes since commit fe2971a01762963c62e9872bfcf0632546342c0f:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2013-04-10 
14:15:27 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net master

for you to fetch changes up to 361cd29cf9363505c2a35bbf9a034a22feebfb07:

  qlcnic: fix beaconing test for 82xx adapter (2013-04-17 01:15:29 -0400)


Bjørn Mork (1):
  net: cdc_mbim: remove bogus sizeof()

Christian Ruppert (1):
  stmmac: prevent interrupt loop with MMC RX IPC Counter

Christoph Fritz (1):
  can: sja1000: fix handling on dt properties on little endian systems

Cong Wang (1):
  ipv6: statically link register_inet6addr_notifier()

David S. Miller (2):
  Merge branch 'master' of git://git.kernel.org/.../pablo/nf into netfilter
  Merge branch 'fixes-for-3.9' of git://gitorious.org/linux-can/linux-can

David Ward (1):
  net/802/mrp: fix possible race condition when calling mrp_pdu_queue()

Dmitry Popov (1):
  tcp: incoming connections might use wrong route under synflood

Eric Dumazet (1):
  net: drop dst before queueing fragments

Florian Westphal (1):
  netfilter: nf_nat: fix race when unloading protocol modules

Himanshu Madhani (1):
  qlcnic: fix beaconing test for 82xx adapter

Ivan Vecera (1):
  be2net: take care of __vlan_put_tag return value

Jozsef Kadlecsik (2):
  netfilter: ipset: list:set: fix reference counter update
  netfilter: ipset: hash:*net*: nomatch flag not excluded on set resize

Lucas Stach (1):
  net: fec: fix regression in link change accounting

Marc Kleine-Budde (1):
  can: mcp251x: add missing IRQF_ONESHOT to request_threaded_irq

Mugunthan V N (1):
  drivers: net: ethernet: cpsw: get slave VLAN id from slave node instead 
of cpsw node

Patrick McHardy (1):
  netfilter: nf_ct_sip: don't drop packets with offsets pointing outside 
the packet

Thomas Graf (1):
  tcp: Reallocate headroom if it would overflow csum_start

Thomas Petazzoni (1):
  net: mvmdio: add select PHYLIB

Wei Yongjun (2):
  tuntap: fix error return code in tun_set_iff()
  esp4: fix error return code in esp_output()

Willy Tarreau (1):
  net: mvneta: fix improper tx queue usage in mvneta_tx()

niko...@redhat.com (2):
  bonding: fix netdev event NULL pointer dereference
  bonding: IFF_BONDING is not stripped on enslave failure

stephen hemminger (1):
  bridge: make user modified path cost sticky

 drivers/net/bonding/bond_main.c   | 14 --
 drivers/net/can/mcp251x.c | 10 --
 drivers/net/can/sja1000/sja1000_of_platform.c | 31 
+++
 drivers/net/ethernet/emulex/benet/be_main.c   |  5 +++--
 drivers/net/ethernet/freescale/fec.c  |  1 +
 drivers/net/ethernet/marvell/Kconfig  |  2 +-
 drivers/net/ethernet/marvell/mvneta.c |  9 -
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c |  4 ++--
 drivers/net/ethernet/stmicro/stmmac/mmc_core.c| 

Re: [PATCH] regulator: as3711: Use a static of_regulator_match table for of_regulator_match

2013-04-16 Thread Guennadi Liakhovetski
Hi Axel

Thanks for the patch

On Wed, 17 Apr 2013, Axel Lin wrote:

> The same table can be used for multiple instances of pdev, so we don't need to
> allocate memory for of_regulator_match table per pdev.
> 
> Signed-off-by: Axel Lin 
> ---
>  drivers/regulator/as3711-regulator.c |   46 
> ++
>  1 file changed, 19 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/regulator/as3711-regulator.c 
> b/drivers/regulator/as3711-regulator.c
> index 0539b3e..dd1a089 100644
> --- a/drivers/regulator/as3711-regulator.c
> +++ b/drivers/regulator/as3711-regulator.c
> @@ -278,52 +278,44 @@ static struct as3711_regulator_info as3711_reg_info[] = 
> {
>  
>  #define AS3711_REGULATOR_NUM ARRAY_SIZE(as3711_reg_info)
>  
> -static const char *as3711_regulator_of_names[AS3711_REGULATOR_NUM] = {
> - [AS3711_REGULATOR_SD_1] = "sd1",
> - [AS3711_REGULATOR_SD_2] = "sd2",
> - [AS3711_REGULATOR_SD_3] = "sd3",
> - [AS3711_REGULATOR_SD_4] = "sd4",
> - [AS3711_REGULATOR_LDO_1] = "ldo1",
> - [AS3711_REGULATOR_LDO_2] = "ldo2",
> - [AS3711_REGULATOR_LDO_3] = "ldo3",
> - [AS3711_REGULATOR_LDO_4] = "ldo4",
> - [AS3711_REGULATOR_LDO_5] = "ldo5",
> - [AS3711_REGULATOR_LDO_6] = "ldo6",
> - [AS3711_REGULATOR_LDO_7] = "ldo7",
> - [AS3711_REGULATOR_LDO_8] = "ldo8",
> +static struct of_regulator_match as3711_regulator_matches[] = {
> + { .name = "sd1" },
> + { .name = "sd2" },
> + { .name = "sd3" },
> + { .name = "sd4" },
> + { .name = "ldo1" },
> + { .name = "ldo2" },
> + { .name = "ldo3" },
> + { .name = "ldo4" },
> + { .name = "ldo5" },
> + { .name = "ldo6" },
> + { .name = "ldo7" },
> + { .name = "ldo8" },

Please keep explicit indices to match this array's members to the 
as3711_reg_info[] array.

>  };
>  
>  static int as3711_regulator_parse_dt(struct device *dev,
>   struct device_node **of_node, const int count)
>  {
>   struct as3711_regulator_pdata *pdata = dev_get_platdata(dev);
> - struct device_node *regulators =
> - of_find_node_by_name(dev->parent->of_node, "regulators");
> - struct of_regulator_match *matches, *match;
> + struct device_node *regulators;
> + struct of_regulator_match *match;
>   int ret, i;
>  
> + regulators = of_find_node_by_name(dev->parent->of_node, "regulators");

What was wrong with the original code? I don't see a difference, this 
seems to be an unrelated stylistic change, please, don't do this.

>   if (!regulators) {
>   dev_err(dev, "regulator node not found\n");
>   return -ENODEV;
>   }
>  
> - matches = devm_kzalloc(dev, sizeof(*matches) * count, GFP_KERNEL);
> - if (!matches)
> - return -ENOMEM;
> -
> - for (i = 0, match = matches; i < count; i++, match++) {
> - match->name = as3711_regulator_of_names[i];
> - match->driver_data = as3711_reg_info + i;

This is a separate change. I was probably copy-pasting this parsing from 
some other driver and didn't realise, that .driver_data isn't actually 
used. And that was the reason why I copied the array at run-time. I won't 
play a patch-police here, asking you to split this into a separate patch, 
but please, could you at least make a remark in the commit message, 
confirming my understanding. Or maybe I'm wrong and .driver_data is 
needed? Then your whole patch might not be right.

> - }
> -
> - ret = of_regulator_match(dev->parent, regulators, matches, count);
> + ret = of_regulator_match(dev->parent, regulators,
> +  as3711_regulator_matches, count);
>   of_node_put(regulators);
>   if (ret < 0) {
>   dev_err(dev, "Error parsing regulator init data: %d\n", ret);
>   return ret;
>   }
>  
> - for (i = 0, match = matches; i < count; i++, match++)
> + for (i = 0, match = as3711_regulator_matches; i < count; i++, match++)
>   if (match->of_node) {
>   pdata->init_data[i] = match->init_data;
>   of_node[i] = match->of_node;
> -- 
> 1.7.10.4

Thanks
Guennadi
---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
http://www.open-technology.de/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] module: add kset_obj_exists() and use it

2013-04-16 Thread Veaceslav Falico

On Wed, Apr 17, 2013 at 01:25:13PM +0930, Rusty Russell wrote:

Veaceslav Falico  writes:

Tested for a day on two reproducers on the latest upstream kernel, with the
recent kobject fix a49b7e82 ("kobject: fix kset_find_obj() race with
concurrent last kobject_put()") - it fixes the issue, no regressions met.


Thanks, I've included the fix in my modules-next tree.

I did not CC:stable, since concurrent unload and reload is not really a
normal condition.


Sounds great, thank you!



Cheers,
Rusty.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC Patch 0/2] mm: Add parameters to make kernel behavior at memory error on dirty cache selectable

2013-04-16 Thread Simon Jeons

On 04/11/2013 09:49 PM, Andi Kleen wrote:

As a result, if the dirty cache includes user data, the data is lost,
and data corruption occurs if an application uses old data.


Hi Andi,

Could you give me the link of your mce testcase?


The application cannot use old data, the kernel code kills it if it
would do that. And if it's IO data there is an EIO triggered.

iirc the only concern in the past was that the application may miss
the asynchronous EIO because it's cleared on any fd access.

This is a general problem not specific to memory error handling,
as these asynchronous IO errors can happen due to other reason
(bad disk etc.)

If you're really concerned about this case I think the solution
is to make the EIO more sticky so that there is a higher chance
than it gets returned.  This will make your data much more safe,
as it will cover all kinds of IO errors, not just the obscure memory
errors.

Or maybe have a panic knob on any IO error for any case if you don't
trust your application to check IO syscalls. But I would rather
have better EIO reporting than just giving up like this.

The problem of tying it just to any dirty data for memory errors
is that most anonymous data is dirty and it doesn't have this problem
at all (because the signals handle this and they cannot be lost)

And that is a far more common case than this relatively unlikely
case of dirty IO data.

So just doing it for "dirty" is not the right knob.

Basically I'm saying if you worry about unreliable IO error reporting
fix IO error reporting, don't add random unnecessary panics to
the memory error handling.

BTW my suspicion is that if you approach this from a data driven
perspective: that is measure how much such dirty data is typically
around in comparison to other data it will be unlikely. Such
a study can be done with the "page-types" program in tools/vm

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majord...@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"d...@kvack.org"> em...@kvack.org </a>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/8] Read/Write oops nvram partition via pstore

2013-04-16 Thread Aruna Balakrishnaiah

On Tuesday 16 April 2013 11:50 AM, Aruna Balakrishnaiah wrote:


Currently with this patchset, pstore is not supporting compression of 
oops-messages

since it involves some changes in the pstore framework.

big_oops_buf will hold the large part of oops data which will be compressed 
and put

to oops_buf.

big_oops_buf: (1.45 of oops_partition_size)


Sorry, big_oops_buf is (2.22 of oops_data_sz)

where oops_data_sz is oops_partition_size - sizeof(oops_log_info).

where oops_log_info is oops header.


_
|  header |   oops-text |
|_|_|

 is added by the pstore.

So in case compression fails:

we would need to log the header + last few bytes of big_oops_buf to oops_buf.
oops_buf: (this is of oops_partition_size)



We would need to log the header + last oops_data_sz bytes of big_oops_buf to 
oops_buf.

So that we can have the header while throwing away the data that immediately
follows it.


we need last few bytes of big_oops_buf as we need to log the recent messages of
printk buffer. For which we need to know the header size and it involves some
changes in the pstore framework.



Just communicating the header size from pstore would do the job for us.


I have the compression patches ready, will be posting it soon as a separate set.


cheers





--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] ptrace/x86: simplify ptrace_write_dr7()

2013-04-16 Thread Jan Kratochvil
On Tue, 16 Apr 2013 15:25:45 +0200, Oleg Nesterov wrote:
> On 04/16, Frederic Weisbecker wrote:
> > On Sun, Apr 14, 2013 at 09:12:05PM +0200, Oleg Nesterov wrote:
> > Looking at the bug report, it seems they only reproduced with a homemade
> > test. No real app has reported that issue?
> 
> iirc (Jan can correct me) gdb hit this problem, but it was already
> changed to change DR0 first.

Both old GDB and recent GDB handle it correctly (DR0 first, then DR7).

I do not remember how but I have hit this issue, probably during development
of the GDB watchpoint code, there were many variants of it in the past.

So it was not found just by artificial testing.

My concern was not so much about GDB but rather about possible other existing
debugging/tracing software using DR.


Jan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH v2] of/base: release the node correctly in of_parse_phandle_with_args()

2013-04-16 Thread Tang Yuantian-B29983


> -Original Message-
> From: Timur Tabi [mailto:ti...@tabi.org]
> Sent: 2013年4月17日 11:31
> To: Tang Yuantian-B29983
> Cc: Grant Likely; devicetree-discuss; linuxppc-...@lists.ozlabs.org; lkml;
> Rob Herring
> Subject: Re: [PATCH v2] of/base: release the node correctly in
> of_parse_phandle_with_args()
> 
> Tang Yuantian-B29983 wrote:
> >> >On Tue, Apr 9, 2013 at 10:36 PM,  wrote:
> >>> > >
> >>> > >+   /* Found it! return success */
> >> >
> >> >I'm pretty sure this comment is in the wrong place.
> 
> > It is not perfect, but acceptable.
> 
> Like I said, I'm pretty sure it's in the wrong place.
> 

It was placed on ELSE statement originally, I moved it to IF statement.
Why is it so wrong?

Thanks,
Yuantian
N�Р骒r��yb�X�肚�v�^�)藓{.n�+�伐�{��赙zXФ�≤�}��财�z�&j:+v�����赙zZ+��+zf"�h���~i���z��wア�?�ㄨ��&�)撷f��^j谦y�m��@A�a囤�
0鹅h���i

Re: [Bug fix PATCH v3] Reusing a resource structure allocated by bootmem

2013-04-16 Thread David Rientjes
On Wed, 17 Apr 2013, Yasuaki Ishimatsu wrote:

> > Why not simply do what generic sparsemem support does by testing
> > PageSlab(virt_to_head_page(res)) and calling kfree() if true and freeing
> > back to bootmem if false?  This should be like a five line patch.
> 
> Is your explanation about free_section_usemap()?
> If so, I don't think we can release resource structure like
> free_section_usemap().

Right, you can't release it like free_section_usemap(), but you're free to 
test for PageSlab(virt_to_head_page(res)) in kernel/resource.c.

> In your explanation case, memmap can be released by put_page_bootmem() in
> free_map_bootmem() since all pages of memmap is used only for memmap.
> But if my understanding is correct, a page of released resource structure
> contain other purpose objects allocated by bootmem. So we cannot
> release resource structure like free_section_usemap().
> 

I'm thinking it would be much easier to just suppress the kfree() if 
!PageSlab.  If you can free an entire page with free_bootmem_late(), 
that would be great, but I'm thinking that will take more work than it's 
worth.  It seems fine to just do free_bootmem() and leave those pages as 
reserved.  How much memory are we talking about?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 097/102] efivars: explicitly calculate length of VariableName

2013-04-16 Thread Lingzhu Xiang

On 04/16/2013 06:33 PM, Luis Henriques wrote:

68d929862e29a8b52a7f2f2f86a0600423b093cd efi: be more paranoid about available 
space when creating variables


This prevents a bricking issue for some Samsung devices but causes 
regression on many other machines.


https://bugzilla.kernel.org/show_bug.cgi?id=55471
https://bugzilla.redhat.com/show_bug.cgi?id=947142
http://article.gmane.org/gmane.linux.kernel.efi/1078
http://article.gmane.org/gmane.linux.kernel.stable/47150

This patchset will fix it again:

http://thread.gmane.org/gmane.linux.kernel.efi/1081

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 1/5] mutex: Make more scalable by doing less atomic operations

2013-04-16 Thread Waiman Long
In the __mutex_lock_common() function, an initial entry into
the lock slow path will cause two atomic_xchg instructions to be
issued. Together with the atomic decrement in the fast path, a total
of three atomic read-modify-write instructions will be issued in
rapid succession. This can cause a lot of cache bouncing when many
tasks are trying to acquire the mutex at the same time.

This patch will reduce the number of atomic_xchg instructions used by
checking the counter value first before issuing the instruction. The
atomic_read() function is just a simple memory read. The atomic_xchg()
function, on the other hand, can be up to 2 orders of magnitude or even
more in cost when compared with atomic_read(). By using atomic_read()
to check the value first before calling atomic_xchg(), we can avoid a
lot of unnecessary cache coherency traffic. The only downside with this
change is that a task on the slow path will have a tiny bit
less chance of getting the mutex when competing with another task
in the fast path.

The same is true for the atomic_cmpxchg() function in the
mutex-spin-on-owner loop. So an atomic_read() is also performed before
calling atomic_cmpxchg().

The mutex locking and unlocking code for the x86 architecture can allow
any negative number to be used in the mutex count to indicate that some
tasks are waiting for the mutex. I am not so sure if that is the case
for the other architectures. So the default is to avoid atomic_xchg()
if the count has already been set to -1. For x86, the check is modified
to include all negative numbers to cover a larger case.

The following table shows the jobs per minute (JPM) scalability data
on an 8-node 80-core Westmere box with a 3.7.10 kernel. The numactl
command is used to restrict the running of the high_systime workloads
to 1/2/4/8 nodes with hyperthreading on and off.

+-+---++--+
|  Configuration  | Mean JPM  |  Mean JPM  | % Change |
| | w/o patch | with patch |  |
+-+---+
| |  User Range 1100 - 2000   |
+-+---+
| 8 nodes, HT on  |36980   |   148590  | +301.8%  |
| 8 nodes, HT off |42799   |   145011  | +238.8%  |
| 4 nodes, HT on  |61318   |   118445  |  +51.1%  |
| 4 nodes, HT off |   158481   |   158592  |   +0.1%  |
| 2 nodes, HT on  |   180602   |   173967  |   -3.7%  |
| 2 nodes, HT off |   198409   |   198073  |   -0.2%  |
| 1 node , HT on  |   149042   |   147671  |   -0.9%  |
| 1 node , HT off |   126036   |   126533  |   +0.4%  |
+-+---+
| |   User Range 200 - 1000   |
+-+---+
| 8 nodes, HT on  |   41525|   122349  | +194.6%  |
| 8 nodes, HT off |   49866|   124032  | +148.7%  |
| 4 nodes, HT on  |   66409|   106984  |  +61.1%  |
| 4 nodes, HT off |  119880|   130508  |   +8.9%  |
| 2 nodes, HT on  |  138003|   133948  |   -2.9%  |
| 2 nodes, HT off |  132792|   131997  |   -0.6%  |
| 1 node , HT on  |  116593|   115859  |   -0.6%  |
| 1 node , HT off |  104499|   104597  |   +0.1%  |
+-++---+--+

At low user range 10-100, the JPM differences were within +/-1%. So
they are not that interesting.

AIM7 benchmark run has a pretty large run-to-run variance due to random
nature of the subtests executed. So a difference of less than +-5%
may not be really significant.

This patch improves high_systime workload performance at 4 nodes
and up by maintaining transaction rates without significant drop-off
at high node count.  The patch has practically no impact on 1- and
2-node systems.

The table below shows the percentage time (as reported by perf
record -a -s -g) spent on the __mutex_lock_slowpath() function by
the high_systime workload at 1500 users for 2/4/8-node configurations
with hyperthreading off.

+---+-+--+-+
| Configuration | %Time w/o patch | %Time with patch | %Change |
+---+-+--+-+
|8 nodes|  65.34% |  0.69%   |  -99%   |
|4 nodes|   8.70% |  1.02%   |  -88%   |
|2 nodes|   0.41% |  0.32%   |  -22%   |
+---+-+--+-+

It is obvious that the dramatic performance improvement at 8
nodes was due to the drastic cut in the time spent within the
__mutex_lock_slowpath() function.

The table below shows the improvements in other AIM7 workloads (at 8
nodes, hyperthreading off).

+--+---++-+
|   Workload   | mean % change | mean % change  | mean % change   |
|  | 10-100 users  | 200-1000 users | 1100-2000 users |
+--+---++

[PATCH v3 optional 3/5] mutex: back out architecture specific check for negative mutex count

2013-04-16 Thread Waiman Long
If it is confirmed that all the supported architectures can allow a
negative mutex count without incorrect behavior, we can then back
out the architecture specific change and allow the mutex count to
go to any negative number. That should further reduce contention for
non-x86 architectures.

If this is not the case, this patch should be dropped.

Signed-off-by: Waiman Long 
---
 arch/x86/include/asm/mutex.h |   10 --
 kernel/mutex.c   |9 ++---
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h
index bc2a0b0..7d3a482 100644
--- a/arch/x86/include/asm/mutex.h
+++ b/arch/x86/include/asm/mutex.h
@@ -3,13 +3,3 @@
 #else
 # include 
 #endif
-
-#ifndef__ASM_MUTEX_H
-#define__ASM_MUTEX_H
-/*
- * For the x86 architecture, it allows any negative number (besides -1) in
- * the mutex count to indicate that some other threads are waiting on the
- * mutex.
- */
-#define __ARCH_ALLOW_ANY_NEGATIVE_MUTEX_COUNT  1
-#endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index e6a90de..5600bdf 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -38,15 +38,10 @@
 #endif
 
 /*
- * A mutex count of -1 indicates that waiters are sleeping waiting for the
- * mutex. Some architectures can allow any negative number, not just -1, for
- * this purpose.
+ * A negative mutex count indicates that waiters are sleeping waiting for the
+ * mutex.
  */
-#ifdef __ARCH_ALLOW_ANY_NEGATIVE_MUTEX_COUNT
 #defineMUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) 
>= 0)
-#else
-#defineMUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) 
!= -1)
-#endif
 
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 0/5] mutex: Improve mutex performance by doing less atomic-ops & better spinning

2013-04-16 Thread Waiman Long
v2->v3
  - Add patch 4 to remove new typedefs introduced in patch 2.
  - Add patch 5 to remove SCHED_FEAT_OWNER_SPIN and move the mutex
spinning code to mutex.c.

v1->v2
 - Remove the 2 mutex spinner patches and replace them with another one
   to improve the mutex spinning process.
 - Remove changes made to kernel/mutex.h & localize changes in
   kernel/mutex.c.
 - Add an optional patch to remove architecture specific check in patch
   1.

This patch set is a collection of 5 different mutex related patches
aimed at improving mutex performance, especially for systems with a large
number of CPUs. This is achieved by doing less atomic operations and
better mutex spinning (when the CONFIG_MUTEX_SPIN_ON_OWNER is on).

The first patch reduces the number of atomic operations executed. It
can produce dramatic performance improvement in the AIM7 benchmark
with large number of CPUs. For example, there was a more than 3X
improvement in the high_systime workload with a 3.7.10 kernel on
an 8-socket x86-64 system with 80 cores. The 3.8 kernels, on the
other hand, are not mutex limited for that workload anymore. So the
performance improvement is only about 1% for the high_systime workload.

Patch 2 improves the mutex spinning process by reducing contention
among the spinners when competing for the mutex. This is done by
using a MCS lock to put the spinners in a queue so that only the
first spinner will try to acquire the mutex when it is available. This
patch showed significant performance improvement of +30% on the AIM7
fserver and new_fserver workload.

Compared with patches 2&3 in v1, the new patch 2 consistently provided
better performance improvement at high user load (1100-2000) for the
fserver and new_fserver AIM7 workloads. The old patches had around 10%
and less improvement at high user load while the new patch produced
30% better performance for the same workloads.

Patch 3 is an optional one for backing out architecture specific check
in patch 1, if so desired.

Patch 4 removes new typedefs introduced in patch 2 and an unnecessary
barrier() call.

Patch 5 removes SCHED_FEAT_OWNER_SPIN which was just an earlier hack
for testing purpose. It also moves the mutex spinning code back to
mutex.c.

Waiman Long (5):
  mutex: Make more scalable by doing less atomic operations
  mutex: Queue mutex spinners with MCS lock to reduce cacheline
contention
  mutex: back out architecture specific check for negative mutex count
  mutex: Remove new typedefs introduced in patch 2
  mutex: Move mutex spinning code from sched/core.c back to mutex.c

 include/linux/mutex.h   |3 +
 include/linux/sched.h   |1 -
 kernel/mutex.c  |  151 +-
 kernel/sched/core.c |   45 --
 kernel/sched/features.h |7 --
 5 files changed, 150 insertions(+), 57 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 4/5] mutex: Remove new typedefs introduced in patch 2

2013-04-16 Thread Waiman Long
In response to the review comment from Davidlohr, this patch will
remove the new typedefs introduced by patch 2. It also removes an
unnecessary barrier() call.

Signed-off-by: Waiman Long 
---
 kernel/mutex.c |   25 +++--
 1 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5600bdf..140f113 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -113,18 +113,16 @@ EXPORT_SYMBOL(mutex_lock);
  * We don't inline mspin_lock() so that perf can correctly account for the
  * time spent in this lock function.
  */
-typedef struct mspin_node {
-   struct mspin_node *next;
-   intlocked;  /* 1 if lock acquired */
-} mspin_node_t;
-
-typedef mspin_node_t   *mspin_lock_t;
-
-#defineMLOCK(mutex)((mspin_lock_t *)&((mutex)->spin_mlock))
-
-static noinline void mspin_lock(mspin_lock_t *lock,  mspin_node_t *node)
+struct mspin_node {
+   struct mspin_node *next ;
+   int   locked;   /* 1 if lock acquired */
+};
+#defineMLOCK(mutex)((struct mspin_node **)&((mutex)->spin_mlock))
+
+static noinline
+void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
 {
-   mspin_node_t *prev;
+   struct mspin_node *prev;
 
/* Init node */
node->locked = 0;
@@ -143,9 +141,9 @@ static noinline void mspin_lock(mspin_lock_t *lock,  
mspin_node_t *node)
arch_mutex_cpu_relax();
 }
 
-static void mspin_unlock(mspin_lock_t *lock,  mspin_node_t *node)
+static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
 {
-   mspin_node_t *next = ACCESS_ONCE(node->next);
+   struct mspin_node *next = ACCESS_ONCE(node->next);
 
if (likely(!next)) {
/*
@@ -157,7 +155,6 @@ static void mspin_unlock(mspin_lock_t *lock,  mspin_node_t 
*node)
while (!(next = ACCESS_ONCE(node->next)))
arch_mutex_cpu_relax();
}
-   barrier();
ACCESS_ONCE(next->locked) = 1;
smp_wmb();
 }
@@ -237,7 +234,7 @@ __mutex_lock_common(struct mutex *lock, long state, 
unsigned int subclass,
 
for (;;) {
struct task_struct *owner;
-   mspin_node_tnode;
+   struct mspin_node  node;
 
/*
 * If there's an owner, wait for it to either
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 2/5] mutex: Queue mutex spinners with MCS lock to reduce cacheline contention

2013-04-16 Thread Waiman Long
The current mutex spinning code (with MUTEX_SPIN_ON_OWNER option turned
on) allows multiple tasks to spin on a single mutex concurrently. A
potential problem with the current approach is that when the mutex
becomes available, all the spinning tasks will try to acquire the
mutex more or less simultaneously. As a result, there will be a lot of
cacheline bouncing especially on systems with a large number of CPUs.

This patch tries to reduce this kind of contention by putting the
mutex spinners into a queue so that only the first one in the queue
will try to acquire the mutex. This will reduce contention and allow
all the tasks to move forward faster.

The queuing of mutex spinners is done using an MCS lock based
implementation which will further reduce contention on the mutex
cacheline than a similar ticket spinlock based implementation. This
patch will add a new field into the mutex data structure for holding
the MCS lock. This expands the mutex size by 8 bytes on 64-bit systems
and 4 bytes on 32-bit systems. This overhead will be avoided if the
MUTEX_SPIN_ON_OWNER option is turned off.

The following table shows the jobs per minute (JPM) scalability data
on an 8-node 80-core Westmere box with a 3.7.10 kernel. The numactl
command is used to restrict the running of the fserver workloads to
1/2/4/8 nodes with hyperthreading off.

+-+---+---+-+--+
|  Configuration  | Mean JPM  | Mean JPM  |  Mean JPM   | % Change |
| | w/o patch | patch 1   | patches 1&2 |  1->1&2  |
+-++
| |  User Range 1100 - 2000|
+-++
| 8 nodes, HT off |  227972   |  227237   |   305043|  +34.2%  |
| 4 nodes, HT off |  393503   |  381558   |   394650|   +3.4%  |
| 2 nodes, HT off |  334957   |  325240   |   338853|   +4.2%  |
| 1 node , HT off |  198141   |  197972   |   198075|   +0.1%  |
+-++
| |  User Range 200 - 1000 |
+-++
| 8 nodes, HT off |  282325   |  312870   |   332185|   +6.2%  |
| 4 nodes, HT off |  390698   |  378279   |   393419|   +4.0%  |
| 2 nodes, HT off |  336986   |  326543   |   340260|   +4.2%  |
| 1 node , HT off |  197588   |  197622   |   197582|0.0%  |
+-+---+---+-+--+

At low user range 10-100, the JPM differences were within +/-1%. So
they are not that interesting.

The fserver workload uses mutex spinning extensively. With just
the mutex change in the first patch, there is no noticeable change
in performance.  Rather, there is a slight drop in performance. This
mutex spinning patch more than recovers the lost performance and shows
a significant increase of +30% at high user load with the full 8 nodes.
Similar improvements were also seen in a 3.8 kernel.

The table below shows the %time spent by different kernel functions
as reported by perf when running the fserver workload at 1500 users
with all 8 nodes.

+---+---+-+-+
|Function   |  % time   | % time  |   % time|
|   | w/o patch | patch 1 | patches 1&2 |
+---+---+-+-+
| __read_lock_failed|  34.96%   | 34.91%  |   29.14%|
| __write_lock_failed   |  10.14%   | 10.68%  |7.51%|
| mutex_spin_on_owner   |   3.62%   |  3.42%  |2.33%|
| mspin_lock|N/A|   N/A   |9.90%|
| __mutex_lock_slowpath |   1.46%   |  0.81%  |0.14%|
| _raw_spin_lock|   2.25%   |  2.50%  |1.10%|
+---+---+-+-+

The fserver workload for an 8-node system is dominated by the
contention in the read/write lock. Mutex contention also plays a
role. With the first patch only, mutex contention is down (as shown by
the __mutex_lock_slowpath figure) which helps a little bit. We saw only
a few percent improvement with that.

By applying patch 2 as well, the single mutex_spin_on_owner figure is
now split out into an additional mspin_lock figure. The combined time
increases from 3.42% to 12.23% (2.33% + 9.90%). It shows a great reduction
in contention among the spinners leading to a 30% improvement. The time
ratio 9.9/2.33=4.3 indicates that there are on average 4+ spinners waiting
in the mspin_lock loop for each spinner in the mutex_spin_on_owner loop.
Contention in other locking functions also goes down by quite a lot.

The table below shows the performance change of both patches 1 & 2 over
patch 1 alone in other AIM7 workloads (at 8 nodes, hyperthreading off).

+--+---++-+
|   Workload   | mean % change | mean % change  | mean % change   |
|  

[PATCH v3 5/5] mutex: Move mutex spinning code from sched/core.c back to mutex.c

2013-04-16 Thread Waiman Long
As mentioned by Ingo, the SCHED_FEAT_OWNER_SPIN scheduler feature
bit was really just an early hack to make with/without mutex-spinning
testable. So it is no longer necessary.

This patch removes the SCHED_FEAT_OWNER_SPIN feature bit and moves the
mutex spinning code from kernel/sched/core.c back to kernel/mutex.c
which is where it belongs.

Signed-off-by: Waiman Long 
---
 include/linux/sched.h   |4 ---
 kernel/mutex.c  |   62 ++
 kernel/sched/core.c |   63 ---
 kernel/sched/features.h |7 -
 4 files changed, 62 insertions(+), 74 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8af6f13..aefe45d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -320,10 +320,6 @@ extern signed long schedule_timeout_killable(signed long 
timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
-extern int mutex_can_spin_on_owner(struct mutex *lock);
-#endif
 
 struct nsproxy;
 struct user_namespace;
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 140f113..ad53a66 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -158,6 +158,68 @@ static void mspin_unlock(struct mspin_node **lock, struct 
mspin_node *node)
ACCESS_ONCE(next->locked) = 1;
smp_wmb();
 }
+
+/*
+ * Mutex spinning code migrated from kernel/sched/core.c
+ */
+
+static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
+{
+   if (lock->owner != owner)
+   return false;
+
+   /*
+* Ensure we emit the owner->on_cpu, dereference _after_ checking
+* lock->owner still matches owner, if that fails, owner might
+* point to free()d memory, if it still matches, the rcu_read_lock()
+* ensures the memory stays valid.
+*/
+   barrier();
+
+   return owner->on_cpu;
+}
+
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+static noinline
+int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+{
+   rcu_read_lock();
+   while (owner_running(lock, owner)) {
+   if (need_resched())
+   break;
+
+   arch_mutex_cpu_relax();
+   }
+   rcu_read_unlock();
+
+   /*
+* We break out the loop above on need_resched() and when the
+* owner changed, which is a sign for heavy contention. Return
+* success only when lock->owner is NULL.
+*/
+   return lock->owner == NULL;
+}
+
+/*
+ * Initial check for entering the mutex spinning loop
+ */
+static inline int mutex_can_spin_on_owner(struct mutex *lock)
+{
+   int retval = 1;
+
+   rcu_read_lock();
+   if (lock->owner)
+   retval = lock->owner->on_cpu;
+   rcu_read_unlock();
+   /*
+* if lock->owner is not set, the mutex owner may have just acquired
+* it and not set the owner yet or the mutex has been released.
+*/
+   return retval;
+}
 #endif
 
 static __used noinline void __sched __mutex_unlock_slowpath(atomic_t 
*lock_count);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 176e82a..b37a22b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2997,69 +2997,6 @@ void __sched schedule_preempt_disabled(void)
preempt_disable();
 }
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-   if (lock->owner != owner)
-   return false;
-
-   /*
-* Ensure we emit the owner->on_cpu, dereference _after_ checking
-* lock->owner still matches owner, if that fails, owner might
-* point to free()d memory, if it still matches, the rcu_read_lock()
-* ensures the memory stays valid.
-*/
-   barrier();
-
-   return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-   rcu_read_lock();
-   while (owner_running(lock, owner)) {
-   if (need_resched())
-   break;
-
-   arch_mutex_cpu_relax();
-   }
-   rcu_read_unlock();
-
-   /*
-* We break out the loop above on need_resched() and when the
-* owner changed, which is a sign for heavy contention. Return
-* success only when lock->owner is NULL.
-*/
-   return lock->owner == NULL;
-}
-
-/*
- * Initial check for entering the mutex spinning loop
- */
-int mutex_can_spin_on_owner(struct mutex *lock)
-{
-   int retval = 1;
-
-   if (!sched_feat(OWNER_SPIN))
-   return 0;
-
-   rcu_read_lock();
-  

[PATCH] scsi_dh: remove unused declaration dm_pg_init_complete()

2013-04-16 Thread Ren Mingxin
This patch removes dm_pg_init_complete()'s declaration as it is
not needed anymore since 2651f5d7d3bc5120a439e498f131e4d731f99b3e.

Signed-off-by: Ren Mingxin 
---
 drivers/md/dm-mpath.h |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h
index e230f71..9c36d0f 100644
--- a/drivers/md/dm-mpath.h
+++ b/drivers/md/dm-mpath.h
@@ -16,7 +16,4 @@ struct dm_path {
void *pscontext;/* For path-selector use */
 };
 
-/* Callback for hwh_pg_init_fn to use when complete */
-void dm_pg_init_complete(struct dm_path *path, unsigned err_flags);
-
 #endif
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel: auditfilter: resource management, need process tree when audit_add_watch failed in audit_add_rule

2013-04-16 Thread Chen Gang

  also please help checking this patch, when you have time.

  thanks.

On 2013年04月12日 16:56, Chen Gang wrote:
> 
>   need call audit_put_tree, if tree is valid.
>   just like another area have done in function audit_add_rule.
> 
> Signed-off-by: Chen Gang 
> ---
>  kernel/auditfilter.c |6 ++
>  1 files changed, 6 insertions(+), 0 deletions(-)
> 
> diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
> index f9fc54b..81f63f9 100644
> --- a/kernel/auditfilter.c
> +++ b/kernel/auditfilter.c
> @@ -952,6 +952,12 @@ static inline int audit_add_rule(struct audit_entry 
> *entry)
>   err = audit_add_watch(&entry->rule, &list);
>   if (err) {
>   mutex_unlock(&audit_filter_mutex);
> + /*
> +  * normally audit_add_tree_rule() will free it
> +  * on failure
> +  */
> + if (tree)
> + audit_put_tree(tree);
>   goto error;
>   }
>   }
> 


-- 
Chen Gang

Asianux Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] kernel: auditfilter: resource management, tree and watch will memory leak when failure occurs

2013-04-16 Thread Chen Gang

  In function audit_data_to_entry:
when a failure occurs, we need to check for and free the tree and watch,
otherwise memory is leaked.

  test:
plan:
  test command:
"auditctl -a exit,always -w /etc -F auid=-1"
(on fedora17, need modify auditctl to let "-w /etc" has effect)
  running:
under fedora17 x86_64, 2 CPUs 3.20GHz, 2.5GB RAM.
let 15 auditctl processes continue running at the same time.
  monitor command: 
watch -d -n 1 "cat /proc/meminfo | awk '{print \$2}' \
  | head -n 4 | xargs \
  | awk '{print \"used \",\$1 - \$2 - \$3 - \$4}'"

result:
  for original version:
uses up all memory within 3 hours.
after killing all auditctl processes, the memory is still not freed.
  for new version (apply this patch):
after 14 hours, no issues were found.


Signed-off-by: Chen Gang 
---
 kernel/auditfilter.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f9fc54b..2674368 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -594,6 +594,10 @@ exit_nofree:
return entry;
 
 exit_free:
+   if (entry->rule.watch)
+   audit_put_watch(entry->rule.watch); /* matches initial get */
+   if (entry->rule.tree)
+   audit_put_tree(entry->rule.tree); /* that's the temporary one */
audit_free_rule(entry);
return ERR_PTR(err);
 }
-- 
1.7.7.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bug fix PATCH v3] Reusing a resource structure allocated by bootmem

2013-04-16 Thread Yasuaki Ishimatsu

Hi David,

2013/04/17 9:36, David Rientjes wrote:

On Wed, 17 Apr 2013, Yasuaki Ishimatsu wrote:


When hot removing memory presented at boot time, following messages are shown:

[  296.867031] [ cut here ]
[  296.922273] kernel BUG at mm/slub.c:3409!
[  296.970229] invalid opcode:  [#1] SMP
[  297.019453] Modules linked in: ebtable_nat ebtables xt_CHECKSUM 
iptable_mangle bridge stp llc ipmi_devintf ipmi_msghandler sunrpc ipt_REJECT 
nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT 
nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter 
ip6_tables binfmt_misc vfat fat dm_mirror dm_region_hash dm_log dm_mod 
vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support coretemp 
kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode pcspkr sg i2c_i801 
lpc_ich mfd_core igb i2c_algo_bit i2c_core e1000e ptp pps_core tpm_infineon 
ioatdma dca sr_mod cdrom sd_mod crc_t10dif usb_storage megaraid_sas lpfc 
scsi_transport_fc scsi_tgt scsi_mod
[  297.747808] CPU 0
[  297.769764] Pid: 5091, comm: kworker/0:2 Tainted: GW3.9.0-rc6+ 
#15
[  297.897917] RIP: 0010:[]  [] 
kfree+0x232/0x240
[  297.988634] RSP: 0018:88084678d968  EFLAGS: 00010246
[  298.052196] RAX: 00600400 RBX: 8987fea0 RCX: 
[  298.137595] RDX: 8107a5ae RSI: 0001 RDI: 8987fea0
[  298.222994] RBP: 88084678d998 R08: 8200 R09: 0001
[  298.308390] R10:  R11:  R12: 0300
[  298.393792] R13: ea061fc0 R14: 0303 R15: 0080
[  298.479190] FS:  () GS:88085aa0() 
knlGS:
[  298.576030] CS:  0010 DS:  ES:  CR0: 80050033
[  298.644791] CR2: 025d3f78 CR3: 01c0c000 CR4: 001407f0
[  298.730192] DR0:  DR1:  DR2: 
[  298.815590] DR3:  DR6: 0ff0 DR7: 0400
[  298.900997] Process kworker/0:2 (pid: 5091, threadinfo 88084678c000, 
task 88083928ca80)
[  299.005121] Stack:
[  299.029156]  0303 8987fea0 0300 
8987fe90
[  299.118116]  0303 0080 88084678d9c8 
8107a5d4
[  299.207084]  3000 8987fffb2680 0080 
3000
[  299.296045] Call Trace:
[  299.325288]  [] __release_region+0xd4/0xe0
[  299.393020]  [] __remove_pages+0x52/0x110
[  299.459707]  [] arch_remove_memory+0x89/0xd0
[  299.529505]  [] remove_memory+0xc4/0x100
[  299.595145]  [] acpi_memory_device_remove+0x6d/0xb1
[  299.672230]  [] acpi_device_remove+0x89/0xab
[  299.742033]  [] __device_release_driver+0x7c/0xf0
[  299.817048]  [] device_release_driver+0x2f/0x50
[  299.889972]  [] acpi_bus_device_detach+0x6c/0x70
[  299.963938]  [] acpi_ns_walk_namespace+0x11a/0x250
[  300.039982]  [] ? power_state_show+0x36/0x36
[  300.109800]  [] ? power_state_show+0x36/0x36
[  300.179612]  [] acpi_walk_namespace+0xee/0x137
[  300.251492]  [] acpi_bus_trim+0x33/0x7a
[  300.316089]  [] ? mutex_lock_nested+0x4a/0x60
[  300.386927]  [] acpi_bus_hot_remove_device+0xc4/0x1a1
[  300.466096]  [] acpi_os_execute_deferred+0x27/0x34
[  300.542137]  [] process_one_work+0x1f7/0x590
[  300.611940]  [] ? process_one_work+0x185/0x590
[  300.683823]  [] worker_thread+0x11a/0x370
[  300.750502]  [] ? manage_workers+0x180/0x180
[  300.820308]  [] kthread+0xee/0x100
[  300.879714]  [] ? __lock_release+0x12b/0x190
[  300.949512]  [] ? __init_kthread_worker+0x70/0x70
[  301.024517]  [] ret_from_fork+0x7c/0xb0
[  301.089135]  [] ? __init_kthread_worker+0x70/0x70
[  301.164138] Code: 89 ef e8 c2 2c fb ff e9 0b ff ff ff 4d 8b 6d 30 e9 5c fe ff ff 
4c 89 f1 48 89 da 4c 89 ee 4c 89 e7 e8 03 f9 ff ff e9 ec fe ff ff <0f> 0b eb fe 
66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec
[  301.397214] RIP  [] kfree+0x232/0x240
[  301.459855]  RSP 
[  301.501675] ---[ end trace 8679967aa8606ed8 ]---

The messages are shown because a resource structure that was allocated by
bootmem is being released with kfree(). So when we release a resource structure,
we should check whether it was allocated by bootmem or not.

But even if we know a resource structure was allocated by bootmem, we cannot
release it, since SLxB cannot handle it. So, in order to reuse such a resource
structure, this patch remembers it in bootmem_resource as follows:

When releasing a resource structure by free_resource(), free_resource() checks
whether the resource structure is allocated by bootmem or not. If it is
allocated by bootmem, free_resource() adds it to bootmem_resource. If it is
not allocated by bootmem, free_resource() releases it by kfree().

And when getting a new resource structure by get_resource(), get_resource()
checks whether bootmem_resource has released resource structures or not. If
there is a released resource structure, get_resource() returns it. If there is

Re: [PATCH] Kbuild: Avoid DTB rebuilds if source files are untouched

2013-04-16 Thread Vineet Gupta
On 04/16/2013 09:32 PM, James Hogan wrote:
>
> Also, I think you probably now want *.dtb.S added to clean-files,
> otherwise they won't get removed by make clean.

Good catch !

Thx,
-Vineet
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Kbuild: Avoid DTB rebuilds if source files are untouched

2013-04-16 Thread Vineet Gupta
Hi James,

On 04/16/2013 09:23 PM, James Hogan wrote:
> On 12/04/13 22:52, Stephen Warren wrote:
>> +.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
> Note, this may not work if you're using CONFIG_ARC_BUILTIN_DTB_NAME,
> since it'll have quotes around it, so you may instead need:
> .SECONDARY: $(obj)/$(patsubst "%",%,$(builtindtb-y)).dtb.S
>
> (at least that's what's required for the metag equivalent)
>
>> +
>>  dtbs:  $(addprefix  $(obj)/, $(builtindtb-y).dtb)
> You might find the same thing here too.

Actually in my Makefile, the quotes are stripped off in the very beginning to
avoid duplicating it in every place.

ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"")
builtindtb-y:= $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME))
endif


Thus both the above are not required - redundant if at all.

-Vineet
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] module: add kset_obj_exists() and use it

2013-04-16 Thread Rusty Russell
Veaceslav Falico  writes:
> Tested for a day on two reproducers on the latest upstream kernel, with the
> recent kobject fix a49b7e82 ("kobject: fix kset_find_obj() race with
> concurrent last kobject_put()") - it fixes the issue, no regressions met.

Thanks, I've included the fix in my modules-next tree.

I did not CC:stable, since concurrent unload and reload is not really a
normal condition.

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] kernel: audit_tree: resource management: need put_tree and goto Err when failure occurs

2013-04-16 Thread Chen Gang

  Since "normally audit_add_tree_rule() will free it on failure",
  we need to free it completely when a failure occurs.

An additional put_tree() is needed before returning, since get_tree() was called.
We also always need to go to the error-processing area so that list_del_init() runs.

Signed-off-by: Chen Gang 
---
 kernel/audit_tree.c |5 -
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 642a89c..9dfb0da 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -694,13 +694,15 @@ int audit_add_tree_rule(struct audit_krule *rule)
spin_unlock(&hash_lock);
} else {
trim_marked(tree);
+   put_tree(tree);
goto Err;
}
 
mutex_lock(&audit_filter_mutex);
if (list_empty(&rule->rlist)) {
put_tree(tree);
-   return -ENOENT;
+   err = -ENOENT;
+   goto Err1;
}
rule->tree = tree;
put_tree(tree);
@@ -708,6 +710,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
return 0;
 Err:
mutex_lock(&audit_filter_mutex);
+Err1:
list_del_init(&tree->list);
list_del_init(&tree->rules);
put_tree(tree);
-- 
1.7.7.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/5] powerpc, perf: Add basic assembly code to read BHRB entries on POWER8

2013-04-16 Thread Anshuman Khandual
On 04/16/2013 10:53 PM, Segher Boessenkool wrote:
>> +/* r3 = n  (where n = [0-1023])
>> + * The maximum number of BHRB entries supported with PPC_MFBHRBE
>> instruction
>> + * is 1024. We have limited number of table entries here as POWER8
>> implements
>> + * 32 BHRB entries.
>> + */
>> +
>> +/* .global read_bhrb */
>> +_GLOBAL(read_bhrb)
>> +cmpldi  r3,1023
> 
> This should be 31, since that is the last entry in the table below.

Hey Segher,

Would fix this in the next version. Thanks for pointing it out.

Regards
Anshuman

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 05/10] perf util: Parse header_page to get proper long size

2013-04-16 Thread Namhyung Kim
From: Namhyung Kim 

The header_page file describes the format of the ring buffer page
which is used by ftrace (not perf).  And size of "commit" field (I
guess its older name was 'size') represents the real size of long
type used for kernel.  So update the pevent's long size.

Signed-off-by: Namhyung Kim 
---
It should read into header_page not buf.

 tools/perf/util/trace-event-read.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/trace-event-read.c 
b/tools/perf/util/trace-event-read.c
index fa45fca2a2d3..f2112270c663 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -212,6 +212,7 @@ static int read_ftrace_printk(struct pevent *pevent)
 static int read_header_files(struct pevent *pevent)
 {
unsigned long long size;
+   char *header_page;
char buf[BUFSIZ];
int ret = 0;
 
@@ -224,7 +225,26 @@ static int read_header_files(struct pevent *pevent)
}
 
size = read8(pevent);
-   skip(size);
+
+   header_page = malloc(size);
+   if (header_page == NULL)
+   return -1;
+
+   if (do_read(header_page, size) < 0) {
+   pr_debug("did not read header page");
+   free(header_page);
+   return -1;
+   }
+
+   if (!pevent_parse_header_page(pevent, header_page, size,
+ pevent_get_long_size(pevent))) {
+   /*
+* The commit field in the page is of type long,
+* use that instead, since it represents the kernel.
+*/
+   pevent_set_long_size(pevent, pevent->header_page_size_size);
+   }
+   free(header_page);
 
if (do_read(buf, 13) < 0)
return -1;
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] perf tools: Revert regression in configuration of Python support

2013-04-16 Thread Michael Witten
On Tue, 16 Apr 2013 20:41:59 -, Michael Witten wrote:

> On Tue, 16 Apr 2013 13:32:08 -0700, David Ahern wrote:
>
>> On 4/16/13 10:08 AM, Michael Witten wrote:
>>> You should probably disable python support more directly:
>>>
>>>make NO_LIBPYTHON=1
>> 
>> sure, but I should not have to do anything. The intent of the existing 
>> auto-probing code is to figure out what is installed and build a binary 
>> with those capabilities. In this case not having python installed causes 
>> it to blow up.
>
> That's certainly how it behaved up until the regression.
>
> To make matters worse, the NO_LIBPYTHON variable is checked only *after* 
> probing for an executable `python'; in the case that no python is installed
> at all, the workaround is to double up on your current trick:
>
>   make PYTHON=false PYTHON_CONFIG=false

I'm probably going to submit a small patch series to improve this
configuration code in general, but there's no reason to wait for
me to do this.

Thus, I think the simplest thing to do right away is just to revert
the one-line change that led to the regression, thereby restoring
the old behavior which has hitherto worked well enough.

The following patch applies to at least the following commit:

  bb33db7a076f4719dc68c235e187dd4bfb16b621

To apply this patch, save this email to:

  /path/to/email

and then run:

  git am --scissors /path/to/email

Sincerely,
Michael Witten

8<---8<---8<---8<---8<---8<---
Among other things, the following:

  commit 31160d7feab786c991780d7f0ce2755a469e0e5e
  Date:   Tue Jan 8 16:22:36 2013 -0500
  perf tools: Fix GNU make v3.80 compatibility issue

attempts to aid the user by tapping into an existing error message,
as described in the commit message:

  ... Also fix an issue where _get_attempt was called with only
  one argument. This prevented the error message from printing
  the name of the variable that can be used to fix the problem.

or more precisely:

  -$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
  +$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1)))

However, the "missing" argument was in fact missing on purpose; its
absence is a signal that the error message should be skipped, because
the failure would be due to the default value, not any user-supplied
value.  This can be seen in how `_ge_attempt' uses `_gea_err' (in the
config/utilities.mak file):

  _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call 
_gea_err,$(2)))
  _gea_warn = $(warning The path '$(1)' is not executable.)
  _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))

That is, because the argument is no longer missing, the value `$(1)'
(associated with `_gea_err') always evaluates to true, thus always
triggering the error condition that is meant to be reserved for
only the case when a user explicitly supplies an invalid value.

Concretely, the result is a regression in the Makefile's configuration
of python support; rather than gracefully disable support when the
relevant executables cannot be found according to default values, the
build process halts in error as though the user explicitly supplied
the values.

This new commit simply reverts the offending one-line change.

Reported-by: Pekka Enberg 
Link: 
http://lkml.kernel.org/r/caojsxlhv17ys3m7p5q25imkuxqw6le_vabxh1n3tt7mv6ho...@mail.gmail.com
Signed-off-by: Michael Witten 

---
 tools/perf/config/utilities.mak | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index 8ef3bd3..3e89719 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -173,7 +173,7 @@ _ge-abspath = $(if $(is-executable),$(1))
 # Usage: absolute-executable-path-or-empty = $(call 
get-executable-or-default,variable,default)
 #
 define get-executable-or-default
-$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1)))
+$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
 _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call 
_gea_err,$(2)))
 _gea_warn = $(warning The path '$(1)' is not executable.)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] of/base: release the node correctly in of_parse_phandle_with_args()

2013-04-16 Thread Timur Tabi

Tang Yuantian-B29983 wrote:

>On Tue, Apr 9, 2013 at 10:36 PM,  wrote:

> >
> >+   /* Found it! return success */

>
>I'm pretty sure this comment is in the wrong place.



It is not perfect, but acceptable.


Like I said, I'm pretty sure it's in the wrong place.

--
Timur Tabi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


regulator: tps80031: question about LDO2 TRACK_MODE_ENABLE of TPS80031 or TPS80032-ES1.0

2013-04-16 Thread Axel Lin
hi Laxman,

Currently we have below code in tps80031_ldo_set_voltage_sel():

/* Check for valid setting for TPS80031 or TPS80032-ES1.0 */
if ((ri->rinfo->desc.id == TPS80031_REGULATOR_LDO2) &&
(ri->device_flags & TRACK_MODE_ENABLE)) {
unsigned nvsel = (sel) & 0x1F;
if (((tps80031_get_chip_info(parent) == TPS80031) ||
((tps80031_get_chip_info(parent) == TPS80032) &&
(tps80031_get_pmu_version(parent) == 0x0))) &&
((nvsel == 0x0) || (nvsel >= 0x19 && nvsel <= 0x1F))) {
dev_err(ri->dev,
"Invalid sel %d in track mode LDO2\n",
nvsel);
return -EINVAL;
}
}

However, list_voltage() still reports a supported voltage for these invalid
selectors.
Besides, the code "nvsel = (sel) & 0x1F" looks a bit odd, because currently
n_voltages is set to 57 when TRACK_MODE_ENABLE is set.

I'm wondering if below comment is still true for LDO2 TRACK_MODE_ENABLE of
"TPS80031/TPS80032-ES1.0"?

/* TRACK mode the ldo2 varies from 600mV to 1300mV */

What is the register value && voltage mapping for LDO2 TRACK_MODE_ENABLE of
TPS80031 or TPS80032-ES1.0?

Regards,
Axel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Device driver memory 'mmap()' function helper cleanup

2013-04-16 Thread Linus Torvalds
Guys, I just pushed out a new helper function intended for cleaning up
various device driver mmap functions, because they are rather messy,
and at least part of the problem was the bad impedance between what a
driver author would want to have, and the VM interfaces to map a
memory range into user space with mmap.

Some drivers would end up doing extensive checks on the length of the
mappings and the page offset within the mapping, while other drivers
would end up doing no checks at all.

The new helper is in commit b4cbb197c7e7 ("vm: add vm_iomap_memory()
helper function"), but I didn't actually commit any *users* of it,
because I just have this untested patch-collection for a few random
drivers (picked across a few different driver subsystems, just to make
it interesting).  I did that largely just to check the different use
cases, but I don't actually tend to *use* all that many fancy drivers,
so I don't have much of a way of testing it.

The media layer has a few users of [io_]remap_pfn_range() that look
like they could do with some tender loving too, but they don't match
this particular pattern of "allow users to map a part of a fixed range
of memory". In fact, the media pattern seems to be single-page
mappings, which probably should use "vm_insert_page()" instead, but
that's a whole separate thing. But I didn't check all the media cases
(and there's a lot of remap_pfn_range use outside of media drivers I
didn't check either), so there might be code that could use the new
helper.

Anyway, I'm attaching the *untested* patch to several drivers. Guys,
mind taking a look? The point here is to simplify the interface,
avoiding bugs, but also:

 5 files changed, 21 insertions(+), 87 deletions(-)

it needs current -git for the new helper function.

NOTE! The driver subsystem .mmap functions seem to almost universally do

if (io_remap_pfn_range(..))
return -EAGAIN;
return 0;

and I didn't make the new helper function do that "turn all
remap_pfn_range errors into EAGAIN". My *suspicion* is that this is
just really old copy-pasta and makes no sense, but maybe there is some
actual reasoning behind EAGAIN vs ENOMEM, for example. EAGAIN is
documented to be about file/memory locking, which means that it really
doesn't make any sense, but obviously there might be some binary that
actually depends on this, so I'm perfectly willing to make the helper
do that odd error case, I'd just like to know (and add a comment)
WHY.

My personal guess is that nobody actually cares (we return other error
codes for other cases, notably EINVAL for various out-of-mapping-range
issues), and the whole EAGAIN return value is just a completely
historical oddity.

(And yes, I know the mtdchar code is actually disabled right now. But
that was a good example of a driver that had a bug in this area and
that I touched myself not too long ago, and recent stable noise
reminded me of it, so I did that one despite it not being active).

Linus


patch.diff
Description: Binary data


Re: [PATCH] vfs: fix audit_inode call in O_CREAT case of do_last

2013-04-16 Thread Richard Guy Briggs
On Fri, Apr 12, 2013 at 03:16:32PM -0400, Jeff Layton wrote:
> Jiri reported a regression in auditing of open(..., O_CREAT) syscalls.
> In older kernels, creating a file with open(..., O_CREAT) created
> audit_name records that looked like this:
> 
> type=PATH msg=audit(1360255720.628:64): item=1 name="/abc/foo" inode=138810 
> dev=fd:00 mode=0100640 ouid=0 ogid=0 rdev=00:00 
> obj=unconfined_u:object_r:default_t:s0
> type=PATH msg=audit(1360255720.628:64): item=0 name="/abc/" inode=138635 
> dev=fd:00 mode=040750 ouid=0 ogid=0 rdev=00:00 
> obj=unconfined_u:object_r:default_t:s0
> 
> ...in recent kernels though, they look like this:
> 
> type=PATH msg=audit(1360255402.886:12574): item=2 name=(null) inode=264599 
> dev=fd:00 mode=0100640 ouid=0 ogid=0 rdev=00:00 
> obj=unconfined_u:object_r:default_t:s0
> type=PATH msg=audit(1360255402.886:12574): item=1 name=(null) inode=264598 
> dev=fd:00 mode=040750 ouid=0 ogid=0 rdev=00:00 
> obj=unconfined_u:object_r:default_t:s0
> type=PATH msg=audit(1360255402.886:12574): item=0 name="/abc/foo" 
> inode=264598 dev=fd:00 mode=040750 ouid=0 ogid=0 rdev=00:00 
> obj=unconfined_u:object_r:default_t:s0
> 
> Richard bisected to determine that the problems started with commit
> bfcec708, but the log messages have changed with some later
> audit-related patches.
> 
> The problem is that this audit_inode call is passing in the parent of
> the dentry being opened, but audit_inode is being called with the parent
> flag false. This causes later audit_inode and audit_inode_child calls to
> match the wrong entry in the audit_names list.
> 
> This patch simply sets the flag to properly indicate that this inode
> represents the parent. With this, the audit_names entries are back to
> looking like they did before.

This patch fixes the problem for me.

Tested-by: Richard Guy Briggs 

> Cc:  # v3.7+
> Cc: Richard Guy Briggs 
> Reported-by: Jiri Jaburek 
> Signed-off-by: Jeff Layton 
> ---
>  fs/namei.c |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/namei.c b/fs/namei.c
> index 57ae9c8..85e40d1 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2740,7 +2740,7 @@ static int do_last(struct nameidata *nd, struct path 
> *path,
>   if (error)
>   return error;
>  
> - audit_inode(name, dir, 0);
> + audit_inode(name, dir, LOOKUP_PARENT);
>   error = -EISDIR;
>   /* trailing slashes? */
>   if (nd->last.name[nd->last.len])
> -- 
> 1.7.1
> 
> --
> Linux-audit mailing list
> linux-au...@redhat.com
> https://www.redhat.com/mailman/listinfo/linux-audit

- RGB

--
Richard Guy Briggs 
Senior Software Engineer
AMER ENG Base Operating Systems
Remote, Canada, Ottawa
Voice: 1.647.777.2635
Internal: (81) 32635
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm: mmu_notifier: re-fix freed page still mapped in secondary MMU

2013-04-16 Thread Xiao Guangrong
On 04/17/2013 02:08 AM, Robin Holt wrote:
> On Tue, Apr 16, 2013 at 09:07:20PM +0800, Xiao Guangrong wrote:
>> On 04/16/2013 07:43 PM, Robin Holt wrote:
>>> Argh.  Taking a step back helped clear my head.
>>>
>>> For the -stable releases, I agree we should just go with your
>>> revert-plus-hlist_del_init_rcu patch.  I will give it a test
>>> when I am in the office.
>>
>> Okay. Wait for your test report. Thank you in advance.
>>
>>>
>>> For the v3.10 release, we should work on making this more
>>> correct and completely documented.
>>
>> Better documentation is always welcome.
>>
>> Calling ->release twice is not bad, as I mentioned in the changelog:
>>
>> it is really rare (e.g, can not happen on kvm since mmu-notify is 
>> unregistered
>> after exit_mmap()) and the later call of multiple ->release should be
>> fast since all the pages have already been released by the first call.
>>
>> But, of course, it's great if you have a _light_ way to avoid this.
> 
> Getting my test environment set back up took longer than I would have liked.
> 
> Your patch passed.  I got no NULL-pointer derefs.

Thanks for your test again.

> 
> How would you feel about adding the following to your patch?

I prefer to make these changes as a separate patch, this change is the
improvement, please do not mix it with bugfix.

You can make a patchset (comments improvement and this change) based on
my fix.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH v2] of/base: release the node correctly in of_parse_phandle_with_args()

2013-04-16 Thread Tang Yuantian-B29983
> -Original Message-
> From: Timur Tabi [mailto:ti...@tabi.org]
> Sent: 2013年4月16日 19:37
> To: Tang Yuantian-B29983
> Cc: Grant Likely; devicetree-discuss; linuxppc-...@lists.ozlabs.org; lkml;
> Rob Herring
> Subject: Re: [PATCH v2] of/base: release the node correctly in
> of_parse_phandle_with_args()
> 
> On Tue, Apr 9, 2013 at 10:36 PM,   wrote:
> >
> > +   /* Found it! return success */
> 
> I'm pretty sure this comment is in the wrong place.

It is not perfect, but acceptable.

-Yuantian



Re: [PATCH] kernel: auditfilter: looping issue, memory leak if has 2 or more AUDIT_FILTERKEYs

2013-04-16 Thread Chen Gang
On 2013年04月16日 18:38, Chen Gang wrote:
> On 2013年04月16日 18:25, Chen Gang wrote:
>> On 2013年04月12日 17:42, Chen Gang wrote:
>>> On 2013年04月11日 12:10, Chen Gang wrote:
 On 2013年04月11日 05:19, Eric Paris wrote:
> - Original Message -
>
>>>   b. has an new issue for AUDIT_DIR:
>>>after AUDIT_DIR succeed, it will set rule->tree.
>>>next, the other case fail, then will call audit_free_rule.
>>>but audit_free_rule will not free rule->tree.
> Definitely a couple of leaks here...
>
> I'm seeing leaks on size 8, 64, and 128.
>
> Al, what do you think?  Should I be calling audit_put_tree() in the error 
> case if entry->tree != NULL?  The audit trees are some of the most 
> complex code in the kernel I think.
>
>

  after the test, the original version really has memory leak.

  test:
the related monitor command is:
  watch -d -n 1 "cat /proc/meminfo | awk '{print \$2}' \
| head -n 4 | xargs \
| awk '{print \"used \",\$1 - \$2 - \$3 - \$4}'"
I run 15 processes of modified auditctl at the same time.

  result:
for original version:
  can see the memory leak, it will be more clear after 1 - 2 hours.

for new version (fix it):
  can not see the memory leak after ran 12 - 14 hours.


  I will use LTP (ltp-full-20130109) to test audit again under fedora 17
x86_64 for next-20130415, then send related patch.


  welcome any suggestions or completions.


> 
>   oh, also need buffering optarg of auditctl under fedora 17.
>   or "-F auid=-1" will be truncated to "-F auid".
>   it is ok if not looping again. but in our case, we need loop again.
> 
>   to see memory usage, I think:
> in top, really used memory = 'used' - 'cached'
> it is enough for us.
> 
>   welcome any suggestions or completions.
> 
>   thanks.
> 
> 
>>
>>   I am just testing about it with:
>>
>> ---
>> while(1)
>> auditctl -a exit,always -w /etc -F auid=-1
>> ---
>>
>>   under fedora 17, we need modify the auditctl source code:
>> a. let -w /etc can pass auditctl checking.
>> b. let loop infinitely in a process (if process quit, will free mem)
>> c. need fix a bug for auditctl (under Fedora 17)
>>  audit_open may open 2 times.
>>  when loop infinitely, it will cause resource handle leak.
>>
>>   I have checked (by insert printf in kernel/auditfilter.c):
>> after modifying auditctl, the work flow is just what we want it to be.
>>   (will alloc watch, alloc tree, then failure occurs)
>>
>>
>>   I guess, we need 2-3 days to get a test result.
>>
>>
>>   welcome any suggestions and completions.
>>
>>   thanks.
>>
>>
>>
>>>
>>>   it seems, your way is the only executable way (if not change code much).
>>>   what my original idea is incorrect.
>>>
>>> we need add related code at failure process area in audit_data_to_entry.
>>> and another functions need not add these code (should not add).
>>> 'watch' also need be processed, since audit_to_watch let ref count = 2.
>>>   (it just like the function audit_del_rule has done)
>>>
>>>   please help check thanks.
>>>
>>>   :-)
>>>
>>>
>>> diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
>>> index 81f63f9..f5327ce 100644
>>> --- a/kernel/auditfilter.c
>>> +++ b/kernel/auditfilter.c
>>> @@ -594,6 +594,10 @@ exit_nofree:
>>> return entry;
>>>  
>>>  exit_free:
>>> +   if (entry->rule.watch)
>>> +   audit_put_watch(entry->rule.watch); /* matches initial get */
>>> +   if (entry->rule.tree)
>>> +   audit_put_tree(entry->rule.tree); /* that's the temporary one */
>>> audit_free_rule(entry);
>>> return ERR_PTR(err);
>>>  }
>>>
>>>
>>>

   can we add it in audit_free_rule ?

   maybe like this:

 @@ -75,6 +75,8 @@ static inline void audit_free_rule(struct audit_entry *e)
/* some rules don't have associated watches */
if (erule->watch)
audit_put_watch(erule->watch);
 +  if (erule->tree)
 +  audit_put_tree(erule->tree);
if (erule->fields)
for (i = 0; i < erule->field_count; i++) {
struct audit_field *f = &erule->fields[i];


   thanks.

   :-)

>>>
>>>
>>
>>
> 
> 


-- 
Chen Gang

Asianux Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] regulator: as3711: Use a static of_regulator_match table for of_regulator_match

2013-04-16 Thread Axel Lin
The same table can be used for multiple instances of pdev, so we don't need to
allocate memory for of_regulator_match table per pdev.

Signed-off-by: Axel Lin 
---
 drivers/regulator/as3711-regulator.c |   46 ++
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/drivers/regulator/as3711-regulator.c 
b/drivers/regulator/as3711-regulator.c
index 0539b3e..dd1a089 100644
--- a/drivers/regulator/as3711-regulator.c
+++ b/drivers/regulator/as3711-regulator.c
@@ -278,52 +278,44 @@ static struct as3711_regulator_info as3711_reg_info[] = {
 
 #define AS3711_REGULATOR_NUM ARRAY_SIZE(as3711_reg_info)
 
-static const char *as3711_regulator_of_names[AS3711_REGULATOR_NUM] = {
-   [AS3711_REGULATOR_SD_1] = "sd1",
-   [AS3711_REGULATOR_SD_2] = "sd2",
-   [AS3711_REGULATOR_SD_3] = "sd3",
-   [AS3711_REGULATOR_SD_4] = "sd4",
-   [AS3711_REGULATOR_LDO_1] = "ldo1",
-   [AS3711_REGULATOR_LDO_2] = "ldo2",
-   [AS3711_REGULATOR_LDO_3] = "ldo3",
-   [AS3711_REGULATOR_LDO_4] = "ldo4",
-   [AS3711_REGULATOR_LDO_5] = "ldo5",
-   [AS3711_REGULATOR_LDO_6] = "ldo6",
-   [AS3711_REGULATOR_LDO_7] = "ldo7",
-   [AS3711_REGULATOR_LDO_8] = "ldo8",
+static struct of_regulator_match as3711_regulator_matches[] = {
+   { .name = "sd1" },
+   { .name = "sd2" },
+   { .name = "sd3" },
+   { .name = "sd4" },
+   { .name = "ldo1" },
+   { .name = "ldo2" },
+   { .name = "ldo3" },
+   { .name = "ldo4" },
+   { .name = "ldo5" },
+   { .name = "ldo6" },
+   { .name = "ldo7" },
+   { .name = "ldo8" },
 };
 
 static int as3711_regulator_parse_dt(struct device *dev,
struct device_node **of_node, const int count)
 {
struct as3711_regulator_pdata *pdata = dev_get_platdata(dev);
-   struct device_node *regulators =
-   of_find_node_by_name(dev->parent->of_node, "regulators");
-   struct of_regulator_match *matches, *match;
+   struct device_node *regulators;
+   struct of_regulator_match *match;
int ret, i;
 
+   regulators = of_find_node_by_name(dev->parent->of_node, "regulators");
if (!regulators) {
dev_err(dev, "regulator node not found\n");
return -ENODEV;
}
 
-   matches = devm_kzalloc(dev, sizeof(*matches) * count, GFP_KERNEL);
-   if (!matches)
-   return -ENOMEM;
-
-   for (i = 0, match = matches; i < count; i++, match++) {
-   match->name = as3711_regulator_of_names[i];
-   match->driver_data = as3711_reg_info + i;
-   }
-
-   ret = of_regulator_match(dev->parent, regulators, matches, count);
+   ret = of_regulator_match(dev->parent, regulators,
+as3711_regulator_matches, count);
of_node_put(regulators);
if (ret < 0) {
dev_err(dev, "Error parsing regulator init data: %d\n", ret);
return ret;
}
 
-   for (i = 0, match = matches; i < count; i++, match++)
+   for (i = 0, match = as3711_regulator_matches; i < count; i++, match++)
if (match->of_node) {
pdata->init_data[i] = match->init_data;
of_node[i] = match->of_node;
-- 
1.7.10.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: RE: [PATCH v3] clk: add PowerPC corenet clock driver support

2013-04-16 Thread Tang Yuantian-B29983
OK, thanks.

Thanks,
Yuantian


> -Original Message-
> From: Mike Turquette [mailto:mturque...@linaro.org]
> Sent: 2013年4月17日 6:27
> To: Tang Yuantian-B29983; Tang Yuantian-B29983
> Cc: linus.wall...@linaro.org; viresh.ku...@linaro.org;
> shawn@linaro.org; ulf.hans...@linaro.org; linux-
> ker...@vger.kernel.org; devicetree-disc...@lists.ozlabs.org; linuxppc-
> d...@lists.ozlabs.org; linux-...@vger.kernel.org; Li Yang-R58472
> Subject: Re: RE: [PATCH v3] clk: add PowerPC corenet clock driver support
> 
> Quoting Tang Yuantian-B29983 (2013-04-15 23:59:34)
> > Hi Mike,
> >
> > I would really appreciate it if you could spend some time reviewing this patch.
> >
> 
> Yuantian,
> 
> Thanks for submitting this patch.  I have frozen the changes I plan to
> submit for 3.10, with the exception of any last-minute fixes.  I'll take
> a closer look at this after the merge window.
> 
> Regards,
> Mike
> 
> > Thanks,
> > Yuantian
> >
> >
> > > -Original Message-
> > > From: Tang Yuantian-B29983
> > > Sent: 2013年4月9日 16:46
> > > To: mturque...@linaro.org
> > > Cc: linus.wall...@linaro.org; viresh.ku...@linaro.org;
> > > shawn@linaro.org; ulf.hans...@linaro.org; linux-
> > > ker...@vger.kernel.org; devicetree-disc...@lists.ozlabs.org;
> > > linuxppc- d...@lists.ozlabs.org; linux-...@vger.kernel.org; Tang
> > > Yuantian-B29983; Tang Yuantian-B29983; Li Yang-R58472
> > > Subject: [PATCH v3] clk: add PowerPC corenet clock driver support
> > >
> > > From: Tang Yuantian 
> > >
> > > This adds the clock driver for Freescale PowerPC corenet series SoCs
> > > using common clock infrastructure.
> > >
> > > Signed-off-by: Tang Yuantian 
> > > Signed-off-by: Li Yang 
> > > ---
> > > v3:
> > >   - remove the module author and description
> > > v2:
> > >   - add the document for device tree clock bindings
> > >
> > >  arch/powerpc/platforms/Kconfig.cputype |   1 +
> > >  drivers/clk/Kconfig|   7 +
> > >  drivers/clk/Makefile   |   1 +
> > >  drivers/clk/clk-ppc-corenet.c  | 280
> > > +
> > >  4 files changed, 289 insertions(+)
> > >  create mode 100644 drivers/clk/clk-ppc-corenet.c
> > >
> > > diff --git a/arch/powerpc/platforms/Kconfig.cputype
> > > b/arch/powerpc/platforms/Kconfig.cputype
> > > index 18e3b76..cf065b8 100644
> > > --- a/arch/powerpc/platforms/Kconfig.cputype
> > > +++ b/arch/powerpc/platforms/Kconfig.cputype
> > > @@ -158,6 +158,7 @@ config E500
> > >  config PPC_E500MC
> > >   bool "e500mc Support"
> > >   select PPC_FPU
> > > + select COMMON_CLK
> > >   depends on E500
> > >   help
> > > This must be enabled for running on e500mc (and derivatives
> > > diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index
> > > a47e6ee..6e2fd9c
> > > 100644
> > > --- a/drivers/clk/Kconfig
> > > +++ b/drivers/clk/Kconfig
> > > @@ -63,6 +63,13 @@ config CLK_TWL6040
> > > McPDM. McPDM module is using the external bit clock on the
> > > McPDM bus
> > > as functional clock.
> > >
> > > +config CLK_PPC_CORENET
> > > + bool "Clock driver for PowerPC corenet platforms"
> > > + depends on PPC_E500MC && OF
> > > + ---help---
> > > +   This adds the clock driver support for Freescale PowerPC
> corenet
> > > +   platforms using common clock framework.
> > > +
> > >  endmenu
> > >
> > >  source "drivers/clk/mvebu/Kconfig"
> > > diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index
> > > 300d477..6720319 100644
> > > --- a/drivers/clk/Makefile
> > > +++ b/drivers/clk/Makefile
> > > @@ -34,3 +34,4 @@ obj-$(CONFIG_X86)   += x86/
> > >  obj-$(CONFIG_COMMON_CLK_WM831X) += clk-wm831x.o
> > >  obj-$(CONFIG_COMMON_CLK_MAX77686) += clk-max77686.o
> > >  obj-$(CONFIG_CLK_TWL6040)+= clk-twl6040.o
> > > +obj-$(CONFIG_CLK_PPC_CORENET)+= clk-ppc-corenet.o
> > > diff --git a/drivers/clk/clk-ppc-corenet.c b/drivers/clk/clk-ppc-
> > > corenet.c new file mode 100644 index 000..a2d483f
> > > --- /dev/null
> > > +++ b/drivers/clk/clk-ppc-corenet.c
> > > @@ -0,0 +1,280 @@
> > > +/*
> > > + * Copyright 2013 Freescale Semiconductor, Inc.
> > > + *
> > > + * This program is free software; you can redistribute it and/or
> > > +modify
> > > + * it under the terms of the GNU General Public License version 2
> > > +as
> > > + * published by the Free Software Foundation.
> > > + *
> > > + * clock driver for Freescale PowerPC corenet SoCs.
> > > + */
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +
> > > +struct cmux_clk {
> > > + struct clk_hw hw;
> > > + void __iomem *reg;
> > > + u32 flags;
> > > +};
> > > +
> > > +#define PLL_KILL BIT(31)
> > > +#define  CLKSEL_SHIFT27
> > > +#define CLKSEL_ADJUSTBIT(0)
> > > +#define to_cmux_clk(p)   container_of(p, struct cmux_clk,
> hw)
> > > +
> > > +static void __iomem *base;
> > > +s

Re: [PATCH] pm2301-charger: Fix suspend/resume

2013-04-16 Thread Anton Vorontsov
On Sat, Apr 13, 2013 at 01:20:07PM +0200, Lars-Peter Clausen wrote:
> The pm2301-charger driver implements runtime pm and at the same time uses the
> legacy pm callbacks for suspend and resume. This does not work since the I2C
> core won't look at the legacy pm callbacks if a driver has the 'pm' field set.
> This patch fixes it by moving over to dev_pm_ops for suspend/resume as well.
> 
> Signed-off-by: Lars-Peter Clausen 
> ---

Applied, thanks a lot!

Anton
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] power: kmemdup instead of kzalloc + memcpy

2013-04-16 Thread Anton Vorontsov
On Wed, Mar 27, 2013 at 11:48:21PM +0200, Andrei Epure wrote:
> Patch found using coccinelle.
> 
> Signed-off-by: Andrei Epure 
> ---

Applied, thanks!

>  drivers/power/charger-manager.c |3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
> index 8acc3f8..fefc39f 100644
> --- a/drivers/power/charger-manager.c
> +++ b/drivers/power/charger-manager.c
> @@ -1485,13 +1485,12 @@ static int charger_manager_probe(struct 
> platform_device *pdev)
>  
>   /* Basic Values. Unspecified are Null or 0 */
>   cm->dev = &pdev->dev;
> - cm->desc = kzalloc(sizeof(struct charger_desc), GFP_KERNEL);
> + cm->desc = kmemdup(desc, sizeof(struct charger_desc), GFP_KERNEL);
>   if (!cm->desc) {
>   dev_err(&pdev->dev, "Cannot allocate memory.\n");
>   ret = -ENOMEM;
>   goto err_alloc_desc;
>   }
> - memcpy(cm->desc, desc, sizeof(struct charger_desc));
>   cm->last_temp_mC = INT_MIN; /* denotes "unmeasured, yet" */
>  
>   /*
> -- 
> 1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 3/3] power: power_supply_core: Add support for supplied_from

2013-04-16 Thread Anton Vorontsov
On Mon, Apr 01, 2013 at 05:45:55PM -0400, Rhyland Klein wrote:
> Adding support for supplied_from char * array. This is meant to store the
> list of suppliers for a given supply, i.e. chargers for a battery. This
> list can be populated through devicetree readily as well as passed
> directly from the driver.

The subject doesn't quite reflect the content of the patch... but I fixed
it up.

> +int power_supply_populate_supplied_from(struct power_supply *psy)
> +{

This gives me:

  CHECK   drivers/power/power_supply_core.c
drivers/power/power_supply_core.c:119:5: warning: symbol 
'power_supply_populate_supplied_from' was not declared. Should it be static?
drivers/power/power_supply_core.c:144:5: warning: symbol 
'power_supply_find_supply_from_node' was not declared. Should it be static?
drivers/power/power_supply_core.c:173:5: warning: symbol 
'power_supply_check_supplies' was not declared. Should it be static?

I fixed it up by making the functions static. Once you need the functions
outside of _core.c, feel free to export them.

Thanks,

Anton
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/3] power: power_supply: Add core support for supplied_from

2013-04-16 Thread Anton Vorontsov
On Tue, Apr 02, 2013 at 03:31:30PM -0600, Stephen Warren wrote:
> On 04/01/2013 03:45 PM, Rhyland Klein wrote:
> > This patch adds support for supplies to register a list of char *'s
> > which represent the list of supplies which supply them. This is the
> > opposite of the supplied_to list.
> 
> This patch resolves the concerns I had before, so,
> Reviewed-by: Stephen Warren 

Thanks a lot for the work, folks! With some fixes (see comments for patch
3/3), this is now in battery-2.6.git tree.

Anton
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch resend] mm, memcg: give exiting processes access to memory reserves

2013-04-16 Thread David Rientjes
A memcg may livelock when oom if the process that grabs the hierarchy's
oom lock is never the first process with PF_EXITING set in the memcg's
task iteration.

The oom killer, both global and memcg, will defer if it finds an eligible
process that is in the process of exiting and it is not being ptraced.
The idea is to allow it to exit without using memory reserves before
needlessly killing another process.

This normally works fine except in the memcg case with a large number of
threads attached to the oom memcg.  In this case, the memcg oom killer
only gets called for the process that grabs the hierarchy's oom lock; all
others end up blocked on the memcg's oom waitqueue.  Thus, if the process
that grabs the hierarchy's oom lock is never the first PF_EXITING process
in the memcg's task iteration, the oom killer is constantly deferred
without anything making progress.

The fix is to give PF_EXITING processes access to memory reserves so that
we've marked them as oom killed without any iteration.  This allows
__mem_cgroup_try_charge() to succeed so that the process may exit.  This
makes the memcg oom killer exemption for TIF_MEMDIE tasks, now
immediately granted for processes with pending SIGKILLs and those in the
exit path, to be equivalent to what is done for the global oom killer.

Acked-by: Michal Hocko 
Acked-by: KAMEZAWA Hiroyuki 
Acked-by: Johannes Weiner 
Signed-off-by: David Rientjes 
---
 mm/memcontrol.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1686,11 +1686,11 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup 
*memcg, gfp_t gfp_mask,
struct task_struct *chosen = NULL;
 
/*
-* If current has a pending SIGKILL, then automatically select it.  The
-* goal is to allow it to allocate so that it may quickly exit and free
-* its memory.
+* If current has a pending SIGKILL or is exiting, then automatically
+* select it.  The goal is to allow it to allocate so that it may
+* quickly exit and free its memory.
 */
-   if (fatal_signal_pending(current)) {
+   if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
set_thread_flag(TIF_MEMDIE);
return;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 25/28] proc: Supply an accessor to get the process ID associated with some proc files [RFC]

2013-04-16 Thread Li Zefan
On 2013/4/17 2:27, David Howells wrote:
> Supply an accessor to get the process ID associated with some proc files and
> directories (get_proc_pid()).
> 
> Signed-off-by: David Howells 
> cc: Tejun Heo 
> cc: Li Zefan 
> cc: contain...@lists.linux-foundation.org
> cc: cgro...@vger.kernel.org

Acked-by: Li Zefan 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] power: rx51_battery: Fix reporting temperature

2013-04-16 Thread Anton Vorontsov
On Thu, Mar 28, 2013 at 05:42:23PM +0100, Pali Rohár wrote:
> This patch fixes the units (1/10 °C) in which the temperature is reported.
> 
> Signed-off-by: Pali Rohár 
> ---

Applied, thanks!

>  drivers/power/rx51_battery.c |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/power/rx51_battery.c b/drivers/power/rx51_battery.c
> index 820..527d256 100644
> --- a/drivers/power/rx51_battery.c
> +++ b/drivers/power/rx51_battery.c
> @@ -119,7 +119,7 @@ static int rx51_battery_read_temperature(struct 
> rx51_device_info *di)
>  
>   /* First check for temperature in first direct table */
>   if (raw < ARRAY_SIZE(rx51_temp_table1))
> - return rx51_temp_table1[raw] * 100;
> + return rx51_temp_table1[raw] * 10;
>  
>   /* Binary search RAW value in second inverse table */
>   while (max - min > 1) {
> @@ -132,7 +132,7 @@ static int rx51_battery_read_temperature(struct 
> rx51_device_info *di)
>   break;
>   }
>  
> - return (rx51_temp_table2_first - min) * 100;
> + return (rx51_temp_table2_first - min) * 10;
>  }
>  
>  /*
> -- 
> 1.7.10.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v8 5/5] hwmon: add ST-Ericsson ABX500 hwmon driver

2013-04-16 Thread Anton Vorontsov
On Wed, Apr 03, 2013 at 09:31:55AM -0700, Guenter Roeck wrote:
> On Wed, Apr 03, 2013 at 08:18:12PM +0800, Hongbo Zhang wrote:
> > Each of ST-Ericsson X500 chip set series consists of both ABX500 and DBX500
> > chips. This is ABX500 hwmon driver, where the abx500.c is a common layer for
> > all ABX500s, and the ab8500.c is specific for AB8500 chip. Under this 
> > designed
> > structure, other chip specific files can be added simply using the same 
> > common
> > layer abx500.c.
> > 
> > Signed-off-by: Hongbo Zhang 
> 
> Acked-by: Guenter Roeck 

Patches 1-5 applied, thanks a lot!

Anton
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: + posix_timers-remove-dead-task-timer-expiry-caching.patch added to -mm tree

2013-04-16 Thread Frederic Weisbecker
2013/4/9  :
>
> The patch titled
>  Subject: posix_timers: Remove dead task timer expiry caching
> has been added to the -mm tree.  Its filename is
>  posix_timers-remove-dead-task-timer-expiry-caching.patch
>
> Before you just go and hit "reply", please:
>a) Consider who else should be cc'ed
>b) Prefer to cc a suitable mailing list as well
>c) Ideally: find the original patch on the mailing list and do a
>   reply-to-all to that, adding suitable additional cc's
>
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
>
> The -mm tree is included into linux-next and is updated
> there every 3-4 working days
>
> --
> From: Frederic Weisbecker 
> Subject: posix_timers: Remove dead task timer expiry caching
>
> When reading a timer sample, posix_cpu_timer_get() and
> posix_cpu_timer_schedule() both perform a caching of the timer expiry time
> by converting its value from absolute to relative if the task has exited.
>
> The reason for this caching is not clear though, it could be:
>
> 1) For performance reasons: no need to calculate the delta after the
>task has died, its cputime won't change anymore.  We can thus avoid
>some locking (sighand, tasklist_lock, rq->lock for task_delta_exec(),
>...), and various operations to calculate the sample...
>
> 2) To keep the remaining delta for the timer available after the task
>has died.  When it gets reaped, its sighand disappears, so accessing
>the process wide cputime through tsk->signal is probably not safe.
>
> Now, is the first reason really worth it?  I have no idea if it is a case
> we really want to optimize.
>
> Considering the second reason, we return a disarmed zero'ed timer when
> tsk->sighand == NULL.  So if this is an assumed reason, it's broken.  And
> this case only concerns process wide timers that have their group leader
> reaped.  The posix cpu timer shouldn't even be available anymore at that
> time.  Unless the group leader changed since we called
> posix_cpu_timer_create() after an exec?
>
> Anyway for now I'm sending this as an RFC because there may well be subtle
> things I left behind.
>
> Signed-off-by: Frederic Weisbecker 
> Cc: Stanislaw Gruszka 
> Cc: Thomas Gleixner 
> Cc: Peter Zijlstra 
> Cc: Ingo Molnar 
> Cc: Oleg Nesterov 
> Signed-off-by: Andrew Morton 

So this particular patch probably shouldn't go into 3.10; I sent it early in
case I could get some insight from reviewers.
Anyway, give me some time to think more about it and all the possible
implications for exit, de_thread, etc... then I'll resend if that
sounds palatable.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ARM: dts: omap3-beagle-xm: Add USB Host support for Rev Ax/Bx

2013-04-16 Thread Robert Nelson
On Tue, Apr 16, 2013 at 7:52 PM, Tony Lindgren  wrote:
> * Roger Quadros  [130415 05:44]:
>> On 04/15/2013 03:35 PM, Roger Quadros wrote:
>> > Provide RESET and Power regulators for the USB PHY,
>> > the USB Host port mode and the PHY device.
>> >
>> > Also provide pin multiplexer information for USB host
>> > pins.
>> >
>> > This will not work for Rev Cx boards because of reversed logic
>> > for USB_POWER_Enable.
>> >
>> > CC: Benoît Cousson 
>> > Signed-off-by: Roger Quadros 
>> > ---
>> >  arch/arm/boot/dts/omap3-beagle-xm.dts |   62 
>> > +
>> >  1 files changed, 62 insertions(+), 0 deletions(-)
>> >
>> > diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts 
>> > b/arch/arm/boot/dts/omap3-beagle-xm.dts
>> > index 5a31964..d394c51 100644
>> > --- a/arch/arm/boot/dts/omap3-beagle-xm.dts
>> > +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
>> > @@ -57,6 +57,60 @@
>> > ti,mcbsp = <&mcbsp2>;
>> > ti,codec = <&twl_audio>;
>> > };
>> > +
>> > +   /* HS USB Port 2 RESET */
>> > +   hsusb2_reset: hsusb2_reset_reg {
>> > +   compatible = "regulator-fixed";
>> > +   regulator-name = "hsusb2_reset";
>> > +   regulator-min-microvolt = <330>;
>> > +   regulator-max-microvolt = <330>;
>> > +   gpio = <&gpio5 19 0>;   /* gpio_147 */
>> > +   startup-delay-us = <7>;
>> > +   enable-active-high;
>> > +   };
>> > +
>> > +   /* HS USB Port 2 Power */
>> > +   hsusb2_power: hsusb2_power_reg {
>> > +   compatible = "regulator-fixed";
>> > +   regulator-name = "hsusb2_vbus";
>> > +   regulator-min-microvolt = <330>;
>> > +   regulator-max-microvolt = <330>;
>> > +   gpio = <&twl_gpio 18 0>;/* GPIO LEDA */
>> > +   startup-delay-us = <7>;
>> > +   enable-active-high; /* FIXME: active-low for Rev. C */
>>
>> Benoit & Tony,
>>
>> Any ideas how to tackle the reversed logic for Rev. C boards?
>
> Sounds like we need a shared omap3-beagle.dtsi, then omap3-beagle-xm.dts
> and omap3-beagle-rev-c.dts. Then xm and rev-c can both include the

Bike-shedding, but we might want to make that "omap3-beagle-xmc.dts" as
there is the "rev c" variant of the original beagle...

It's too bad we can't read the 3 gpio pin states in the device tree
and make a decision.

Regards,

-- 
Robert Nelson
http://www.rcn-ee.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: cgroup: status-quo and userland efforts

2013-04-16 Thread Li Zefan
On 2013/4/17 1:10, Tejun Heo wrote:
> Hello, Li.
> 
> On Tue, Apr 16, 2013 at 07:17:17PM +0800, Li Zefan wrote:
> ...
>>> hot-unplug).  It currently transfers all its tasks to the nearest
>>> ancestor with executing resources, which is an irreversible process
>>> which would affect all other co-mounted controllers.  We probably want
>>> it to just take on the masks of the ancestor until its own executing
>>> resources become online again, and the new behavior should be gated
>>> behind a switch (Li, can you please look into this?).
>>>
>>
>> Sure, I'll be working on sane hierarchy behavior for cpuset.
> 
> Great, it'd be great if you can share how it's gonna be done once the
> basic design gets settled before full implementation.
> 

The basic idea is, when a cpuset becomes empty due to hotplug, we don't
move the tasks in it, but instead we update tasks' cpumask/nodemask using
the nearest non-empty ancestor cpuset's cpus_allowed and mems_allowed.

- then it's allowed to move those tasks from the empty cpuset to another
cpuset

- when this ancestor cpuset's cpumask/nodemask is changed (either by writing
cpuset.cpus/mems or hotplug), not only the tasks in it but also tasks in
the empty cpuset will be updated.

- it's allowed to move a task to an empty cpuset, and the task's
cpumask/nodemask will be updated according to the nearest non-empty ancestor,
no matter if this empty cpuset is exclusive or not.

- if a previously offlined cpu becomes online again, the empty cpuset won't
get this cpu resource automatically, which is the current behavior.

How does this sound?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] posix_timers: A few expiry caching fixes

2013-04-16 Thread Frederic Weisbecker
Hi Andrew,

This v2 only updates the changelogs to describe more explicitly
the user visible effects as you suggested.

I dropped the 3rd patch for now because it needs more thinking.

Thanks.


Frederic Weisbecker (2):
  posix-timers: correctly get dying task time sample in
posix_cpu_timer_schedule()
  posix_timers: Fix racy timer delta caching on task exit

 kernel/posix-cpu-timers.c |   23 ---
 1 files changed, 12 insertions(+), 11 deletions(-)

-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] posix-timers: correctly get dying task time sample in posix_cpu_timer_schedule()

2013-04-16 Thread Frederic Weisbecker
In order to re-arm a timer after it fired, we take a sample of the
current process or thread cputime.

If the task is dying though, we don't arm anything but we cache the
remaining timer expiration delta for further reads.

Something similar is performed in posix_cpu_timer_get() but here we forget
to take the process wide cputime sample before caching it.

As a result we are storing random stack content, causing every further
read of that timer to return junk values.

Fix this by taking the appropriate sample in the case of process wide
timers.

This probably doesn't matter much in practice because, at this stage, the
thread is the last one in the group and we reached exit_notify(). This
implies that we called exit_itimers() and there should be no more timers
to handle for that task.

So this is likely dead code anyway but let's fix the current logic
and the warning that came along:

kernel/posix-cpu-timers.c: In function 'posix_cpu_timer_schedule':
kernel/posix-cpu-timers.c:1127: warning: 'now' may be used uninitialized in 
this function

Then we can start to think further about cleaning up that code.

Reported-by: Andrew Morton 
Reported-by: Chen Gang 
Signed-off-by: Frederic Weisbecker 
Cc: Stanislaw Gruszka 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Oleg Nesterov 
Cc: Chen Gang 
Signed-off-by: Andrew Morton 
---
 kernel/posix-cpu-timers.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index edf94b6..afd79a9 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1062,6 +1062,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 * not yet reaped.  Take this opportunity to
 * drop our task ref.
 */
+   cpu_timer_sample_group(timer->it_clock, p, &now);
clear_dead_task(timer, now);
goto out_unlock;
}
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] posix_timers: Fix racy timer delta caching on task exit

2013-04-16 Thread Frederic Weisbecker
When a task exits, we perform a caching of the remaining cputime delta
before expiring of its timers.

This is done from the following places:

* When the task is reaped. We iterate through its list of
  posix cpu timers and store the remaining timer delta to
  the timer struct instead of the absolute value.
  (See posix_cpu_timers_exit() / posix_cpu_timers_exit_group() )

* When we call posix_cpu_timer_get() or posix_cpu_timer_schedule().
  If the timer's task is considered dying when watched from these
  places, the same conversion from absolute to relative expiry time
  is performed. Then the given task's reference is released.
  (See clear_dead_task() ).

The relevance of this caching is questionable but this is another
and deeper debate.

The big issue here is that these two sources of caching don't mix
up very well together.

More specifically, the caching can easily be done twice, resulting
in a wrong delta as it gets spuriously subtracted a second time by
the elapsed clock. This can happen in the following scenario:

1) The task exits and gets reaped: we call posix_cpu_timers_exit()
   and the absolute timer expiry values are converted to a relative
   delta.

2) timer_gettime() -> posix_cpu_timer_get() is called and relies on
   clear_dead_task() because  tsk->exit_state == EXIT_DEAD.
   The delta gets subtracted again by the elapsed clock and we return
   a wrong result.

To fix this, just remove the caching done on task reaping time.  It
doesn't bring much value on its own.  The caching done from
posix_cpu_timer_get/schedule is enough.

And it would also be hard to get it really right: we could make it put and
clear the target task in the timer struct so that readers know if they are
dealing with a cached relative value or an absolute one.  But it would be racy.
The only safe way to do it would be to lock the itimer->it_lock so that we
know nobody reads the cputime expiry value while we modify it and its
target task reference.  Doing so would involve some funny workarounds to
avoid circular lock against the sighand lock.  There is just no reason to
maintain this.

The user visible effect of this patch can be observed by running the
following code: it creates a subthread that launches a posix cputimer
which expires after 10 seconds. But then the subthread only busy loops for 2
seconds and exits. The parent reaps the subthread and reads the timer value.
Its expected value should be the initial timer's expiration value
minus the cputime elapsed in the subthread. Roughly 10 - 2 = 8 seconds:

#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>

/* Timer handle: created and armed in thread(), read back in main(). */
static timer_t id;
/*
 * val: 10-second initial expiry passed to timer_settime() in thread().
 * new: receives the timer_gettime() result in main().
 */
static struct itimerspec val = { .it_value.tv_sec = 10, }, new;

/*
 * Subthread body: create a CLOCK_THREAD_CPUTIME_ID timer, arm it with the
 * expiry stored in 'val', busy loop until 2 seconds of wall-clock time have
 * passed, then return so the parent can read the timer afterwards.
 *
 * Always returns NULL; failures are reported through perror().
 */
static void *thread(void *unused)
{
	int err;
	struct timeval start, end, diff;

	/*
	 * Capture the return value: the check below previously tested an
	 * uninitialized 'err' because the result was never assigned.
	 */
	err = timer_create(CLOCK_THREAD_CPUTIME_ID, NULL, &id);
	if (err < 0) {
		perror("Can't create timer\n");
		return NULL;
	}

	/* Arm 10 sec timer */
	err = timer_settime(id, 0, &val, NULL);
	if (err < 0) {
		perror("Can't set timer\n");
		return NULL;
	}

	/* Exit after 2 seconds of execution */
	gettimeofday(&start, NULL);
	do {
		gettimeofday(&end, NULL);
		timersub(&end, &start, &diff);
	} while (diff.tv_sec < 2);

	return NULL;
}

/*
 * Run the subthread to completion, wait briefly so it has been reaped,
 * then print the remaining time of the timer the subthread armed as
 * "<seconds> <nanoseconds>".
 *
 * Returns 0 normally, -1 if the subthread could not be created.
 */
int main(int argc, char **argv)
{
	pthread_t pthread;
	int err;

	err = pthread_create(&pthread, NULL, thread, NULL);
	if (err) {
		perror("Can't create thread\n");
		return -1;
	}
	pthread_join(pthread, NULL);
	/* Just wait a little bit to make sure the child got reaped */
	sleep(1);
	err = timer_gettime(id, &new);
	if (err)
		perror("Can't get timer value\n");
	/* tv_sec is a time_t, not an int: cast to long so it matches %ld. */
	printf("%ld %ld\n", (long)new.it_value.tv_sec, new.it_value.tv_nsec);

	return 0;
}

Before the patch:

   $ ./posix_cpu_timers
   6 2278074

After the patch:

  $ ./posix_cpu_timers
  8 1158766

Before the patch, the elapsed time got two more seconds spuriously accounted.

Signed-off-by: Frederic Weisbecker 
Cc: Stanislaw Gruszka 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Oleg Nesterov 
Signed-off-by: Andrew Morton 
---
 kernel/posix-cpu-timers.c |   22 +++---
 1 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index afd79a9..877439b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -387,14 +387,8 @@ sta

Re: [patch v7 0/21] sched: power aware scheduling

2013-04-16 Thread Alex Shi
On 04/16/2013 06:24 PM, Borislav Petkov wrote:
> On Tue, Apr 16, 2013 at 08:22:19AM +0800, Alex Shi wrote:
>> testing has a little variation, but the power data is quite accurate.
>> I may change to packing tasks per cpu capacity than current cpu
>> weight. that should has better power efficient value.
> 
> Yeah, this probably needs careful measuring - and by "this" I mean how
> to place N tasks where N is less than number of cores in the system.
> 
> I can imagine trying to migrate them all together on a single physical
> socket (maybe even overcommitting it) so that you can flush the caches
> of the cores on the other sockets and so that you can power down the
> other sockets and avoid coherent traffic from waking them up, to be one
> strategy. My supposition here is that maybe putting the whole unused
> sockets in a deep sleep state could save a lot of power.

Sure. Currently, even if the whole socket goes to sleep, the memory on
the node may still be accessed, so the cpu socket still spends some power on
the 'uncore' part. So the further step is to reduce remote memory access to
save more power, which is also what numa balancing wants to do.
And then the next step is to detect if this socket is cache intensive,
i.e. if there is much cache thrashing on the node.
In theory, there is still lots of tuning space. :)
> 
> Or not, who knows. Only empirical measurements should show us what
> actually happens.

Sure. :)
> 
> Thanks.
> 


-- 
Thanks Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/6] mm/hugetlb: gigantic hugetlb page pools shrink supporting

2013-04-16 Thread Michal Hocko
On Wed 17-04-13 08:36:28, Wanpeng Li wrote:
> Changelog:
>  * add comments from Andi which indicate shrink gigantic hugetlb page pools 
> make 
>sense to patchset description.
>
> order >= MAX_ORDER pages are only allocated at boot stage using the 
> bootmem allocator with the "hugepages=xxx" option. These pages are never 
> free after boot by default since it would be a one-way street(>= MAX_ORDER
> pages cannot be allocated later), but if administrator confirm not to 
> use these gigantic pages any more, these pinned pages will waste memory
> since other users can't grab free pages from gigantic hugetlb pool even
> if OOM, it's not flexible.  The patchset add hugetlb gigantic page pools
> shrink supporting. Administrator can enable knob exported in sysctl to
> permit to shrink gigantic hugetlb pool.
> 
> http://marc.info/?l=linux-mm&m=136578016214512&w=2 
> Andi thinks this idea make sense since he is working on a new patchkit to 
> allocate GB pages from CMA. With that freeing actually makes sense, as the 
> pages can be reallocated.

But that is not implemented yet...
 
> 
> Testcase:
> boot: hugepagesz=1G hugepages=10
> 
> [root@localhost hugepages]# free -m
>  total   used   free sharedbuffers cached
> Mem: 36269  10836  25432  0 11288
> -/+ buffers/cache:  10537  25732
> Swap:35999  0  35999
> [root@localhost hugepages]# echo 0 > 
> /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
> -bash: echo: write error: Invalid argument
> [root@localhost hugepages]# echo 1 > /proc/sys/vm/hugetlb_shrink_gigantic_pool

I have asked that already but it didn't get answered. What is the reason
for an explicit knob to enable this? It just adds additional code and
it doesn't make much sense to me to be honest.

[...]
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 8/8] rtc: rtc-twl: convert twl4030rtc_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-twl.c |   16 ++--
 1 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 8bc6c80..8751a52 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -566,11 +566,10 @@ static void twl_rtc_shutdown(struct platform_device *pdev)
mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static unsigned char irqstat;
 
-static int twl_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int twl_rtc_suspend(struct device *dev)
 {
irqstat = rtc_irq_bits;
 
@@ -578,17 +577,15 @@ static int twl_rtc_suspend(struct platform_device *pdev, 
pm_message_t state)
return 0;
 }
 
-static int twl_rtc_resume(struct platform_device *pdev)
+static int twl_rtc_resume(struct device *dev)
 {
set_rtc_irq_bit(irqstat);
return 0;
 }
-
-#else
-#define twl_rtc_suspend NULL
-#define twl_rtc_resume  NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(twl_rtc_pm_ops, twl_rtc_suspend, twl_rtc_resume);
+
 #ifdef CONFIG_OF
 static const struct of_device_id twl_rtc_of_match[] = {
{.compatible = "ti,twl4030-rtc", },
@@ -603,11 +600,10 @@ static struct platform_driver twl4030rtc_driver = {
.probe  = twl_rtc_probe,
.remove = twl_rtc_remove,
.shutdown   = twl_rtc_shutdown,
-   .suspend= twl_rtc_suspend,
-   .resume = twl_rtc_resume,
.driver = {
.owner  = THIS_MODULE,
.name   = "twl_rtc",
+   .pm = &twl_rtc_pm_ops,
.of_match_table = of_match_ptr(twl_rtc_of_match),
},
 };
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 7/8] rtc: rtc-stmp3xxx: convert stmp3xxx_rtcdrv to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-stmp3xxx.c |   17 -
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 67d2612..30eacdb 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -307,15 +307,15 @@ out_free:
return err;
 }
 
-#ifdef CONFIG_PM
-static int stmp3xxx_rtc_suspend(struct platform_device *dev, pm_message_t 
state)
+#ifdef CONFIG_PM_SLEEP
+static int stmp3xxx_rtc_suspend(struct device *dev)
 {
return 0;
 }
 
-static int stmp3xxx_rtc_resume(struct platform_device *dev)
+static int stmp3xxx_rtc_resume(struct device *dev)
 {
-   struct stmp3xxx_rtc_data *rtc_data = platform_get_drvdata(dev);
+   struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
 
stmp_reset_block(rtc_data->io);
writel(STMP3XXX_RTC_PERSISTENT0_ALARM_EN |
@@ -324,11 +324,11 @@ static int stmp3xxx_rtc_resume(struct platform_device 
*dev)
rtc_data->io + STMP3XXX_RTC_PERSISTENT0_CLR);
return 0;
 }
-#else
-#define stmp3xxx_rtc_suspend   NULL
-#define stmp3xxx_rtc_resumeNULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(stmp3xxx_rtc_pm_ops, stmp3xxx_rtc_suspend,
+   stmp3xxx_rtc_resume);
+
 static const struct of_device_id rtc_dt_ids[] = {
{ .compatible = "fsl,stmp3xxx-rtc", },
{ /* sentinel */ }
@@ -338,11 +338,10 @@ MODULE_DEVICE_TABLE(of, rtc_dt_ids);
 static struct platform_driver stmp3xxx_rtcdrv = {
.probe  = stmp3xxx_rtc_probe,
.remove = stmp3xxx_rtc_remove,
-   .suspend= stmp3xxx_rtc_suspend,
-   .resume = stmp3xxx_rtc_resume,
.driver = {
.name   = "stmp3xxx-rtc",
.owner  = THIS_MODULE,
+   .pm = &stmp3xxx_rtc_pm_ops,
.of_match_table = of_match_ptr(rtc_dt_ids),
},
 };
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 6/8] rtc: rtc-spear: convert spear_rtc_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-spear.c |   18 --
 1 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index db3ef61..574359c 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -434,10 +434,10 @@ static int spear_rtc_remove(struct platform_device *pdev)
return 0;
 }
 
-#ifdef CONFIG_PM
-
-static int spear_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int spear_rtc_suspend(struct device *dev)
 {
+   struct platform_device *pdev = to_platform_device(dev);
struct spear_rtc_config *config = platform_get_drvdata(pdev);
int irq;
 
@@ -453,8 +453,9 @@ static int spear_rtc_suspend(struct platform_device *pdev, 
pm_message_t state)
return 0;
 }
 
-static int spear_rtc_resume(struct platform_device *pdev)
+static int spear_rtc_resume(struct device *dev)
 {
+   struct platform_device *pdev = to_platform_device(dev);
struct spear_rtc_config *config = platform_get_drvdata(pdev);
int irq;
 
@@ -472,12 +473,10 @@ static int spear_rtc_resume(struct platform_device *pdev)
 
return 0;
 }
-
-#else
-#define spear_rtc_suspend  NULL
-#define spear_rtc_resume   NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(spear_rtc_pm_ops, spear_rtc_suspend, 
spear_rtc_resume);
+
 static void spear_rtc_shutdown(struct platform_device *pdev)
 {
struct spear_rtc_config *config = platform_get_drvdata(pdev);
@@ -497,11 +496,10 @@ MODULE_DEVICE_TABLE(of, spear_rtc_id_table);
 static struct platform_driver spear_rtc_driver = {
.probe = spear_rtc_probe,
.remove = spear_rtc_remove,
-   .suspend = spear_rtc_suspend,
-   .resume = spear_rtc_resume,
.shutdown = spear_rtc_shutdown,
.driver = {
.name = "rtc-spear",
+   .pm = &spear_rtc_pm_ops,
.of_match_table = of_match_ptr(spear_rtc_id_table),
},
 };
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 5/8] rtc: rtc-puv3: convert puv3_rtc_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-puv3.c |   27 ---
 1 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c
index 0407e13..72f4371 100644
--- a/drivers/rtc/rtc-puv3.c
+++ b/drivers/rtc/rtc-puv3.c
@@ -207,14 +207,14 @@ static const struct rtc_class_ops puv3_rtcops = {
.proc   = puv3_rtc_proc,
 };
 
-static void puv3_rtc_enable(struct platform_device *pdev, int en)
+static void puv3_rtc_enable(struct device *dev, int en)
 {
if (!en) {
writel(readl(RTC_RTSR) & ~RTC_RTSR_HZE, RTC_RTSR);
} else {
/* re-enable the device, and check it is ok */
if ((readl(RTC_RTSR) & RTC_RTSR_HZE) == 0) {
-   dev_info(&pdev->dev, "rtc disabled, re-enabling\n");
+   dev_info(dev, "rtc disabled, re-enabling\n");
writel(readl(RTC_RTSR) | RTC_RTSR_HZE, RTC_RTSR);
}
}
@@ -276,7 +276,7 @@ static int puv3_rtc_probe(struct platform_device *pdev)
goto err_nores;
}
 
-   puv3_rtc_enable(pdev, 1);
+   puv3_rtc_enable(&pdev->dev, 1);
 
/* register RTC and exit */
rtc = rtc_device_register("pkunity", &pdev->dev, &puv3_rtcops,
@@ -296,44 +296,41 @@ static int puv3_rtc_probe(struct platform_device *pdev)
return 0;
 
  err_nortc:
-   puv3_rtc_enable(pdev, 0);
+   puv3_rtc_enable(&pdev->dev, 0);
release_resource(puv3_rtc_mem);
 
  err_nores:
return ret;
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static int ticnt_save;
 
-static int puv3_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int puv3_rtc_suspend(struct device *dev)
 {
/* save RTAR for anyone using periodic interrupts */
ticnt_save = readl(RTC_RTAR);
-   puv3_rtc_enable(pdev, 0);
+   puv3_rtc_enable(dev, 0);
return 0;
 }
 
-static int puv3_rtc_resume(struct platform_device *pdev)
+static int puv3_rtc_resume(struct device *dev)
 {
-   puv3_rtc_enable(pdev, 1);
+   puv3_rtc_enable(dev, 1);
writel(ticnt_save, RTC_RTAR);
return 0;
 }
-#else
-#define puv3_rtc_suspend NULL
-#define puv3_rtc_resume  NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(puv3_rtc_pm_ops, puv3_rtc_suspend, puv3_rtc_resume);
+
 static struct platform_driver puv3_rtc_driver = {
.probe  = puv3_rtc_probe,
.remove = puv3_rtc_remove,
-   .suspend= puv3_rtc_suspend,
-   .resume = puv3_rtc_resume,
.driver = {
.name   = "PKUnity-v3-RTC",
.owner  = THIS_MODULE,
+   .pm = &puv3_rtc_pm_ops,
}
 };
 
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 4/8] rtc: rtc-omap: convert omap_rtc_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-omap.c |   24 ++--
 1 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index a1f9d30..4e1bdb8 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -464,11 +464,10 @@ static int __exit omap_rtc_remove(struct platform_device 
*pdev)
return 0;
 }
 
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
 static u8 irqstat;
 
-static int omap_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int omap_rtc_suspend(struct device *dev)
 {
irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG);
 
@@ -476,34 +475,32 @@ static int omap_rtc_suspend(struct platform_device *pdev, 
pm_message_t state)
 * source, and in fact this enable() call is just saving a flag
 * that's never used...
 */
-   if (device_may_wakeup(&pdev->dev))
+   if (device_may_wakeup(dev))
enable_irq_wake(omap_rtc_alarm);
else
rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
 
/* Disable the clock/module */
-   pm_runtime_put_sync(&pdev->dev);
+   pm_runtime_put_sync(dev);
 
return 0;
 }
 
-static int omap_rtc_resume(struct platform_device *pdev)
+static int omap_rtc_resume(struct device *dev)
 {
/* Enable the clock/module so that we can access the registers */
-   pm_runtime_get_sync(&pdev->dev);
+   pm_runtime_get_sync(dev);
 
-   if (device_may_wakeup(&pdev->dev))
+   if (device_may_wakeup(dev))
disable_irq_wake(omap_rtc_alarm);
else
rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG);
return 0;
 }
-
-#else
-#define omap_rtc_suspend NULL
-#define omap_rtc_resume  NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume);
+
 static void omap_rtc_shutdown(struct platform_device *pdev)
 {
rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
@@ -512,12 +509,11 @@ static void omap_rtc_shutdown(struct platform_device 
*pdev)
 MODULE_ALIAS("platform:omap_rtc");
 static struct platform_driver omap_rtc_driver = {
.remove = __exit_p(omap_rtc_remove),
-   .suspend= omap_rtc_suspend,
-   .resume = omap_rtc_resume,
.shutdown   = omap_rtc_shutdown,
.driver = {
.name   = DRIVER_NAME,
.owner  = THIS_MODULE,
+   .pm = &omap_rtc_pm_ops,
.of_match_table = of_match_ptr(omap_rtc_of_match),
},
.id_table   = omap_rtc_devtype,
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 3/8] rtc: rtc-coh901331: convert coh901331_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-coh901331.c |   24 +++-
 1 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index bf0387f..93c0658 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -47,7 +47,7 @@ struct coh901331_port {
u32 physize;
void __iomem *virtbase;
int irq;
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
u32 irqmaskstore;
 #endif
 };
@@ -225,17 +225,17 @@ static int __init coh901331_probe(struct platform_device 
*pdev)
return ret;
 }
 
-#ifdef CONFIG_PM
-static int coh901331_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int coh901331_suspend(struct device *dev)
 {
-   struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
+   struct coh901331_port *rtap = dev_get_drvdata(dev);
 
/*
 * If this RTC alarm will be used for waking the system up,
 * don't disable it of course. Else we just disable the alarm
 * and await suspension.
 */
-   if (device_may_wakeup(&pdev->dev)) {
+   if (device_may_wakeup(dev)) {
enable_irq_wake(rtap->irq);
} else {
clk_enable(rtap->clk);
@@ -247,12 +247,12 @@ static int coh901331_suspend(struct platform_device 
*pdev, pm_message_t state)
return 0;
 }
 
-static int coh901331_resume(struct platform_device *pdev)
+static int coh901331_resume(struct device *dev)
 {
-   struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
+   struct coh901331_port *rtap = dev_get_drvdata(dev);
 
clk_prepare(rtap->clk);
-   if (device_may_wakeup(&pdev->dev)) {
+   if (device_may_wakeup(dev)) {
disable_irq_wake(rtap->irq);
} else {
clk_enable(rtap->clk);
@@ -261,11 +261,10 @@ static int coh901331_resume(struct platform_device *pdev)
}
return 0;
 }
-#else
-#define coh901331_suspend NULL
-#define coh901331_resume NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(coh901331_pm_ops, coh901331_suspend, 
coh901331_resume);
+
 static void coh901331_shutdown(struct platform_device *pdev)
 {
struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
@@ -279,10 +278,9 @@ static struct platform_driver coh901331_driver = {
.driver = {
.name = "rtc-coh901331",
.owner = THIS_MODULE,
+   .pm = &coh901331_pm_ops,
},
.remove = __exit_p(coh901331_remove),
-   .suspend = coh901331_suspend,
-   .resume = coh901331_resume,
.shutdown = coh901331_shutdown,
 };
 
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 2/8] rtc: rtc-bfin: convert bfin_rtc_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-bfin.c |   18 ++
 1 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 4ec614b..f35b17a 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -402,11 +402,9 @@ static int bfin_rtc_remove(struct platform_device *pdev)
return 0;
 }
 
-#ifdef CONFIG_PM
-static int bfin_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int bfin_rtc_suspend(struct device *dev)
 {
-   struct device *dev = &pdev->dev;
-
dev_dbg_stamp(dev);
 
if (device_may_wakeup(dev)) {
@@ -418,10 +416,8 @@ static int bfin_rtc_suspend(struct platform_device *pdev, 
pm_message_t state)
return 0;
 }
 
-static int bfin_rtc_resume(struct platform_device *pdev)
+static int bfin_rtc_resume(struct device *dev)
 {
-   struct device *dev = &pdev->dev;
-
dev_dbg_stamp(dev);
 
if (device_may_wakeup(dev))
@@ -440,20 +436,18 @@ static int bfin_rtc_resume(struct platform_device *pdev)
 
return 0;
 }
-#else
-# define bfin_rtc_suspend NULL
-# define bfin_rtc_resume  NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(bfin_rtc_pm_ops, bfin_rtc_suspend, bfin_rtc_resume);
+
 static struct platform_driver bfin_rtc_driver = {
.driver = {
.name   = "rtc-bfin",
.owner  = THIS_MODULE,
+   .pm = &bfin_rtc_pm_ops,
},
.probe  = bfin_rtc_probe,
.remove = bfin_rtc_remove,
-   .suspend= bfin_rtc_suspend,
-   .resume = bfin_rtc_resume,
 };
 
 module_platform_driver(bfin_rtc_driver);
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 1/8] rtc: rtc-at91sam9: convert at91_rtc_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/rtc/rtc-at91sam9.c |   23 ++-
 1 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 39cfd2e..4843e42 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -414,14 +414,13 @@ static void at91_rtc_shutdown(struct platform_device 
*pdev)
rtt_writel(rtc, MR, mr & ~rtc->imr);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 
 /* AT91SAM9 RTC Power management control */
 
-static int at91_rtc_suspend(struct platform_device *pdev,
-   pm_message_t state)
+static int at91_rtc_suspend(struct device *dev)
 {
-   struct sam9_rtc *rtc = platform_get_drvdata(pdev);
+   struct sam9_rtc *rtc = dev_get_drvdata(dev);
u32 mr = rtt_readl(rtc, MR);
 
/*
@@ -430,7 +429,7 @@ static int at91_rtc_suspend(struct platform_device *pdev,
 */
rtc->imr = mr & (AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN);
if (rtc->imr) {
-   if (device_may_wakeup(&pdev->dev) && (mr & AT91_RTT_ALMIEN)) {
+   if (device_may_wakeup(dev) && (mr & AT91_RTT_ALMIEN)) {
enable_irq_wake(rtc->irq);
/* don't let RTTINC cause wakeups */
if (mr & AT91_RTT_RTTINCIEN)
@@ -442,13 +441,13 @@ static int at91_rtc_suspend(struct platform_device *pdev,
return 0;
 }
 
-static int at91_rtc_resume(struct platform_device *pdev)
+static int at91_rtc_resume(struct device *dev)
 {
-   struct sam9_rtc *rtc = platform_get_drvdata(pdev);
+   struct sam9_rtc *rtc = dev_get_drvdata(dev);
u32 mr;
 
if (rtc->imr) {
-   if (device_may_wakeup(&pdev->dev))
+   if (device_may_wakeup(dev))
disable_irq_wake(rtc->irq);
mr = rtt_readl(rtc, MR);
rtt_writel(rtc, MR, mr | rtc->imr);
@@ -456,20 +455,18 @@ static int at91_rtc_resume(struct platform_device *pdev)
 
return 0;
 }
-#else
-#define at91_rtc_suspend   NULL
-#define at91_rtc_resumeNULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume);
+
 static struct platform_driver at91_rtc_driver = {
.probe  = at91_rtc_probe,
.remove = at91_rtc_remove,
.shutdown   = at91_rtc_shutdown,
-   .suspend= at91_rtc_suspend,
-   .resume = at91_rtc_resume,
.driver = {
.name   = "rtc-at91sam9",
.owner  = THIS_MODULE,
+   .pm = &at91_rtc_pm_ops,
},
 };
 
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 3/3] backlight: vgg2432a4: convert vgg2432a4_driver to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management. Also, use of
pm_message_t is deprecated. Thus, it is removed.

Signed-off-by: Jingoo Han 
---
 drivers/video/backlight/ili9320.c   |   24 +---
 drivers/video/backlight/ili9320.h   |2 +-
 drivers/video/backlight/vgg2432a4.c |   18 --
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/video/backlight/ili9320.c 
b/drivers/video/backlight/ili9320.c
index c3a5299..f8be90c 100644
--- a/drivers/video/backlight/ili9320.c
+++ b/drivers/video/backlight/ili9320.c
@@ -270,27 +270,21 @@ int ili9320_remove(struct ili9320 *ili)
 }
 EXPORT_SYMBOL_GPL(ili9320_remove);
 
-#ifdef CONFIG_PM
-int ili9320_suspend(struct ili9320 *lcd, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+int ili9320_suspend(struct ili9320 *lcd)
 {
int ret;
 
-   dev_dbg(lcd->dev, "%s: event %d\n", __func__, state.event);
+   ret = ili9320_power(lcd, FB_BLANK_POWERDOWN);
 
-   if (state.event == PM_EVENT_SUSPEND) {
-   ret = ili9320_power(lcd, FB_BLANK_POWERDOWN);
-
-   if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) {
-   ili9320_write(lcd, ILI9320_POWER1, lcd->power1 |
- ILI9320_POWER1_SLP |
- ILI9320_POWER1_DSTB);
-   lcd->initialised = 0;
-   }
-
-   return ret;
+   if (lcd->platdata->suspend == ILI9320_SUSPEND_DEEP) {
+   ili9320_write(lcd, ILI9320_POWER1, lcd->power1 |
+ ILI9320_POWER1_SLP |
+ ILI9320_POWER1_DSTB);
+   lcd->initialised = 0;
}
 
-   return 0;
+   return ret;
 }
 EXPORT_SYMBOL_GPL(ili9320_suspend);
 
diff --git a/drivers/video/backlight/ili9320.h 
b/drivers/video/backlight/ili9320.h
index e0db738..42329e7 100644
--- a/drivers/video/backlight/ili9320.h
+++ b/drivers/video/backlight/ili9320.h
@@ -76,5 +76,5 @@ extern void ili9320_shutdown(struct ili9320 *lcd);
 
 /* PM */
 
-extern int ili9320_suspend(struct ili9320 *lcd, pm_message_t state);
+extern int ili9320_suspend(struct ili9320 *lcd);
 extern int ili9320_resume(struct ili9320 *lcd);
diff --git a/drivers/video/backlight/vgg2432a4.c 
b/drivers/video/backlight/vgg2432a4.c
index 84d582f..d538947 100644
--- a/drivers/video/backlight/vgg2432a4.c
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -205,18 +205,15 @@ static int vgg2432a4_lcd_init(struct ili9320 *lcd,
return ret;
 }
 
-#ifdef CONFIG_PM
-static int vgg2432a4_suspend(struct spi_device *spi, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int vgg2432a4_suspend(struct device *dev)
 {
-   return ili9320_suspend(spi_get_drvdata(spi), state);
+   return ili9320_suspend(dev_get_drvdata(dev));
 }
-static int vgg2432a4_resume(struct spi_device *spi)
+static int vgg2432a4_resume(struct device *dev)
 {
-   return ili9320_resume(spi_get_drvdata(spi));
+   return ili9320_resume(dev_get_drvdata(dev));
 }
-#else
-#define vgg2432a4_suspend  NULL
-#define vgg2432a4_resume   NULL
 #endif
 
 static struct ili9320_client vgg2432a4_client = {
@@ -249,16 +246,17 @@ static void vgg2432a4_shutdown(struct spi_device *spi)
ili9320_shutdown(spi_get_drvdata(spi));
 }
 
+static SIMPLE_DEV_PM_OPS(vgg2432a4_pm_ops, vgg2432a4_suspend, 
vgg2432a4_resume);
+
 static struct spi_driver vgg2432a4_driver = {
.driver = {
.name   = "VGG2432A4",
.owner  = THIS_MODULE,
+   .pm = &vgg2432a4_pm_ops,
},
.probe  = vgg2432a4_probe,
.remove = vgg2432a4_remove,
.shutdown   = vgg2432a4_shutdown,
-   .suspend= vgg2432a4_suspend,
-   .resume = vgg2432a4_resume,
 };
 
 module_spi_driver(vgg2432a4_driver);
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 2/3] backlight: tosa: convert tosa to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/video/backlight/tosa_bl.c  |   18 --
 drivers/video/backlight/tosa_lcd.c |   18 --
 2 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/video/backlight/tosa_bl.c 
b/drivers/video/backlight/tosa_bl.c
index 2326fa8..9df66ac 100644
--- a/drivers/video/backlight/tosa_bl.c
+++ b/drivers/video/backlight/tosa_bl.c
@@ -134,28 +134,27 @@ static int tosa_bl_remove(struct i2c_client *client)
return 0;
 }
 
-#ifdef CONFIG_PM
-static int tosa_bl_suspend(struct i2c_client *client, pm_message_t pm)
+#ifdef CONFIG_PM_SLEEP
+static int tosa_bl_suspend(struct device *dev)
 {
-   struct tosa_bl_data *data = i2c_get_clientdata(client);
+   struct tosa_bl_data *data = dev_get_drvdata(dev);
 
tosa_bl_set_backlight(data, 0);
 
return 0;
 }
 
-static int tosa_bl_resume(struct i2c_client *client)
+static int tosa_bl_resume(struct device *dev)
 {
-   struct tosa_bl_data *data = i2c_get_clientdata(client);
+   struct tosa_bl_data *data = dev_get_drvdata(dev);
 
backlight_update_status(data->bl);
return 0;
 }
-#else
-#define tosa_bl_suspend NULL
-#define tosa_bl_resume NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(tosa_bl_pm_ops, tosa_bl_suspend, tosa_bl_resume);
+
 static const struct i2c_device_id tosa_bl_id[] = {
{ "tosa-bl", 0 },
{ },
@@ -165,11 +164,10 @@ static struct i2c_driver tosa_bl_driver = {
.driver = {
.name   = "tosa-bl",
.owner  = THIS_MODULE,
+   .pm = &tosa_bl_pm_ops,
},
.probe  = tosa_bl_probe,
.remove = tosa_bl_remove,
-   .suspend= tosa_bl_suspend,
-   .resume = tosa_bl_resume,
.id_table   = tosa_bl_id,
 };
 
diff --git a/drivers/video/backlight/tosa_lcd.c 
b/drivers/video/backlight/tosa_lcd.c
index 666fe25..bf08157 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -240,19 +240,19 @@ static int tosa_lcd_remove(struct spi_device *spi)
return 0;
 }
 
-#ifdef CONFIG_PM
-static int tosa_lcd_suspend(struct spi_device *spi, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int tosa_lcd_suspend(struct device *dev)
 {
-   struct tosa_lcd_data *data = spi_get_drvdata(spi);
+   struct tosa_lcd_data *data = dev_get_drvdata(dev);
 
tosa_lcd_tg_off(data);
 
return 0;
 }
 
-static int tosa_lcd_resume(struct spi_device *spi)
+static int tosa_lcd_resume(struct device *dev)
 {
-   struct tosa_lcd_data *data = spi_get_drvdata(spi);
+   struct tosa_lcd_data *data = dev_get_drvdata(dev);
 
tosa_lcd_tg_init(data);
if (POWER_IS_ON(data->lcd_power))
@@ -262,20 +262,18 @@ static int tosa_lcd_resume(struct spi_device *spi)
 
return 0;
 }
-#else
-#define tosa_lcd_suspend   NULL
-#define tosa_lcd_resume NULL
 #endif
 
+static SIMPLE_DEV_PM_OPS(tosa_lcd_pm_ops, tosa_lcd_suspend, tosa_lcd_resume);
+
 static struct spi_driver tosa_lcd_driver = {
.driver = {
.name   = "tosa-lcd",
.owner  = THIS_MODULE,
+   .pm = &tosa_lcd_pm_ops,
},
.probe  = tosa_lcd_probe,
.remove = tosa_lcd_remove,
-   .suspend= tosa_lcd_suspend,
-   .resume = tosa_lcd_resume,
 };
 
 module_spi_driver(tosa_lcd_driver);
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 1/3] backlight: omap1: convert omapbl to dev_pm_ops

2013-04-16 Thread Jingoo Han
Instead of using legacy suspend/resume methods, using newer dev_pm_ops
structure allows better control over power management.

Signed-off-by: Jingoo Han 
---
 drivers/video/backlight/omap1_bl.c |   22 ++
 1 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/video/backlight/omap1_bl.c 
b/drivers/video/backlight/omap1_bl.c
index 0aed176..812e22e 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -71,27 +71,24 @@ static void omapbl_blank(struct omap_backlight *bl, int 
mode)
}
 }
 
-#ifdef CONFIG_PM
-static int omapbl_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int omapbl_suspend(struct device *dev)
 {
-   struct backlight_device *dev = platform_get_drvdata(pdev);
-   struct omap_backlight *bl = bl_get_data(dev);
+   struct backlight_device *bl_dev = dev_get_drvdata(dev);
+   struct omap_backlight *bl = bl_get_data(bl_dev);
 
omapbl_blank(bl, FB_BLANK_POWERDOWN);
return 0;
 }
 
-static int omapbl_resume(struct platform_device *pdev)
+static int omapbl_resume(struct device *dev)
 {
-   struct backlight_device *dev = platform_get_drvdata(pdev);
-   struct omap_backlight *bl = bl_get_data(dev);
+   struct backlight_device *bl_dev = dev_get_drvdata(dev);
+   struct omap_backlight *bl = bl_get_data(bl_dev);
 
omapbl_blank(bl, bl->powermode);
return 0;
 }
-#else
-#define omapbl_suspend NULL
-#define omapbl_resume  NULL
 #endif
 
 static int omapbl_set_power(struct backlight_device *dev, int state)
@@ -182,13 +179,14 @@ static int omapbl_remove(struct platform_device *pdev)
return 0;
 }
 
+static SIMPLE_DEV_PM_OPS(omapbl_pm_ops, omapbl_suspend, omapbl_resume);
+
 static struct platform_driver omapbl_driver = {
.probe  = omapbl_probe,
.remove = omapbl_remove,
-   .suspend= omapbl_suspend,
-   .resume = omapbl_resume,
.driver = {
.name   = "omap-bl",
+   .pm = &omapbl_pm_ops,
},
 };
 
-- 
1.7.2.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 21/28] dgrp: Clean up the use of procfs [RFC]

2013-04-16 Thread Greg KH
On Tue, Apr 16, 2013 at 07:27:13PM +0100, David Howells wrote:
> Clean up the use of procfs by the dgrp driver:
> 
>  (1) Use remove_proc_subtree() for the mass slaughter of a subdir full of proc
>  files rather than doing it manually.
> 
>  (2) When creating files, only call ID_TO_CHAR() once to generate the name.
> 
> Signed-off-by : David Howells 
> cc: Bill Pemberton 
> cc: Tommi Rantala 
> cc: de...@driverdev.osuosl.org

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 14/28] proc: Supply an accessor for getting the data from a PDE's parent [RFC]

2013-04-16 Thread Greg KH
On Tue, Apr 16, 2013 at 07:26:46PM +0100, David Howells wrote:
> Supply an accessor function for getting the private data from the parent
> proc_dir_entry struct of the proc_dir_entry struct associated with an inode.
> 
> ReiserFS, for instance, stores the super_block pointer in the proc directory
> it makes for that super_block, and a pointer to the respective seq_file show
> function in each of the proc files in that directory.
> 
> This allows a reduction in the number of file_operations structs, open
> functions and seq_operations structs required.  The problem otherwise is that
> each show function requires two pieces of data but only has storage for one
> per PDE (and this has no release function).
> 
> Signed-off-by: David Howells 
> cc: Jerry Chuang 
> cc: Mauro Carvalho Chehab 
> cc: Maxim Mikityanskiy 
> cc: YAMANE Toshiaki 
> cc: linux-wirel...@vger.kernel.org
> cc: linux-s...@vger.kernel.org
> cc: de...@driverdev.osuosl.org

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/28] rtl8192u: Don't need to save device proc dir PDE [RFC]

2013-04-16 Thread Greg KH
On Tue, Apr 16, 2013 at 07:26:39PM +0100, David Howells wrote:
> Don't need to save the PDE of a directory created under /proc/net/rtl8192/ as
> we can use proc subtree deletion to get rid of it and all its children.
> 
> Signed-off-by: David Howells 
> cc: Jerry Chuang 
> cc: Mauro Carvalho Chehab 
> cc: linux-wirel...@vger.kernel.org
> cc: de...@driverdev.osuosl.org

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 11/28] rtl8187se: Use a dir under /proc/net/r8180/ [RFC]

2013-04-16 Thread Greg KH
On Tue, Apr 16, 2013 at 07:26:35PM +0100, David Howells wrote:
> Create a dir under /proc/net/r8180/ named for the device and create that
> device's files under there.  This means that there won't be a problem for
> multiple devices in the system (if such is possible) and it means we don't
> need to save the 'device directory' PDE any more as we can just do a proc
> subtree removal.
> 
> Signed-off-by: David Howells 
> cc: Maxim Mikityanskiy 
> cc: YAMANE Toshiaki 
> cc: linux-wirel...@vger.kernel.org
> cc: de...@driverdev.osuosl.org

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 10/28] proc: Add proc_mkdir_data() [RFC]

2013-04-16 Thread Greg KH
On Tue, Apr 16, 2013 at 07:26:30PM +0100, David Howells wrote:
> Add proc_mkdir_data() to allow procfs directories to be created that are
> annotated at the time of creation with private data rather than doing this
> post-creation.  This means no access is then required to the proc_dir_entry
> struct to set this.
> 
> Signed-off-by: David Howells 
> cc: Neela Syam Kolli 
> cc: Jerry Chuang 
> cc: Mauro Carvalho Chehab 
> cc: linux-s...@vger.kernel.org
> cc: de...@driverdev.osuosl.org
> cc: linux-wirel...@vger.kernel.org
> ---

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 09/28] proc: Move some bits from linux/proc_fs.h to linux/{of.h, signal.h, tty.h} [RFC]

2013-04-16 Thread Greg Kroah-Hartman
On Tue, Apr 16, 2013 at 07:26:26PM +0100, David Howells wrote:
> Move some bits from linux/proc_fs.h to linux/of.h, signal.h and tty.h.
> 
> Also move proc_tty_init() and proc_device_tree_init() to fs/proc/internal.h as
> they're internal to procfs.
> 
> Signed-off-by: David Howells 
> cc: devicetree-disc...@lists.ozlabs.org
> cc: linux-a...@vger.kernel.org
> cc: Greg Kroah-Hartman 
> cc: Jiri Slaby 
> ---

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 01/28] Include missing linux/slab.h inclusions [RFC]

2013-04-16 Thread Greg KH
On Tue, Apr 16, 2013 at 07:25:54PM +0100, David Howells wrote:
> Include missing linux/slab.h inclusions where the source file is currently
> expecting to get kmalloc() and co. through linux/proc_fs.h.
> 
> Signed-off-by: David Howells 
> cc: linux-s...@vger.kernel.org
> cc: sparcli...@vger.kernel.org
> cc: linux-...@vger.kernel.org
> cc: linux-...@lists.infradead.org
> cc: de...@driverdev.osuosl.org
> cc: x...@kernel.org
> ---
>

Acked-by: Greg Kroah-Hartman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ARM: dts: omap3-beagle-xm: Add USB Host support for Rev Ax/Bx

2013-04-16 Thread Tony Lindgren
* Roger Quadros  [130415 05:44]:
> On 04/15/2013 03:35 PM, Roger Quadros wrote:
> > Provide RESET and Power regulators for the USB PHY,
> > the USB Host port mode and the PHY device.
> > 
> > Also provide pin multiplexer information for USB host
> > pins.
> > 
> > This will not work for Rev Cx boards because of reversed logic
> > for USB_POWER_Enable.
> > 
> > CC: Benoît Cousson 
> > Signed-off-by: Roger Quadros 
> > ---
> >  arch/arm/boot/dts/omap3-beagle-xm.dts |   62 
> > +
> >  1 files changed, 62 insertions(+), 0 deletions(-)
> > 
> > diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts 
> > b/arch/arm/boot/dts/omap3-beagle-xm.dts
> > index 5a31964..d394c51 100644
> > --- a/arch/arm/boot/dts/omap3-beagle-xm.dts
> > +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
> > @@ -57,6 +57,60 @@
> > ti,mcbsp = <&mcbsp2>;
> > ti,codec = <&twl_audio>;
> > };
> > +
> > +   /* HS USB Port 2 RESET */
> > +   hsusb2_reset: hsusb2_reset_reg {
> > +   compatible = "regulator-fixed";
> > +   regulator-name = "hsusb2_reset";
> > +   regulator-min-microvolt = <330>;
> > +   regulator-max-microvolt = <330>;
> > +   gpio = <&gpio5 19 0>;   /* gpio_147 */
> > +   startup-delay-us = <7>;
> > +   enable-active-high;
> > +   };
> > +
> > +   /* HS USB Port 2 Power */
> > +   hsusb2_power: hsusb2_power_reg {
> > +   compatible = "regulator-fixed";
> > +   regulator-name = "hsusb2_vbus";
> > +   regulator-min-microvolt = <330>;
> > +   regulator-max-microvolt = <330>;
> > +   gpio = <&twl_gpio 18 0>;/* GPIO LEDA */
> > +   startup-delay-us = <7>;
> > +   enable-active-high; /* FIXME: active-low for Rev. C */
> 
> Benoit & Tony,
> 
> Any ideas how to tackle the reversed logic for Rev. C boards?

Sounds like we need a shared omap3-beagle.dtsi, then omap3-beagle-xm.dts
and omap3-beagle-rev-c.dts. Then xm and rev-c can both include the
common .dtsi.

Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 19/26] sh: Don't use create_proc_read_entry() [RFC]

2013-04-16 Thread Simon Horman
On Wed, Apr 17, 2013 at 03:42:03AM +0900, Paul Mundt wrote:
> On Tue, Apr 16, 2013 at 07:28:42AM +0100, Al Viro wrote:
> > On Tue, Apr 16, 2013 at 03:11:13PM +0900, Simon Horman wrote:
> > > On Thu, Apr 11, 2013 at 02:30:09PM +0100, David Howells wrote:
> > > > Don't use create_proc_read_entry() as that is deprecated, but rather use
> > > > proc_create_data() and seq_file instead.
> > > 
> > > Paul, do you want me to handle this?
> > 
> > FWIW, I can pick that stuff via vfs.git - not a problem and I've already got
> > shitloads of procfs-related patches in that queue...
> 
> That works for me.

Great.

Acked-by: Simon Horman 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: Add a Kconfig shortcut for a kvm-bootable kernel

2013-04-16 Thread Sasha Levin
On 04/16/2013 12:18 PM, Borislav Petkov wrote:
> On Sun, Apr 14, 2013 at 01:03:20PM +0200, Borislav Petkov wrote:
>> On Sun, Apr 14, 2013 at 12:31:12PM +0300, Pekka Enberg wrote:
>>> I obviously support having something like this in mainline. I wonder
>>> though if we could just call this "default standalone KVM guest
>>> config" instead of emphasizing testing angle.
>>
>> /me nods agreeingly...
>>
>> And it should be under HYPERVISOR_GUEST where the rest of this stuff
>> resides. Good point.
> 
> Sanity check question:
> 
> Why not add the select stuff, i.e. this:
> 
>   select NET
>   select NETDEVICES
>   select PCI
>   select BLOCK
>   select BLK_DEV
>   select NETWORK_FILESYSTEMS
>   select INET
>   select EXPERIMENTAL
>   select TTY
>   select SERIAL_8250
>   select SERIAL_8250_CONSOLE
>   select IP_PNP
>   select IP_PNP_DHCP
>   select BINFMT_ELF
>   select PCI_MSI
>   select HAVE_ARCH_KGDB
>   select DEBUG_KERNEL
>   select KGDB
>   select KGDB_SERIAL_CONSOLE
>   select VIRTUALIZATION
>   select VIRTIO
>   select VIRTIO_RING
>   select VIRTIO_PCI
>   select VIRTIO_BLK
>   select VIRTIO_CONSOLE
>   select VIRTIO_NET
>   select 9P_FS
>   select NET_9P
>   select NET_9P_VIRTIO
> 
> to the option below which we already have. It is in the same sense a KVM
> guest support deal.
> 
> Hmm.
> 
> KVM people, any objections?
> 
> config KVM_GUEST
> bool "KVM Guest support (including kvmclock)"
> depends on PARAVIRT
> select PARAVIRT_CLOCK
> default y
> ---help---
>   This option enables various optimizations for running under the KVM
>   hypervisor. It includes a paravirtualized clock, so that instead
>   of relying on a PIT (or probably other) emulation by the
>   underlying device model, the host provides the guest with
>   timing infrastructure such as time of day, and system time

KVM guests don't need a serial device, KGDB, DEBUG_KERNEL or 9p in particular.


Thanks,
Sasha
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 0/6] mm/hugetlb: gigantic hugetlb page pools shrink supporting

2013-04-16 Thread Wanpeng Li
Changelog:
 * Add comments from Andi to the patchset description, indicating that
   shrinking gigantic hugetlb page pools makes sense.

Pages of order >= MAX_ORDER are only allocated at boot time, using the
bootmem allocator with the "hugepages=xxx" option. By default these pages
are never freed after boot, since that would be a one-way street (>= MAX_ORDER
pages cannot be allocated later). However, if the administrator confirms that
these gigantic pages will not be used any more, the pinned pages just waste
memory: other users can't grab free pages from the gigantic hugetlb pool even
under OOM, which is not flexible. This patchset adds support for shrinking
gigantic hugetlb page pools. The administrator can enable a knob exported via
sysctl to permit shrinking the gigantic hugetlb pool.

http://marc.info/?l=linux-mm&m=136578016214512&w=2 
Andi thinks this idea makes sense, since he is working on a new patchkit to 
allocate GB pages from CMA. With that, freeing actually makes sense, as the 
pages can be reallocated.

Testcase:
boot: hugepagesz=1G hugepages=10

[root@localhost hugepages]# free -m
 total   used   free sharedbuffers cached
Mem: 36269  10836  25432  0 11288
-/+ buffers/cache:  10537  25732
Swap:35999  0  35999
[root@localhost hugepages]# echo 0 > 
/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
-bash: echo: write error: Invalid argument
[root@localhost hugepages]# echo 1 > /proc/sys/vm/hugetlb_shrink_gigantic_pool
[root@localhost hugepages]# echo 0 > 
/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
[root@localhost hugepages]# free -m
 total   used   free sharedbuffers cached
Mem: 36269597  35672  0 11288
-/+ buffers/cache:297  35972
Swap:35999  0  35999

Wanpeng Li (6):
  introduce new sysctl knob which control gigantic page pools shrinking
  update_and_free_page gigantic pages awareness
  enable gigantic hugetlb page pools shrinking
  use already exist huge_page_order() instead of h->order
  remove redundant hugetlb_prefault 
  use already exist interface huge_page_shift

 Documentation/sysctl/vm.txt |   13 +++
 include/linux/hugetlb.h |5 +--
 kernel/sysctl.c |7 
 mm/hugetlb.c|   83 +--
 mm/internal.h   |1 +
 mm/page_alloc.c |2 +-
 6 files changed, 82 insertions(+), 29 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 5/6] mm/hugetlb: remove redundant hugetlb_prefault

2013-04-16 Thread Wanpeng Li
hugetlb_prefault is not used by anyone. This patch removes the redundant
hugetlb_prefault declaration and its stub macro.

Signed-off-by: Wanpeng Li 
---
 include/linux/hugetlb.h |2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b7e4106..813b265 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -57,7 +57,6 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct page *ref_page);
-int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
 void hugetlb_show_meminfo(void);
@@ -113,7 +112,6 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_hugetlb_page(m,v,p,vs,a,b,i,w)  ({ BUG(); 0; })
 #define follow_huge_addr(mm, addr, write)  ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
-#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
 static inline void hugetlb_report_meminfo(struct seq_file *m)
 {
 }
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 4/6] mm/hugetlb: use already exist huge_page_order() instead of h->order

2013-04-16 Thread Wanpeng Li
Use the existing huge_page_order() interface instead of h->order to get the
huge page order.

Signed-off-by: Wanpeng Li 
---
 mm/hugetlb.c |   36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 328f140..0cae950 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -593,7 +593,8 @@ static void update_and_free_page(struct hstate *h, struct 
page *page)
struct page *p;
int order = huge_page_order(h);
 
-   VM_BUG_ON(!hugetlb_shrink_gigantic_pool && h->order >= MAX_ORDER);
+   VM_BUG_ON(!hugetlb_shrink_gigantic_pool &&
+   huge_page_order(h) >= MAX_ORDER);
 
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
@@ -722,7 +723,7 @@ static struct page *alloc_fresh_huge_page_node(struct 
hstate *h, int nid)
 {
struct page *page;
 
-   if (h->order >= MAX_ORDER)
+   if (huge_page_order(h) >= MAX_ORDER)
return NULL;
 
page = alloc_pages_exact_node(nid,
@@ -876,7 +877,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, 
int nid)
struct page *page;
unsigned int r_nid;
 
-   if (h->order >= MAX_ORDER)
+   if (huge_page_order(h) >= MAX_ORDER)
return NULL;
 
/*
@@ -1071,7 +1072,7 @@ static void return_unused_surplus_pages(struct hstate *h,
h->resv_huge_pages -= unused_resv_pages;
 
/* Cannot return gigantic pages currently */
-   if (h->order >= MAX_ORDER)
+   if (huge_page_order(h) >= MAX_ORDER)
return;
 
nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
@@ -1265,7 +1266,7 @@ static void __init gather_bootmem_prealloc(void)
 #endif
__ClearPageReserved(page);
WARN_ON(page_count(page) != 1);
-   prep_compound_huge_page(page, h->order);
+   prep_compound_huge_page(page, huge_page_order(h));
prep_new_huge_page(h, page, page_to_nid(page));
/*
 * If we had gigantic hugepages allocated at boot time, we need
@@ -1273,8 +1274,8 @@ static void __init gather_bootmem_prealloc(void)
 * fix confusing memory reports from free(1) and another
 * side-effects, like CommitLimit going negative.
 */
-   if (h->order > (MAX_ORDER - 1))
-   totalram_pages += 1 << h->order;
+   if (huge_page_order(h) > (MAX_ORDER - 1))
+   totalram_pages += 1 << huge_page_order(h);
}
 }
 
@@ -1283,7 +1284,7 @@ static void __init hugetlb_hstate_alloc_pages(struct 
hstate *h)
unsigned long i;
 
for (i = 0; i < h->max_huge_pages; ++i) {
-   if (h->order >= MAX_ORDER) {
+   if (huge_page_order(h) >= MAX_ORDER) {
if (!alloc_bootmem_huge_page(h))
break;
} else if (!alloc_fresh_huge_page(h,
@@ -1299,7 +1300,7 @@ static void __init hugetlb_init_hstates(void)
 
for_each_hstate(h) {
/* oversize hugepages were init'ed in early boot */
-   if (h->order < MAX_ORDER)
+   if (huge_page_order(h) < MAX_ORDER)
hugetlb_hstate_alloc_pages(h);
}
 }
@@ -1333,7 +1334,7 @@ static void try_to_free_low(struct hstate *h, unsigned 
long count,
 {
int i;
 
-   if (h->order >= MAX_ORDER)
+   if (huge_page_order(h) >= MAX_ORDER)
return;
 
for_each_node_mask(i, *nodes_allowed) {
@@ -1416,8 +1417,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, 
unsigned long count,
 {
unsigned long min_count, ret;
 
-   if (h->order >= MAX_ORDER && (!hugetlb_shrink_gigantic_pool ||
-   count > persistent_huge_pages(h)))
+   if (huge_page_order(h) >= MAX_ORDER && (!hugetlb_shrink_gigantic_pool
+   || count > persistent_huge_pages(h)))
return h->max_huge_pages;
 
/*
@@ -1543,7 +1544,7 @@ static ssize_t nr_hugepages_store_common(bool 
obey_mempolicy,
goto out;
 
h = kobj_to_hstate(kobj, &nid);
-   if (h->order >= MAX_ORDER && !hugetlb_shrink_gigantic_pool) {
+   if (huge_page_order(h) >= MAX_ORDER && !hugetlb_shrink_gigantic_pool) {
err = -EINVAL;
goto out;
}
@@ -1626,7 +1627,7 @@ static ssize_t nr_overcommit_hugepages_store(struct 
kobject *kobj,
unsigned long input;
struct hstate *h = kobj_to_hstate(kobj, NULL);
 
-   if (h->order >= MAX_ORDER)
+   if (huge_page_order(h) >= MAX_ORDER)
return -EINVAL;
 
err = strict_strtoul(buf, 10, &input);
@@ -2037,7 +2038,8 @@ static int hugetlb_sysctl_handler_common(bool 
obey_mempolicy,
 
tmp = h->max_huge_pages;
 
-   if (write && h->order >= MAX_ORDER && !hugetlb_shri

[PATCH v2 2/6] mm/hugetlb: update_and_free_page gigantic pages awareness

2013-04-16 Thread Wanpeng Li
Pages of order >= MAX_ORDER can't be freed to the buddy system directly. This
patch breaks the gigantic hugetlb page up into normal order-0 pages and frees
them one by one.

Signed-off-by: Wanpeng Li 
---
 mm/hugetlb.c|   39 +--
 mm/internal.h   |1 +
 mm/page_alloc.c |2 +-
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4a0c270..eeaf6f2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -579,25 +579,44 @@ err:
return NULL;
 }
 
+static inline clear_page_flag(struct page *page)
+{
+   page->flags &= ~(1 << PG_locked | 1 << PG_error |
+   1 << PG_referenced | 1 << PG_dirty |
+   1 << PG_active | 1 << PG_reserved |
+   1 << PG_private | 1 << PG_writeback);
+}
+
 static void update_and_free_page(struct hstate *h, struct page *page)
 {
int i;
+   struct page *p;
+   int order = huge_page_order(h);
 
-   VM_BUG_ON(h->order >= MAX_ORDER);
+   VM_BUG_ON(!hugetlb_shrink_gigantic_pool && h->order >= MAX_ORDER);
 
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
-   for (i = 0; i < pages_per_huge_page(h); i++) {
-   page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
-   1 << PG_referenced | 1 << PG_dirty |
-   1 << PG_active | 1 << PG_reserved |
-   1 << PG_private | 1 << PG_writeback);
-   }
-   VM_BUG_ON(hugetlb_cgroup_from_page(page));
set_compound_page_dtor(page, NULL);
-   set_page_refcounted(page);
arch_release_hugepage(page);
-   __free_pages(page, huge_page_order(h));
+   VM_BUG_ON(hugetlb_cgroup_from_page(page));
+
+   if (order < MAX_ORDER) {
+   for (i = 0; i < pages_per_huge_page(h); i++)
+   clear_page_flag(page+i);
+   set_page_refcounted(page);
+   __free_pages(page, huge_page_order(h));
+   } else {
+   int nr_pages = 1 << order;
+   destroy_compound_page(page, order);
+   set_compound_order(page, 0);
+   for (i = 0, p = page; i < nr_pages; i++,
+   p = mem_map_next(p, page, i)) {
+   clear_page_flag(p);
+   set_page_refcounted(p);
+   __free_pages(p, 0);
+   }
+   }
 }
 
 struct hstate *size_to_hstate(unsigned long size)
diff --git a/mm/internal.h b/mm/internal.h
index 8562de0..a63a35f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -101,6 +101,7 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned 
long address);
  */
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
+extern int destroy_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
 extern bool is_free_buddy_page(struct page *page);
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1394c5a..0ea14ba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -367,7 +367,7 @@ void prep_compound_page(struct page *page, unsigned long 
order)
 }
 
 /* update __split_huge_page_refcount if you change this function */
-static int destroy_compound_page(struct page *page, unsigned long order)
+int destroy_compound_page(struct page *page, unsigned long order)
 {
int i;
int nr_pages = 1 << order;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 6/6] mm/hugetlb: use already exist interface huge_page_shift

2013-04-16 Thread Wanpeng Li
Use the existing huge_page_shift() interface instead of h->order + PAGE_SHIFT.

Signed-off-by: Wanpeng Li 
---
 mm/hugetlb.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0cae950..750ed8a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -320,7 +320,7 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct 
*vma)
 
hstate = hstate_vma(vma);
 
-   return 1UL << (hstate->order + PAGE_SHIFT);
+   return 1UL << huge_page_shift(hstate);
 }
 EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/6] mm/hugetlb: enable gigantic hugetlb page pools shrinking

2013-04-16 Thread Wanpeng Li
Enable gigantic hugetlb page pools shrinking.

Signed-off-by: Wanpeng Li 
---
 mm/hugetlb.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eeaf6f2..328f140 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1416,7 +1416,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, 
unsigned long count,
 {
unsigned long min_count, ret;
 
-   if (h->order >= MAX_ORDER)
+   if (h->order >= MAX_ORDER && (!hugetlb_shrink_gigantic_pool ||
+   count > persistent_huge_pages(h)))
return h->max_huge_pages;
 
/*
@@ -1542,7 +1543,7 @@ static ssize_t nr_hugepages_store_common(bool 
obey_mempolicy,
goto out;
 
h = kobj_to_hstate(kobj, &nid);
-   if (h->order >= MAX_ORDER) {
+   if (h->order >= MAX_ORDER && !hugetlb_shrink_gigantic_pool) {
err = -EINVAL;
goto out;
}
@@ -2036,7 +2037,7 @@ static int hugetlb_sysctl_handler_common(bool 
obey_mempolicy,
 
tmp = h->max_huge_pages;
 
-   if (write && h->order >= MAX_ORDER)
+   if (write && h->order >= MAX_ORDER && !hugetlb_shrink_gigantic_pool)
return -EINVAL;
 
table->data = &tmp;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/6] mm/hugetlb: introduce new sysctl knob which control gigantic page pools shrinking

2013-04-16 Thread Wanpeng Li
This patch introduces a new sysctl knob to support shrinking gigantic hugetlb
page pools. The default value is 0, since shrinking gigantic page pools is
not permitted by default; the administrator can echo 1 to the knob to enable
gigantic page pool shrinking once they have confirmed the pages won't be
used any more.

Signed-off-by: Wanpeng Li 
---
 Documentation/sysctl/vm.txt |   13 +
 include/linux/hugetlb.h |3 +++
 kernel/sysctl.c |7 +++
 mm/hugetlb.c|9 +
 4 files changed, 32 insertions(+)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 21ad181..3baf332 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -30,6 +30,7 @@ Currently, these files are in /proc/sys/vm:
 - extfrag_threshold
 - hugepages_treat_as_movable
 - hugetlb_shm_group
+- hugetlb_shrink_gigantic_pool
 - laptop_mode
 - legacy_va_layout
 - lowmem_reserve_ratio
@@ -211,6 +212,18 @@ shared memory segment using hugetlb page.
 
 ==
 
+hugetlb_shrink_gigantic_pool
+
+order >= MAX_ORDER pages are only allocated at boot stage using the bootmem
+allocator with the "hugepages=xxx" option. These pages are never free'd
+by default since it would be a one-way street(>= MAX_ORDER pages cannot
+be allocated later), but if administrator confirm not to use these gigantic
+pages any more, these pinned pages will waste memory since other users
+can't grab free pages from gigantic hugetlb pool even OOM. Administrator
+can enable this parameter to permit to shrink gigantic hugetlb pool
+
+==
+
 laptop_mode
 
 laptop_mode is a knob that controls "laptop mode". All the things that are
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3a62df3..b7e4106 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -36,6 +36,8 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, 
loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t 
*, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, 
size_t *, loff_t *);
+int hugetlb_shrink_gigantic_pool_handler(struct ctl_table *,
+   int, void __user *, size_t *, loff_t *);
 
 #ifdef CONFIG_NUMA
 int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
@@ -73,6 +75,7 @@ extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 extern struct list_head huge_boot_pages;
+extern int hugetlb_shrink_gigantic_pool;
 
 /* arch callbacks */
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3dadde5..25eb85f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1187,6 +1187,13 @@ static struct ctl_table vm_table[] = {
.extra1 = (void *)&hugetlb_zero,
.extra2 = (void *)&hugetlb_infinity,
},
+   {
+   .procname   = "hugetlb_shrink_gigantic_pool",
+   .data   = &hugetlb_shrink_gigantic_pool,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = hugetlb_shrink_gigantic_pool_handler,
+   },
 #ifdef CONFIG_NUMA
{
.procname   = "nr_hugepages_mempolicy",
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bacdf38..4a0c270 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,6 +35,7 @@
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
+int hugetlb_shrink_gigantic_pool;
 
 int hugetlb_max_hstate __read_mostly;
 unsigned int default_hstate_idx;
@@ -671,6 +672,14 @@ static void prep_compound_gigantic_page(struct page *page, 
unsigned long order)
}
 }
 
+int hugetlb_shrink_gigantic_pool_handler(struct ctl_table *table, int write,
+   void __user *buffer,
+   size_t *length, loff_t *ppos)
+{
+   proc_dointvec(table, write, buffer, length, ppos);
+   return 0;
+}
+
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for normal or
  * transparent huge pages.  See the PageTransHuge() documentation for more
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bug fix PATCH v3] Reusing a resource structure allocated by bootmem

2013-04-16 Thread David Rientjes
On Wed, 17 Apr 2013, Yasuaki Ishimatsu wrote:

> When hot-removing memory that was present at boot time, the following
> messages are shown:
> 
> [  296.867031] [ cut here ]
> [  296.922273] kernel BUG at mm/slub.c:3409!
> [  296.970229] invalid opcode:  [#1] SMP
> [  297.019453] Modules linked in: ebtable_nat ebtables xt_CHECKSUM 
> iptable_mangle bridge stp llc ipmi_devintf ipmi_msghandler sunrpc ipt_REJECT 
> nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT 
> nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter 
> ip6_tables binfmt_misc vfat fat dm_mirror dm_region_hash dm_log dm_mod 
> vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support coretemp 
> kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode pcspkr sg i2c_i801 
> lpc_ich mfd_core igb i2c_algo_bit i2c_core e1000e ptp pps_core tpm_infineon 
> ioatdma dca sr_mod cdrom sd_mod crc_t10dif usb_storage megaraid_sas lpfc 
> scsi_transport_fc scsi_tgt scsi_mod
> [  297.747808] CPU 0
> [  297.769764] Pid: 5091, comm: kworker/0:2 Tainted: GW3.9.0-rc6+ 
> #15
> [  297.897917] RIP: 0010:[]  [] 
> kfree+0x232/0x240
> [  297.988634] RSP: 0018:88084678d968  EFLAGS: 00010246
> [  298.052196] RAX: 00600400 RBX: 8987fea0 RCX: 
> 
> [  298.137595] RDX: 8107a5ae RSI: 0001 RDI: 
> 8987fea0
> [  298.222994] RBP: 88084678d998 R08: 8200 R09: 
> 0001
> [  298.308390] R10:  R11:  R12: 
> 0300
> [  298.393792] R13: ea061fc0 R14: 0303 R15: 
> 0080
> [  298.479190] FS:  () GS:88085aa0() 
> knlGS:
> [  298.576030] CS:  0010 DS:  ES:  CR0: 80050033
> [  298.644791] CR2: 025d3f78 CR3: 01c0c000 CR4: 
> 001407f0
> [  298.730192] DR0:  DR1:  DR2: 
> 
> [  298.815590] DR3:  DR6: 0ff0 DR7: 
> 0400
> [  298.900997] Process kworker/0:2 (pid: 5091, threadinfo 88084678c000, 
> task 88083928ca80)
> [  299.005121] Stack:
> [  299.029156]  0303 8987fea0 0300 
> 8987fe90
> [  299.118116]  0303 0080 88084678d9c8 
> 8107a5d4
> [  299.207084]  3000 8987fffb2680 0080 
> 3000
> [  299.296045] Call Trace:
> [  299.325288]  [] __release_region+0xd4/0xe0
> [  299.393020]  [] __remove_pages+0x52/0x110
> [  299.459707]  [] arch_remove_memory+0x89/0xd0
> [  299.529505]  [] remove_memory+0xc4/0x100
> [  299.595145]  [] acpi_memory_device_remove+0x6d/0xb1
> [  299.672230]  [] acpi_device_remove+0x89/0xab
> [  299.742033]  [] __device_release_driver+0x7c/0xf0
> [  299.817048]  [] device_release_driver+0x2f/0x50
> [  299.889972]  [] acpi_bus_device_detach+0x6c/0x70
> [  299.963938]  [] acpi_ns_walk_namespace+0x11a/0x250
> [  300.039982]  [] ? power_state_show+0x36/0x36
> [  300.109800]  [] ? power_state_show+0x36/0x36
> [  300.179612]  [] acpi_walk_namespace+0xee/0x137
> [  300.251492]  [] acpi_bus_trim+0x33/0x7a
> [  300.316089]  [] ? mutex_lock_nested+0x4a/0x60
> [  300.386927]  [] acpi_bus_hot_remove_device+0xc4/0x1a1
> [  300.466096]  [] acpi_os_execute_deferred+0x27/0x34
> [  300.542137]  [] process_one_work+0x1f7/0x590
> [  300.611940]  [] ? process_one_work+0x185/0x590
> [  300.683823]  [] worker_thread+0x11a/0x370
> [  300.750502]  [] ? manage_workers+0x180/0x180
> [  300.820308]  [] kthread+0xee/0x100
> [  300.879714]  [] ? __lock_release+0x12b/0x190
> [  300.949512]  [] ? __init_kthread_worker+0x70/0x70
> [  301.024517]  [] ret_from_fork+0x7c/0xb0
> [  301.089135]  [] ? __init_kthread_worker+0x70/0x70
> [  301.164138] Code: 89 ef e8 c2 2c fb ff e9 0b ff ff ff 4d 8b 6d 30 e9 5c fe 
> ff ff 4c 89 f1 48 89 da 4c 89 ee 4c 89 e7 e8 03 f9 ff ff e9 ec fe ff ff <0f> 
> 0b eb fe 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec
> [  301.397214] RIP  [] kfree+0x232/0x240
> [  301.459855]  RSP 
> [  301.501675] ---[ end trace 8679967aa8606ed8 ]---
> 
> These messages are shown because a resource structure allocated by bootmem
> is released by kfree(). So when we release a resource structure, we should
> check whether it was allocated by bootmem or not.
> 
> But even if we know a resource structure was allocated by bootmem, we cannot
> release it, since SLxB cannot handle it. So, to reuse such a resource
> structure, this patch remembers it by using bootmem_resource as follows:
> 
> When releasing a resource structure, free_resource() checks whether the
> resource structure was allocated by bootmem or not. If it was allocated by
> bootmem, free_resource() adds it to bootmem_resource. If it was not
> allocated by bootmem, free_resource() releases it by kfree().
> 
> And when getting a new resource structure by get_resource(), get_resource()
> chec

Re: [PATCH v2 0/6] Chainsaw efivars.c

2013-04-16 Thread Tom Gundersen
Hi Matt,

On Tue, Apr 16, 2013 at 11:41 PM, Matt Fleming  wrote:
> This patch series introduces the new efivar_entry API, and splits out the 
> major
> parts of efivars.c into new files. In particular, having the efivarfs code
> under fs/ allows building an efivarfs.ko module, which means mount(8) can
> automatically load it.
>
> The remaining EFI code is repositioned under drivers/firmware/efi/.

These all look good to me, so feel free to add my Reviewed-by.

Tested on top of 3.9-rc7, with everything built as modules. Everything
works as intended: systemd correctly detects it is on an EFI system,
so mounts efivarfs, which auto-loads the required modules.

Thanks a lot!

Tom
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   4   5   6   >